List:Commits« Previous MessageNext Message »
From:jani Date:May 15 2006 8:33pm
Subject:bk commit into 5.1 tree (jani:1.2137)
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of jani. When jani does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.2137 06/05/15 21:33:47 jani@stripped +1 -0
  Merge jamppa@stripped:/home/bk/mysql-5.1-new
  into  a193-229-222-105.elisa-laajakaista.fi:/home/my/bk/mysql-5.1-new-merge_060509

  sql/sql_select.cc
    1.404 06/05/15 21:33:38 jani@stripped +0 -0
    Auto merged

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	jani
# Host:	a193-229-222-105.elisa-laajakaista.fi
# Root:	/home/my/bk/mysql-5.1-new-merge_060509/RESYNC

--- 1.403/sql/sql_select.cc	2006-05-12 11:26:31 +03:00
+++ 1.404/sql/sql_select.cc	2006-05-15 21:33:38 +03:00
@@ -3377,7 +3377,10 @@
       uint key= keyuse->key;
       KEY *keyinfo= table->key_info+key;
       bool ft_key=  (keyuse->keypart == FT_KEYPART);
-      uint found_ref_or_null= 0;
+      /* Bitmap of keyparts where the ref access is over 'keypart=const': */
+      key_part_map const_part= 0;
+      /* The or-null keypart in ref-or-null access: */
+      key_part_map ref_or_null_part= 0;
 
       /* Calculate how many key segments of the current key we can use */
       start_key= keyuse;
@@ -3389,12 +3392,14 @@
         do
         {
           if (!(remaining_tables & keyuse->used_tables) &&
-              !(found_ref_or_null & keyuse->optimize))
+              !(ref_or_null_part && (keyuse->optimize &
+                                     KEY_OPTIMIZE_REF_OR_NULL)))
           {
             found_part|= keyuse->keypart_map;
-            double tmp= prev_record_reads(join,
-					  (found_ref |
-					   keyuse->used_tables));
+            if (!(keyuse->used_tables & ~join->const_table_map))
+              const_part|= keyuse->keypart_map;
+            double tmp= prev_record_reads(join, (found_ref |
+                                                 keyuse->used_tables));
             if (tmp < best_prev_record_reads)
             {
               best_part_found_ref= keyuse->used_tables;
@@ -3406,8 +3411,8 @@
 	      If there is one 'key_column IS NULL' expression, we can
 	      use this ref_or_null optimisation of this field
 	    */
-	    found_ref_or_null|= (keyuse->optimize &
-				 KEY_OPTIMIZE_REF_OR_NULL);
+            if (keyuse->optimize & KEY_OPTIMIZE_REF_OR_NULL)
+              ref_or_null_part |= keyuse->keypart_map;
           }
           keyuse++;
         } while (keyuse->table == table && keyuse->key == key &&
@@ -3443,7 +3448,7 @@
           Check if we found full key
         */
         if (found_part == PREV_BITS(uint,keyinfo->key_parts) &&
-            !found_ref_or_null)
+            !ref_or_null_part)
         {                                         /* use eq key */
           max_key_part= (uint) ~0;
           if ((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME)
@@ -3455,6 +3460,23 @@
           {
             if (!found_ref)
             {                                     /* We found a const key */
+              /*
+                ReuseRangeEstimateForRef-1:
+                We get here if we've found a ref(const) (c_i are constants):
+                  "(keypart1=c1) AND ... AND (keypartN=cN)"   [ref_const_cond]
+                
+                If range optimizer was able to construct a "range" 
+                access on this index, then its condition "quick_cond" was
+                eqivalent to ref_const_cond (*), and we can re-use E(#rows)
+                from the range optimizer.
+                
+                Proof of (*): By properties of range and ref optimizers 
+                quick_cond will be equal or tighther than ref_const_cond. 
+                ref_const_cond already covers "smallest" possible interval - 
+                a singlepoint interval over all keyparts. Therefore, 
+                quick_cond is equivalent to ref_const_cond (if it was an 
+                empty interval we wouldn't have got here).
+              */
               if (table->quick_keys.is_set(key))
                 records= (double) table->quick_rows[key];
               else
@@ -3475,6 +3497,23 @@
                 if (records < 2.0)
                   records=2.0;               /* Can't be as good as a unique */
               }
+              /*
+                ReuseRangeEstimateForRef-2:  We get here if we could not reuse
+                E(#rows) from range optimizer. Make another try:
+                
+                If range optimizer produced E(#rows) for a prefix of the ref
+                access we're considering, and that E(#rows) is lower then our
+                current estimate, make an adjustment. The criteria of when we
+                can make an adjustment is a special case of the criteria used
+                in ReuseRangeEstimateForRef-3.
+              */
+              if (table->quick_keys.is_set(key) &&
+                  const_part & (1 << table->quick_key_parts[key]) &&
+                  table->quick_n_ranges[key] == 1 &&
+                  records > (double) table->quick_rows[key])
+              {
+                records= (double) table->quick_rows[key];
+              }
             }
             /* Limit the number of matched rows */
             tmp= records;
@@ -3503,12 +3542,50 @@
           {
             max_key_part= max_part_bit(found_part);
             /*
-              Check if quick_range could determinate how many rows we
-              will match
+              ReuseRangeEstimateForRef-3:
+              We're now considering a ref[or_null] access via
+              (t.keypart1=e1 AND ... AND t.keypartK=eK) [ OR  
+              (same-as-above but with one cond replaced 
+               with "t.keypart_i IS NULL")]  (**)
+              
+              Try re-using E(#rows) from "range" optimizer:
+              We can do so if "range" optimizer used the same intervals as
+              in (**). The intervals used by range optimizer may be not 
+              available at this point (as "range" access might have choosen to
+              create quick select over another index), so we can't compare
+              them to (**). We'll make indirect judgements instead.
+              The sufficient conditions for re-use are:
+              (C1) All e_i in (**) are constants, i.e. found_ref==FALSE. (if
+                   this is not satisfied we have no way to know which ranges
+                   will be actually scanned by 'ref' until we execute the 
+                   join)
+              (C2) max #key parts in 'range' access == K == max_key_part (this
+                   is apparently a necessary requirement)
+
+              We also have a property that "range optimizer produces equal or 
+              tighter set of scan intervals than ref(const) optimizer". Each
+              of the intervals in (**) are "tightest possible" intervals when 
+              one limits itself to using keyparts 1..K (which we do in #2).              
+              From here it follows that range access used either one, or
+              both of the (I1) and (I2) intervals:
+              
+               (t.keypart1=c1 AND ... AND t.keypartK=eK)  (I1) 
+               (same-as-above but with one cond replaced  
+                with "t.keypart_i IS NULL")               (I2)
+
+              The remaining part is to exclude the situation where range
+              optimizer used one interval while we're considering
+              ref-or-null and looking for estimate for two intervals. This
+              is done by last limitation:
+
+              (C3) "range optimizer used (have ref_or_null?2:1) intervals"
             */
-            if (table->quick_keys.is_set(key) &&
-                table->quick_key_parts[key] == max_key_part)
+            if (table->quick_keys.is_set(key) && !found_ref &&        
 //(C1)
+                table->quick_key_parts[key] == max_key_part &&          //(C2)
+                table->quick_n_ranges[key] == 1+test(ref_or_null_part)) //(C3)
+            {
               tmp= records= (double) table->quick_rows[key];
+            }
             else
             {
               /* Check if we have statistic about the distribution */
@@ -3552,21 +3629,37 @@
                 }
                 records = (ulong) tmp;
               }
+
+              if (ref_or_null_part)
+              {
+                /* We need to do two key searches to find key */
+                tmp *= 2.0;
+                records *= 2.0;
+              }
+
               /*
-                If quick_select was used on a part of this key, we know
-                the maximum number of rows that the key can match.
+                ReuseRangeEstimateForRef-4:  We get here if we could not reuse
+                E(#rows) from range optimizer. Make another try:
+                
+                If range optimizer produced E(#rows) for a prefix of the ref 
+                access we're considering, and that E(#rows) is lower then our
+                current estimate, make the adjustment.
+
+                The decision whether we can re-use the estimate from the range
+                optimizer is the same as in ReuseRangeEstimateForRef-3,
+                applied to first table->quick_key_parts[key] key parts.
               */
               if (table->quick_keys.is_set(key) &&
                   table->quick_key_parts[key] <= max_key_part &&
+                  const_part & (1 << table->quick_key_parts[key]) &&
+                  table->quick_n_ranges[key] == 1 + test(ref_or_null_part &
+                                                         const_part) &&
                   records > (double) table->quick_rows[key])
-                tmp= records= (double) table->quick_rows[key];
-              else if (found_ref_or_null)
               {
-                /* We need to do two key searches to find key */
-                tmp *= 2.0;
-                records *= 2.0;
+                tmp= records= (double) table->quick_rows[key];
               }
             }
+
             /* Limit the number of matched rows */
             set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
             if (table->used_keys.is_set(key))
@@ -4382,344 +4475,11 @@
     if ((rest_tables & real_table_bit) && !(rest_tables &
s->dependent) &&
         (!idx|| !check_interleaving_with_nj(join->positions[idx-1].table, s)))
     {
-      double best,best_time,records;
-      best=best_time=records=DBL_MAX;
-      KEYUSE *best_key=0;
-      uint best_max_key_part=0;
-      my_bool found_constraint= 0;
-
-      if (s->keyuse)
-      {						/* Use key if possible */
-	TABLE *table=s->table;
-	KEYUSE *keyuse,*start_key=0;
-	double best_records=DBL_MAX;
-	uint max_key_part=0;
-
-	/* Test how we can use keys */
-	rec= s->records/MATCHING_ROWS_IN_OTHER_TABLE;  // Assumed records/key
-	for (keyuse=s->keyuse ; keyuse->table == table ;)
-	{
-	  key_part_map found_part=0;
-	  table_map found_ref=0;
-	  uint key=keyuse->key;
-	  KEY *keyinfo=table->key_info+key;
-          bool ft_key=(keyuse->keypart == FT_KEYPART);
-	  uint found_ref_or_null= 0;
-
-	  /* Calculate how many key segments of the current key we can use */
-	  start_key=keyuse;
-	  do
-	  {
-            uint keypart=keyuse->keypart;
-            table_map best_part_found_ref= 0;
-            double best_prev_record_reads= DBL_MAX;
-	    do
-	    {
-	      if (!(rest_tables & keyuse->used_tables) &&
-		  !(found_ref_or_null & keyuse->optimize))
-	      {
-		found_part|=keyuse->keypart_map;
-                double tmp= prev_record_reads(join,
-					      (found_ref |
-					       keyuse->used_tables));
-                if (tmp < best_prev_record_reads)
-                {
-                  best_part_found_ref= keyuse->used_tables;
-                  best_prev_record_reads= tmp;
-                }
-		if (rec > keyuse->ref_table_rows)
-		  rec= keyuse->ref_table_rows;
-		/*
-		  If there is one 'key_column IS NULL' expression, we can
-		  use this ref_or_null optimisation of this field
-		*/
-		found_ref_or_null|= (keyuse->optimize &
-				     KEY_OPTIMIZE_REF_OR_NULL);
-              }
-	      keyuse++;
-	    } while (keyuse->table == table && keyuse->key == key &&
-		     keyuse->keypart == keypart);
-	    found_ref|= best_part_found_ref;
-	  } while (keyuse->table == table && keyuse->key == key);
-
-	  /*
-	    Assume that that each key matches a proportional part of table.
-	  */
-          if (!found_part && !ft_key)
-	    continue;				// Nothing usable found
-	  if (rec < MATCHING_ROWS_IN_OTHER_TABLE)
-	    rec= MATCHING_ROWS_IN_OTHER_TABLE;	// Fix for small tables
-
-          /*
-	    ft-keys require special treatment
-          */
-          if (ft_key)
-          {
-            /*
-	      Really, there should be records=0.0 (yes!)
-	      but 1.0 would be probably safer
-            */
-            tmp=prev_record_reads(join,found_ref);
-            records=1.0;
-          }
-          else
-          {
-	  found_constraint= 1;
-	  /*
-	    Check if we found full key
-	  */
-	  if (found_part == PREV_BITS(uint,keyinfo->key_parts) &&
-	      !found_ref_or_null)
-	  {				/* use eq key */
-	    max_key_part= (uint) ~0;
-	    if ((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY |
-				   HA_END_SPACE_KEY)) == HA_NOSAME)
-	    {
-	      tmp=prev_record_reads(join,found_ref);
-	      records=1.0;
-	    }
-	    else
-	    {
-	      if (!found_ref)
-	      {					// We found a const key
-		if (table->quick_keys.is_set(key))
-		  records= (double) table->quick_rows[key];
-		else
-		{
-		  /* quick_range couldn't use key! */
-		  records= (double) s->records/rec;
-		}
-	      }
-	      else
-	      {
-		if (!(records=keyinfo->rec_per_key[keyinfo->key_parts-1]))
-		{				// Prefere longer keys
-		  records=
-		    ((double) s->records / (double) rec *
-		     (1.0 +
-		      ((double) (table->s->max_key_length-keyinfo->key_length) /
-		       (double) table->s->max_key_length)));
-		  if (records < 2.0)
-		    records=2.0;		// Can't be as good as a unique
-		}
-	      }
-	      /* Limit the number of matched rows */
-	      tmp= records;
-	      set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
-	      if (table->used_keys.is_set(key))
-	      {
-		/* we can use only index tree */
-		uint keys_per_block= table->file->block_size/2/
-		  (keyinfo->key_length+table->file->ref_length)+1;
-		tmp=record_count*(tmp+keys_per_block-1)/keys_per_block;
-	      }
-	      else
-		tmp=record_count*min(tmp,s->worst_seeks);
-	    }
-	  }
-	  else
-	  {
-	    /*
-	      Use as much key-parts as possible and a uniq key is better
-	      than a not unique key
-	      Set tmp to (previous record count) * (records / combination)
-	    */
-	    if ((found_part & 1) &&
-		(!(table->file->index_flags(key,0,0) & HA_ONLY_WHOLE_INDEX) ||
-		 found_part == PREV_BITS(uint,keyinfo->key_parts)))
-	    {
-	      max_key_part=max_part_bit(found_part);
-	      /*
-		Check if quick_range could determinate how many rows we
-		will match
-	      */
-	      if (table->quick_keys.is_set(key) &&
-		  table->quick_key_parts[key] == max_key_part)
-		tmp=records= (double) table->quick_rows[key];
-	      else
-	      {
-		/* Check if we have statistic about the distribution */
-		if ((records=keyinfo->rec_per_key[max_key_part-1]))
-		  tmp=records;
-		else
-		{
-		  /*
-		    Assume that the first key part matches 1% of the file
-		    and that the whole key matches 10 (duplicates) or 1
-		    (unique) records.
-		    Assume also that more key matches proportionally more
-		    records
-		    This gives the formula:
-		    records= (x * (b-a) + a*c-b)/(c-1)
-
-		    b = records matched by whole key
-		    a = records matched by first key part (10% of all records?)
-		    c = number of key parts in key
-		    x = used key parts (1 <= x <= c)
-		  */
-		  double rec_per_key;
-                  rec_per_key= keyinfo->rec_per_key[keyinfo->key_parts-1] ?
-		    (double) keyinfo->rec_per_key[keyinfo->key_parts-1] :
-		    (double) s->records/rec+1;   
-		  if (!s->records)
-		    tmp=0;
-		  else if (rec_per_key/(double) s->records >= 0.01)
-		    tmp=rec_per_key;
-		  else
-		  {
-		    double a=s->records*0.01;
-		    tmp=(max_key_part * (rec_per_key - a) +
-			 a*keyinfo->key_parts - rec_per_key)/
-		      (keyinfo->key_parts-1);
-		    set_if_bigger(tmp,1.0);
-		  }
-		  records=(ulong) tmp;
-		}
-		/*
-		  If quick_select was used on a part of this key, we know
-		  the maximum number of rows that the key can match.
-		*/
-		if (table->quick_keys.is_set(key) &&
-		    table->quick_key_parts[key] <= max_key_part &&
-		    records > (double) table->quick_rows[key])
-		  tmp= records= (double) table->quick_rows[key];
-		else if (found_ref_or_null)
-		{
-		  /* We need to do two key searches to find key */
-		  tmp*= 2.0;
-		  records*= 2.0;
-		}
-	      }
-	      /* Limit the number of matched rows */
-	      set_if_smaller(tmp, (double) thd->variables.max_seeks_for_key);
-	      if (table->used_keys.is_set(key))
-	      {
-		/* we can use only index tree */
-		uint keys_per_block= table->file->block_size/2/
-		  (keyinfo->key_length+table->file->ref_length)+1;
-		tmp=record_count*(tmp+keys_per_block-1)/keys_per_block;
-	      }
-	      else
-		tmp=record_count*min(tmp,s->worst_seeks);
-	    }
-	    else
-	      tmp=best_time;			// Do nothing
-	  }
-          } /* not ft_key */
-	  if (tmp < best_time - records/(double) TIME_FOR_COMPARE)
-	  {
-	    best_time=tmp + records/(double) TIME_FOR_COMPARE;
-	    best=tmp;
-	    best_records=records;
-	    best_key=start_key;
-	    best_max_key_part=max_key_part;
-	  }
-	}
-	records=best_records;
-      }
-
-      /*
-	Don't test table scan if it can't be better.
-	Prefer key lookup if we would use the same key for scanning.
-
-	Don't do a table scan on InnoDB tables, if we can read the used
-	parts of the row from any of the used index.
-	This is because table scans uses index and we would not win
-	anything by using a table scan.
-        (see comment in best_access_path() for more details on the below
-         condition)
-      */
-      if ((records >= s->found_records || best > s->read_time) &&
-	  !(s->quick && best_key && s->quick->index == best_key->key
&&
-	    best_max_key_part >= s->table->quick_key_parts[best_key->key])
&&
-	  !((s->table->file->table_flags() & HA_TABLE_SCAN_ON_INDEX) &&
-	    ! s->table->used_keys.is_clear_all() && best_key) &&
-	  !(s->table->force_index && best_key && !s->quick))
-      {						// Check full join
-        ha_rows rnd_records= s->found_records;
-        /*
-          If there is a restriction on the table, assume that 25% of the
-          rows can be skipped on next part.
-          This is to force tables that this table depends on before this
-          table
-        */
-        if (found_constraint)
-          rnd_records-= rnd_records/4;
-
-        /*
-          Range optimizer never proposes a RANGE if it isn't better
-          than FULL: so if RANGE is present, it's always preferred to FULL.
-          Here we estimate its cost.
-        */
-        if (s->quick)
-        {
-          /*
-            For each record we:
-             - read record range through 'quick'
-             - skip rows which does not satisfy WHERE constraints
-           */
-          tmp= record_count *
-               (s->quick->read_time +
-               (s->found_records - rnd_records)/(double) TIME_FOR_COMPARE);
-        }
-        else
-        {
-          /* Estimate cost of reading table. */
-          tmp= s->table->file->scan_time();
-          if (s->table->map  & join->outer_join)      // Can't use join
cache
-          {
-            /*
-              For each record we have to:
-              - read the whole table record 
-              - skip rows which does not satisfy join condition
-            */
-            tmp= record_count *
-                 (tmp +     
-                 (s->records - rnd_records)/(double) TIME_FOR_COMPARE);
-          }
-          else
-          {
-            /* We read the table as many times as join buffer becomes full. */
-            tmp*= (1.0 + floor((double) cache_record_length(join,idx) *
-                               record_count /
-                               (double) thd->variables.join_buff_size));
-            /* 
-              We don't make full cartesian product between rows in the scanned
-              table and existing records because we skip all rows from the
-              scanned table, which does not satisfy join condition when 
-              we read the table (see flush_cached_records for details). Here we
-              take into account cost to read and skip these records.
-            */
-            tmp+= (s->records - rnd_records)/(double) TIME_FOR_COMPARE;
-          }
-        }
-
-        /*
-          We estimate the cost of evaluating WHERE clause for found records
-          as record_count * rnd_records / TIME_FOR_COMPARE. This cost plus
-          tmp give us total cost of using TABLE SCAN
-        */
-	if (best == DBL_MAX ||
-	    (tmp  + record_count/(double) TIME_FOR_COMPARE*rnd_records <
-	     best + record_count/(double) TIME_FOR_COMPARE*records))
-	{
-	  /*
-	    If the table has a range (s->quick is set) make_join_select()
-	    will ensure that this will be used
-	  */
-	  best=tmp;
-	  records= rows2double(rnd_records);
-	  best_key=0;
-	}
-      }
-      join->positions[idx].records_read= records;
-      join->positions[idx].key=best_key;
-      join->positions[idx].table= s;
-      if (!best_key && idx == join->const_tables &&
-	  s->table == join->sort_by_table &&
-	  join->unit->select_limit_cnt >= records)
-	join->sort_by_table= (TABLE*) 1;	// Must use temporary table
-
+      double records, best;
+      best_access_path(join, s, thd, rest_tables, idx, record_count, 
+                       read_time);
+      records= join->positions[idx].records_read;
+      best= join->positions[idx].read_time;
       /*
 	Go to the next level only if there hasn't been a better key on
 	this level! This will cut down the search for a lot simple cases!
Thread
bk commit into 5.1 tree (jani:1.2137)jani15 May