#At file:///export/home/jl208045/mysql/mysql-trunk-11765831/ based on revid:marc.alff@stripped
3340 Jorgen Loland 2011-04-29
BUG#11765831: 'RANGE ACCESS' MAY INCORRECTLY FILTER
AWAY QUALIFYING ROWS
Preparation patch (does not include fix for the bug):
* Extensively document key_or()
* Remove tab indentations from key_or()
* Minor code changes like using existing utility functions
in key_or()
@ sql/opt_range.cc
Extensively document key_or(), remove tab indentations from key_or(), minor code changes like using existing utility functions in key_or()
modified:
sql/opt_range.cc
=== modified file 'sql/opt_range.cc'
--- a/sql/opt_range.cc 2011-04-23 20:44:45 +0000
+++ b/sql/opt_range.cc 2011-04-29 08:02:04 +0000
@@ -6725,7 +6725,7 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *
{
key1->free_tree();
key2->free_tree();
- return 0; // Can't optimize this
+ return 0; // Can't optimize this
}
// If one of the key is MAYBE_KEY then the found region may be bigger
@@ -6749,7 +6749,7 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *
swap_variables(SEL_ARG *,key1,key2);
}
if (key1->use_count > 0 || !(key1=key1->clone_tree(param)))
- return 0; // OOM
+ return 0; // OOM
}
// Add tree at key2 to tree at key1
@@ -6758,237 +6758,367 @@ key_or(RANGE_OPT_PARAM *param, SEL_ARG *
for (key2=key2->first(); key2; )
{
- SEL_ARG *tmp=key1->find_range(key2); // Find key1.min <= key2.min
- int cmp;
+ /*
+ Find the latest range in key1 that starts the same place or
+ before the range in key2 starts
+ */
+ SEL_ARG *tmp=key1->find_range(key2);
+ int cmp= 0;
if (!tmp)
{
- tmp=key1->first(); // tmp.min > key2.min
+ /*
+ The range in key2 starts before the first range in key1. Use
+ the first range in key1. In this case, tmp.min > key2.min
+ */
+ tmp=key1->first();
cmp= -1;
}
else if ((cmp=tmp->cmp_max_to_min(key2)) < 0)
- { // Found tmp.max < key2.min
+ {
+ /*
+ tmp.max < key2.min, so the range in tmp is to the left of the
+ range in key2 and the ranges are not overlapping
+ */
SEL_ARG *next=tmp->next;
- /* key1 on the left of key2 non-overlapping */
if (cmp == -2 && eq_tree(tmp->next_key_part,key2->next_key_part))
{
- // Join near ranges like tmp.max < 0 and key2.min >= 0
- SEL_ARG *key2_next=key2->next;
- if (key2_shared)
- {
- if (!(key2=new SEL_ARG(*key2)))
- return 0; // out of memory
- key2->increment_use_count(key1->use_count+1);
- key2->next=key2_next; // New copy of key2
- }
- key2->copy_min(tmp);
- if (!(key1=key1->tree_delete(tmp)))
- { // Only one key in tree
- key1=key2;
- key1->make_root();
- key2=key2_next;
- break;
- }
+ /*
+ cmp == -2 means that these ranges are adjacent. Since the
+ range in key1 and key2 have equal next_key_part they are
+ merged. E.g:
+
+ tmp: a < 3 tmp->next_key_part: b = 1
+ key2: a = 3 key2->next_key_part: b = 1
+ ^ ^
+ adjacent equal next_key_part
+ */
+ SEL_ARG *key2_next=key2->next;
+ if (key2_shared)
+ {
+ if (!(key2=new SEL_ARG(*key2)))
+ return 0; // out of memory
+ key2->increment_use_count(key1->use_count+1);
+ key2->next=key2_next; // New copy of key2
+ }
+ /*
+ Merge by making the range of key2 [tmp.min, key2.max]
+ Delete the tmp range from key1; the merged range now in key2
+ will be inserted into key1 below.
+ */
+ key2->copy_min(tmp);
+ if (!(key1=key1->tree_delete(tmp)))
+ { // Only one key in tree
+ key1=key2;
+ key1->make_root();
+ key2=key2_next;
+ break;
+ }
}
- if (!(tmp=next)) // tmp.min > key2.min
- break; // Copy rest of key2
+ if (!(tmp=next)) // Move to next range in key1. Now tmp.min > key2.min
+ break; // No more ranges in key1. Copy rest of key2
}
+
if (cmp < 0)
- { // tmp.min > key2.min
+ { // tmp.min > key2.min
int tmp_cmp;
- if ((tmp_cmp=tmp->cmp_min_to_max(key2)) > 0) // if tmp.min > key2.max
+ if ((tmp_cmp=tmp->cmp_min_to_max(key2)) > 0)
{
- /* tmp is on the right of key2 non-overlapping */
- if (tmp_cmp == 2 && eq_tree(tmp->next_key_part,key2->next_key_part))
- { // ranges are connected
- tmp->copy_min_to_min(key2);
- key1->merge_flags(key2);
- if (tmp->min_flag & NO_MIN_RANGE &&
- tmp->max_flag & NO_MAX_RANGE)
- {
- if (key1->maybe_flag)
- return new SEL_ARG(SEL_ARG::MAYBE_KEY);
- return 0;
- }
- key2->increment_use_count(-1); // Free not used tree
- key2=key2->next;
- continue;
- }
- else
- {
- SEL_ARG *next=key2->next; // Keys are not overlapping
- if (key2_shared)
- {
- SEL_ARG *cpy= new SEL_ARG(*key2); // Must make copy
- if (!cpy)
- return 0; // OOM
- key1=key1->insert(cpy);
- key2->increment_use_count(key1->use_count+1);
- }
- else
- key1=key1->insert(key2); // Will destroy key2_root
- key2=next;
- continue;
- }
+ /*
+ tmp.min > key2.max, so the range in tmp is to the right of the
+ range in key2 and the ranges are not overlapping.
+ */
+ if (tmp_cmp == 2 && eq_tree(tmp->next_key_part,key2->next_key_part))
+ {
+ /*
+ Adjacent ranges with equal next_key_part (see comment
+ above). Make range of tmp [key2.min, tmp.max] and move on
+ to next key2 range.
+ */
+ tmp->copy_min_to_min(key2);
+ key1->merge_flags(key2);
+ if (tmp->min_flag & NO_MIN_RANGE &&
+ tmp->max_flag & NO_MAX_RANGE)
+ {
+ if (key1->maybe_flag)
+ return new SEL_ARG(SEL_ARG::MAYBE_KEY);
+ return 0;
+ }
+ key2->increment_use_count(-1); // Free not used tree
+ key2=key2->next;
+ continue;
+ }
+ else
+ {
+ /*
+ The range in key2 does not overlap with any range in key1,
+ so insert it into key1 and move on to the next key2 range
+ */
+ SEL_ARG *next=key2->next;
+ if (key2_shared)
+ {
+ SEL_ARG *cpy= new SEL_ARG(*key2); // Must make copy
+ if (!cpy)
+ return 0; // OOM
+ key1=key1->insert(cpy);
+ key2->increment_use_count(key1->use_count+1);
+ }
+ else
+ key1=key1->insert(key2); // Will destroy key2_root
+ key2=next;
+ continue;
+ }
}
}
- /*
- tmp.min >= key2.min && tmp.min <= key.max (overlapping ranges)
- key2.min <= tmp.min <= key2.max
- */
+ /*
+ The ranges in tmp and key2 are overlapping, i.e.
+ key2.min <= tmp.min <= key2.max (in which case cmp < 0) or
+ tmp.min <= key2.min <= tmp.max (in which case cmp >= 0)
+
+ Corollary: tmp.min <= key2.max
+ */
if (eq_tree(tmp->next_key_part,key2->next_key_part))
{
+ // Merge overlapping ranges with equal next_key_part
if (tmp->is_same(key2))
{
- /*
- Found exact match of key2 inside key1.
+ /*
+ Found exact match of key2 inside key1.
Use the relevant range in key1.
*/
- tmp->merge_flags(key2); // Copy maybe flags
- key2->increment_use_count(-1); // Free not used tree
+ tmp->merge_flags(key2); // Copy maybe flags
+ key2->increment_use_count(-1); // Free not used tree
}
else
{
- SEL_ARG *last=tmp;
+ SEL_ARG *last=tmp;
SEL_ARG *first=tmp;
- /*
- Find the last range in tmp that overlaps key2 and has the same
- condition on the rest of the keyparts.
- */
- while (last->next && last->next->cmp_min_to_max(key2) <= 0 &&
- eq_tree(last->next->next_key_part,key2->next_key_part))
- {
- /*
- We've found the last overlapping key1 range in last.
- This means that the ranges between (and including) the
- first overlapping range (tmp) and the last overlapping range
- (last) are fully nested into the current range of key2
- and can safely be discarded. We just need the minimum endpoint
- of the first overlapping range (tmp) so we can compare it with
- the minimum endpoint of the enclosing key2 range.
- */
- SEL_ARG *save=last;
- last=last->next;
- key1=key1->tree_delete(save);
- }
/*
- The tmp range (the first overlapping range) could have been discarded
- by the previous loop. We should re-direct tmp to the new united range
- that's taking its place.
+ Find the last range in tmp that overlaps key2 and has the
+ same next_key_part, i.e, the rightmost range in tmp such
+ that tmp.min <= key2.max.
+
+ key2: [****------------------------------*******]
+ tmp: [--] [---------] [------] [-----] [++++]
+ ^ ^ ^
+ first last different next_key_part
+
+ (key2.min and max is somewhere in the "***" range)
+
+ Since key2 covers them, the ranges between first and last
+ are merged into one range by deleting first...last-1 from
+ the key1 tree. In the figure, this applies to first and the
+ two consecutive ranges. The range of last is then extended:
+ * last.min: Set to min(key2.min, first.min)
+ * last.max: If key2 overlaps last->next (it has different
+ next_key_part): Set to last->next.min
+ Otherwise: Set to max(key2.max, last.max)
*/
+ while (last->next && last->next->cmp_min_to_max(key2) <= 0 &&
+ eq_tree(last->next->next_key_part,key2->next_key_part))
+ {
+ // last->next is covered by key2 and can be deleted
+ SEL_ARG *save=last;
+ last=last->next;
+ key1=key1->tree_delete(save);
+ }
+ // Redirect tmp to last which will cover the entire range
tmp= last;
+ /*
+ We need the minimum endpoint of first so we can compare it
+ with the minimum endpoint of the enclosing key2 range.
+ */
last->copy_min(first);
+ // Set last.min= min(last.min, key2.min)
bool full_range= last->copy_min(key2);
if (!full_range)
{
if (last->next && key2->cmp_max_to_min(last->next) >= 0)
{
- last->max_value= last->next->min_value;
- if (last->next->min_flag & NEAR_MIN)
- last->max_flag&= ~NEAR_MAX;
- else
- last->max_flag|= NEAR_MAX;
+ /*
+ last->next.min < key2.max. This means that next_key_part
+ of last->next and key2 are not equal. Extend the range
+ of last up to where last->next starts.
+ */
+ last->copy_min_to_max(last->next);
}
- else
+ else // Set last.max= max(last.max, key2.max)
full_range= last->copy_max(key2);
}
- if (full_range)
- { // Full range
- key1->free_tree();
- for (; key2 ; key2=key2->next)
- key2->increment_use_count(-1); // Free not used tree
- if (key1->maybe_flag)
- return new SEL_ARG(SEL_ARG::MAYBE_KEY);
- return 0;
- }
+ if (full_range)
+ { // Full range
+ key1->free_tree();
+ for (; key2 ; key2=key2->next)
+ key2->increment_use_count(-1); // Free not used tree
+ if (key1->maybe_flag)
+ return new SEL_ARG(SEL_ARG::MAYBE_KEY);
+ return 0;
+ }
}
}
if (cmp >= 0 && tmp->cmp_min_to_min(key2) < 0)
- { // tmp.min <= x < key2.min
+ {
+ /*
+ tmp.min < key2.min and "cmp >= 0" means that tmp.max >= key2.min.
+ The ranges are overlapping but have not been merged because
+ next_key_part of tmp and key2 are different
+
+ Current situation:
+ key2: [---------]
+ tmp: [------**********]
+ ^ ^
+ tmp.max is somewhere between these points here
+ */
SEL_ARG *new_arg=tmp->clone_first(key2);
if (!new_arg)
- return 0; // OOM
+ return 0; // OOM
if ((new_arg->next_key_part= key1->next_key_part))
- new_arg->increment_use_count(key1->use_count+1);
+ new_arg->increment_use_count(key1->use_count+1);
tmp->copy_min_to_min(key2);
key1=key1->insert(new_arg);
- }
+ } // tmp.min >= key2.min due to this if()
- // tmp.min >= key2.min && tmp.min <= key2.max
- SEL_ARG key(*key2); // Get copy we can modify
+ // Now key2.min <= tmp.min <= key2.max
+ SEL_ARG key2_cpy(*key2); // Get copy we can modify
for (;;)
{
- if (tmp->cmp_min_to_min(&key) > 0)
- { // key.min <= x < tmp.min
- SEL_ARG *new_arg=key.clone_first(tmp);
- if (!new_arg)
- return 0; // OOM
- if ((new_arg->next_key_part=key.next_key_part))
- new_arg->increment_use_count(key1->use_count+1);
- key1=key1->insert(new_arg);
- }
- if ((cmp=tmp->cmp_max_to_max(&key)) <= 0)
- { // tmp.min. <= x <= tmp.max
- tmp->maybe_flag|= key.maybe_flag;
- key.increment_use_count(key1->use_count+1);
- tmp->next_key_part= key_or(param, tmp->next_key_part, key.next_key_part);
- if (!cmp) // Key2 is ready
- break;
- key.copy_max_to_min(tmp);
- if (!(tmp=tmp->next))
- {
- SEL_ARG *tmp2= new SEL_ARG(key);
- if (!tmp2)
- return 0; // OOM
- key1=key1->insert(tmp2);
- key2=key2->next;
- goto end;
- }
- if (tmp->cmp_min_to_max(&key) > 0)
- {
- SEL_ARG *tmp2= new SEL_ARG(key);
- if (!tmp2)
- return 0; // OOM
- key1=key1->insert(tmp2);
- break;
- }
+ if (tmp->cmp_min_to_min(&key2_cpy) > 0)
+ {
+ /*
+ tmp.min > key2_cpy.min:
+
+ Current situation:
+ key2_cpy: [-----------]
+ tmp: [-******]
+
+ Steps:
+ 1) Make new_arg with range [key2_cpy.min, tmp.min] and
+ insert it into key1
+ 2) Make key2 the range [tmp.min, key2_cpy.max]
+ */
+ SEL_ARG *new_arg=key2_cpy.clone_first(tmp);
+ if (!new_arg)
+ return 0; // OOM
+ if ((new_arg->next_key_part=key2_cpy.next_key_part))
+ new_arg->increment_use_count(key1->use_count+1);
+ key1=key1->insert(new_arg);
+ key2_cpy.copy_min_to_min(tmp);
+ }
+ if ((cmp=tmp->cmp_max_to_max(&key2_cpy)) <= 0)
+ {
+ /*
+ tmp.max <= key2_cpy.max
+
+ Current situation:
+ key2_cpy: [-----]
+ tmp: [---**]
+
+ Steps:
+ 1) Update next_key_part of tmp: OR it with key2_cpy->next_key_part.
+ 2) If tmp.max < key2_cpy.max: make key2_cpy the range
+ [tmp.max, key2_cpy.max] and insert into key1.
+ */
+ tmp->maybe_flag|= key2_cpy.maybe_flag;
+ key2_cpy.increment_use_count(key1->use_count+1);
+ tmp->next_key_part= key_or(param, tmp->next_key_part,
+ key2_cpy.next_key_part);
+
+ if (!cmp)
+ break; // tmp.max == key2_cpy.max: done with this key2 range
+
+ // Make key2_cpy the range [tmp.max, key2_cpy.max]
+ key2_cpy.copy_max_to_min(tmp);
+ if (!(tmp=tmp->next))
+ {
+ /*
+ No more ranges in key1. Insert key2_cpy and go to "end"
+ label to insert remaining ranges in key2 if any
+ */
+ SEL_ARG *tmp2= new SEL_ARG(key2_cpy);
+ if (!tmp2)
+ return 0; // OOM
+ key1=key1->insert(tmp2);
+ key2=key2->next;
+ goto end;
+ }
+ if (tmp->cmp_min_to_max(&key2_cpy) > 0)
+ {
+ /*
+ The next range in key1 does not overlap with key2_cpy.
+ Insert this range into key1 and move on to the next range
+ in key2
+ */
+ SEL_ARG *tmp2= new SEL_ARG(key2_cpy);
+ if (!tmp2)
+ return 0; // OOM
+ key1=key1->insert(tmp2);
+ break;
+ }
+ /*
+ key2_cpy overlaps with the next range in key1 and the case
+ is now "key2.min <= tmp.min <= key2.max". Go back to for(;;)
+ to handle this situation
+ */
+ continue;
}
else
{
- SEL_ARG *new_arg=tmp->clone_last(&key); // tmp.min <= x <= key.max
- if (!new_arg)
- return 0; // OOM
- tmp->copy_max_to_min(&key);
- tmp->increment_use_count(key1->use_count+1);
- /* Increment key count as it may be used for next loop */
- key.increment_use_count(1);
- new_arg->next_key_part= key_or(param, tmp->next_key_part, key.next_key_part);
- key1=key1->insert(new_arg);
- break;
+ /*
+ tmp.max > key2_cpy.max:
+
+ Current situation:
+ key2_cpy: [-----]
+ tmp: [--------]
+
+ Steps:
+
+ 1) Make new_arg with range [tmp.min, key2_cpy.max].
+ new_arg->next_key_part is OR between next_key_part
+ of tmp and key2_cpy
+ 2) Make tmp the range [key2.max, tmp.max]
+ 3) Insert new_arg into key1 (after changing tmp range to
+ insert into correct location in key1 tree)
+ */
+ SEL_ARG *new_arg=tmp->clone_last(&key2_cpy);
+ if (!new_arg)
+ return 0; // OOM
+ tmp->copy_max_to_min(&key2_cpy);
+ tmp->increment_use_count(key1->use_count+1);
+ /* Increment key count as it may be used for next loop */
+ key2_cpy.increment_use_count(1);
+ new_arg->next_key_part= key_or(param, tmp->next_key_part,
+ key2_cpy.next_key_part);
+ key1=key1->insert(new_arg);
+ break;
}
}
- key2=key2->next;
+ key2=key2->next; //Move on to next range in key2
}
end:
+ /*
+ Add key2 ranges that are non-overlapping with and higher than the
+ highest range in key1.
+ */
while (key2)
{
SEL_ARG *next=key2->next;
if (key2_shared)
{
- SEL_ARG *tmp=new SEL_ARG(*key2); // Must make copy
+ SEL_ARG *tmp=new SEL_ARG(*key2); // Must make copy
if (!tmp)
- return 0;
+ return 0;
key2->increment_use_count(key1->use_count+1);
key1=key1->insert(tmp);
}
else
- key1=key1->insert(key2); // Will destroy key2_root
+ key1=key1->insert(key2); // Will destroy key2_root
key2=next;
}
key1->use_count++;
+
return key1;
}
Attachment: [text/bzr-bundle] bzr/jorgen.loland@oracle.com-20110429080204-ql1kyfu0nt9ygdp4.bundle