Below is the list of changes that have just been committed into a local
5.0 repository of psergey. When psergey does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet
1.2093 06/04/25 23:33:31 sergefp@stripped +6 -0
BUG#15872: Don't run the range analyzer on "t1.keypart NOT IN (const1, ..., )", as that
consumes
too much memory. Instead, either create the equvalent SEL_TREE manually, or create only
two ranges that
strictly include the area to scan
(Note: just to re-iterate: increasing NOT_IN_IGNORE_THRESHOLD will make optimization run
slower for big
IN-lists, but the server will not run out of memory. O(N^2) memory use has been
eliminated)
sql/opt_range.cc
1.209 06/04/25 23:33:26 sergefp@stripped +84 -9
BUG#15872: Don't run the range analyzer on "t1.keypart NOT IN (const1, ..., )", as
that
consumes too much memory. Instead, either
A) create the equivalent SEL_TREE manually, making use of the fact that
item_not_in->array
has an ordered IN-list, or
B) create only two ranges: (-inf|NULL) < X < min_value_from_in_list,
max_value_from_in_list < X
(Choose #B if the IN-list has > 10K elements)
sql/item_cmpfunc.h
1.123 06/04/25 23:33:26 sergefp@stripped +74 -3
BUG#15872: Added in_vector::create_item(), in_vector::value_to_item() and their
implementations in concrete
classes.
sql/item.h
1.192 06/04/25 23:33:26 sergefp@stripped +1 -0
UG#15872: Added Item_decimal::set_decimal_value()
sql/item.cc
1.216 06/04/25 23:33:26 sergefp@stripped +10 -0
BUG#15872: Added Item_decimal::set_decimal_value()
mysql-test/t/func_in.test
1.19 06/04/25 23:33:26 sergefp@stripped +93 -1
Testcase for BUG#15872
mysql-test/r/func_in.result
1.24 06/04/25 23:33:26 sergefp@stripped +102 -1
Testcase for BUG#15872
# This is a BitKeeper patch. What follows are the unified diffs for the
# set of deltas contained in the patch. The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User: sergefp
# Host: newbox.mylan
# Root: /home/psergey/mysql-5.0-bug15827-r2
--- 1.215/sql/item.cc 2006-04-14 00:05:38 +04:00
+++ 1.216/sql/item.cc 2006-04-25 23:33:26 +04:00
@@ -1991,6 +1991,16 @@
}
+void Item_decimal::set_decimal_value(my_decimal *value_par)
+{
+ my_decimal2decimal(value_par, &decimal_value);
+ decimals= (uint8) decimal_value.frac;
+ unsigned_flag= !decimal_value.sign();
+ max_length= my_decimal_precision_to_length(decimal_value.intg + decimals,
+ decimals, unsigned_flag);
+}
+
+
String *Item_float::val_str(String *str)
{
// following assert is redundant, because fixed=1 assigned in constructor
--- 1.191/sql/item.h 2006-04-12 19:30:51 +04:00
+++ 1.192/sql/item.h 2006-04-25 23:33:26 +04:00
@@ -1442,6 +1442,7 @@
}
uint decimal_precision() const { return decimal_value.precision(); }
bool eq(const Item *, bool binary_cmp) const;
+ void set_decimal_value(my_decimal *value_par);
};
--- 1.122/sql/item_cmpfunc.h 2006-04-04 08:02:34 +04:00
+++ 1.123/sql/item_cmpfunc.h 2006-04-25 23:33:26 +04:00
@@ -623,15 +623,17 @@
/* Functions to handle the optimized IN */
+
+/* A vector of values of some type */
+
class in_vector :public Sql_alloc
{
- protected:
+public:
char *base;
uint size;
qsort2_cmp compare;
CHARSET_INFO *collation;
uint count;
-public:
uint used_count;
in_vector() {}
in_vector(uint elements,uint element_length,qsort2_cmp cmp_func,
@@ -647,6 +649,32 @@
qsort2(base,used_count,size,compare,collation);
}
int find(Item *item);
+
+ /*
+ Create an instance of Item_{type} (e.g. Item_decimal) constant object
+ which type allows it to hold an element of this vector without any
+ conversions.
+ The purpose of this function is to be able to get elements of this
+ vector in form of Item_xxx constants without creating Item_xxx object
+ for every array element you get (i.e. this implements "FlyWeight" pattern)
+ */
+ virtual Item* create_item() { return NULL; }
+
+ /*
+ Store the value at position #pos into provided item object
+ SYNOPSIS
+ value_to_item()
+ pos Index of value to store
+ item Constant item to store value into. The item must be of the same
+ type that create_item() returns.
+ */
+ virtual void value_to_item(uint pos, Item *item) { }
+
+ /* Compare values number pos1 and pos2 for equality */
+ bool compare_elems(uint pos1, uint pos2)
+ {
+ return test(compare(collation, base + pos1*size, base + pos2*size));
+ }
};
class in_string :public in_vector
@@ -658,6 +686,16 @@
~in_string();
void set(uint pos,Item *item);
byte *get_value(Item *item);
+ Item* create_item()
+ {
+ return new Item_string(collation);
+ }
+ void value_to_item(uint pos, Item *item)
+ {
+ String *str=((String*) base)+pos;
+ Item_string *to= (Item_string*)item;
+ to->str_value= *str;
+ }
};
class in_longlong :public in_vector
@@ -667,6 +705,19 @@
in_longlong(uint elements);
void set(uint pos,Item *item);
byte *get_value(Item *item);
+
+ Item* create_item()
+ {
+ /*
+ We're created a signed INT, this may not be correct in
+ general case (see BUG#19342).
+ */
+ return new Item_int(0);
+ }
+ void value_to_item(uint pos, Item *item)
+ {
+ ((Item_int*)item)->value= ((longlong*)base)[pos];
+ }
};
class in_double :public in_vector
@@ -676,6 +727,15 @@
in_double(uint elements);
void set(uint pos,Item *item);
byte *get_value(Item *item);
+ Item *create_item()
+ {
+ return new Item_float(0.0);
+ }
+ void value_to_item(uint pos, Item *item)
+ {
+ ((Item_float*)item)->value= ((double*) base)[pos];
+ }
+
};
class in_decimal :public in_vector
@@ -685,6 +745,16 @@
in_decimal(uint elements);
void set(uint pos, Item *item);
byte *get_value(Item *item);
+ Item *create_item()
+ {
+ return new Item_decimal(0, FALSE);
+ }
+ void value_to_item(uint pos, Item *item)
+ {
+ my_decimal *dec= ((my_decimal *)base) + pos;
+ Item_decimal *item_dec= (Item_decimal*)item;
+ item_dec->set_decimal_value(dec);
+ }
};
@@ -864,12 +934,13 @@
class Item_func_in :public Item_func_opt_neg
{
+public:
Item_result cmp_type;
in_vector *array;
cmp_item *in_item;
bool have_null;
DTCollation cmp_collation;
- public:
+
Item_func_in(List<Item> &list)
:Item_func_opt_neg(list), array(0), in_item(0), have_null(0)
{
--- 1.208/sql/opt_range.cc 2006-04-13 16:05:29 +04:00
+++ 1.209/sql/opt_range.cc 2006-04-25 23:33:26 +04:00
@@ -3501,17 +3501,92 @@
if (inv)
{
- tree= get_ne_mm_tree(param, cond_func, field,
- func->arguments()[1], func->arguments()[1],
- cmp_type);
- if (tree)
+ /*
+ We get here for conditions like "t.keypart NOT IN (....)".
+
+ If the IN-list contains only constants (and func->array is an ordered
+ array of them), we construct the appropriate SEL_ARG tree manually,
+ because constructing it using the range analyzer (as
+ AND_i( t.keypart != c_i)) will cause lots of memory to be consumed
+ (see BUG#15872).
+ */
+ if (func->array && func->cmp_type != ROW_RESULT)
{
- Item **arg, **end;
- for (arg= func->arguments()+2, end= arg+func->argument_count()-2;
- arg < end ; arg++)
+ /*
+ Create one Item_type constant object. We'll need it as
+ get_mm_parts only accepts constant values wrapped in Item_Type
+ objects.
+ We create the Item on param->mem_root which points to
+ per-statement mem_root (while thd->mem_root is currently pointing
+ to mem_root local to range optimizer).
+ */
+ MEM_ROOT *tmp_root= param->mem_root;
+ param->thd->mem_root= param->old_root;
+ Item *value_item= func->array->create_item();
+ param->thd->mem_root= tmp_root;
+
+ if (!value_item)
+ break;
+
+ /* Get a SEL_TREE for "-inf < X < c_0" interval */
+ func->array->value_to_item(0, value_item);
+ tree= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC,
+ value_item, cmp_type);
+ if (!tree)
+ break;
+#define NOT_IN_IGNORE_THRESHOLD 1000
+ SEL_TREE *tree2;
+ if (func->array->count < NOT_IN_IGNORE_THRESHOLD)
{
- tree= tree_and(param, tree, get_ne_mm_tree(param, cond_func, field,
- *arg, *arg, cmp_type));
+ for (uint i=1; i < func->array->count; i++)
+ {
+ if (func->array->compare_elems(i, i-1))
+ {
+ /* Get a SEL_TREE for "-inf < X < c_i" interval */
+ func->array->value_to_item(i, value_item);
+ tree2= get_mm_parts(param, cond_func, field, Item_func::LT_FUNC,
+ value_item, cmp_type);
+
+ /* Change all intervals to be "c_{i-1} < X < c_i" */
+ for (uint idx= 0; idx < param->keys; idx++)
+ {
+ SEL_ARG *new_interval;
+ if ((new_interval= tree2->keys[idx]))
+ {
+ SEL_ARG *last_val= tree->keys[idx]->last();
+ new_interval->min_value= last_val->max_value;
+ new_interval->min_flag= NEAR_MIN;
+ }
+ }
+ tree= tree_or(param, tree, tree2);
+ }
+ }
+ }
+ else
+ func->array->value_to_item(func->array->count - 1, value_item);
+
+ /*
+ Get the SEL_TREE for the last "c_last < X < +inf" interval
+ (value_item cotains c_last already)
+ */
+ tree2= get_mm_parts(param, cond_func, field, Item_func::GT_FUNC,
+ value_item, cmp_type);
+ tree= tree_or(param, tree, tree2);
+ }
+ else
+ {
+ tree= get_ne_mm_tree(param, cond_func, field,
+ func->arguments()[1], func->arguments()[1],
+ cmp_type);
+ if (tree)
+ {
+ Item **arg, **end;
+ for (arg= func->arguments()+2, end= arg+func->argument_count()-2;
+ arg < end ; arg++)
+ {
+ tree= tree_and(param, tree, get_ne_mm_tree(param, cond_func, field,
+ *arg, *arg, cmp_type));
+ }
}
}
}
--- 1.23/mysql-test/r/func_in.result 2005-10-13 18:12:24 +04:00
+++ 1.24/mysql-test/r/func_in.result 2006-04-25 23:33:26 +04:00
@@ -1,4 +1,4 @@
-drop table if exists t1;
+drop table if exists t1, t2;
select 1 in (1,2,3);
1 in (1,2,3)
1
@@ -225,3 +225,104 @@
46
DROP VIEW v1;
DROP TABLE t1;
+create table t1 (a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t2 (a int, filler char(200), key(a));
+insert into t2 select C.a*2, 'no' from t1 A, t1 B, t1 C;
+insert into t2 select C.a*2+1, 'yes' from t1 C;
+explain
+select * from t2 where a NOT IN (0, 2,4,6,8,10,12,14,16,18);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range a a 5 NULL 12 Using where
+select * from t2 where a NOT IN (0, 2,4,6,8,10,12,14,16,18);
+a filler
+1 yes
+3 yes
+5 yes
+7 yes
+9 yes
+11 yes
+13 yes
+15 yes
+17 yes
+19 yes
+explain select * from t2 force index(a) where a NOT IN (2,2,2,2,2,2);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range a a 5 NULL 912 Using where
+explain select * from t2 force index(a) where a <> 2;
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range a a 5 NULL 912 Using where
+drop table t2;
+create table t2 (a datetime, filler char(200), key(a));
+insert into t2 select '2006-04-25 10:00:00' + interval C.a minute,
+'no' from t1 A, t1 B, t1 C where C.a % 2 = 0;
+insert into t2 select '2006-04-25 10:00:00' + interval C.a*2+1 minute,
+'yes' from t1 C;
+explain
+select * from t2 where a NOT IN (
+'2006-04-25 10:00:00','2006-04-25 10:02:00','2006-04-25 10:04:00',
+'2006-04-25 10:06:00', '2006-04-25 10:08:00');
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range a a 9 NULL 18 Using where
+select * from t2 where a NOT IN (
+'2006-04-25 10:00:00','2006-04-25 10:02:00','2006-04-25 10:04:00',
+'2006-04-25 10:06:00', '2006-04-25 10:08:00');
+a filler
+2006-04-25 10:01:00 yes
+2006-04-25 10:03:00 yes
+2006-04-25 10:05:00 yes
+2006-04-25 10:07:00 yes
+2006-04-25 10:09:00 yes
+2006-04-25 10:11:00 yes
+2006-04-25 10:13:00 yes
+2006-04-25 10:15:00 yes
+2006-04-25 10:17:00 yes
+2006-04-25 10:19:00 yes
+drop table t2;
+create table t2 (a varchar(10), filler char(200), key(a));
+insert into t2 select 'foo', 'no' from t1 A, t1 B;
+insert into t2 select 'barbar', 'no' from t1 A, t1 B;
+insert into t2 select 'bazbazbaz', 'no' from t1 A, t1 B;
+insert into t2 values ('fon', '1'), ('fop','1'), ('barbaq','1'),
+('barbas','1'), ('bazbazbay', '1'),('zz','1');
+explain select * from t2 where a not in('foo','barbar', 'bazbazbaz');
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range a a 13 NULL 7 Using where
+drop table t2;
+create table t2 (a decimal(10,5), filler char(200), key(a));
+insert into t2 select 345.67890, 'no' from t1 A, t1 B;
+insert into t2 select 43245.34, 'no' from t1 A, t1 B;
+insert into t2 select 64224.56344, 'no' from t1 A, t1 B;
+insert into t2 values (0, '1'), (22334.123,'1'), (33333,'1'),
+(55555,'1'), (77777, '1');
+explain
+select * from t2 where a not in (345.67890, 43245.34, 64224.56344);
+id select_type table type possible_keys key key_len ref rows Extra
+1 SIMPLE t2 range a a 7 NULL 7 Using where
+select * from t2 where a not in (345.67890, 43245.34, 64224.56344);
+a filler
+0.00000 1
+22334.12300 1
+33333.00000 1
+55555.00000 1
+77777.00000 1
+drop table t2;
+create table t2 (a int, key(a), b int);
+insert into t2 values (1,1),(2,2);
+set @cnt= 1;
+set @str="update t2 set b=1 where a not in (";
+select count(*) from (
+select @str:=concat(@str, @cnt:=@cnt+1, ",")
+from t1 A, t1 B, t1 C, t1 D) Z;
+count(*)
+10000
+set @str:=concat(@str, "10000)");
+select substr(@str, 1, 50);
+substr(@str, 1, 50)
+update t2 set b=1 where a not in (2,3,4,5,6,7,8,9,
+prepare s from @str;
+execute s;
+deallocate prepare s;
+set @str=NULL;
+drop table t2;
+drop table t1;
--- 1.18/mysql-test/t/func_in.test 2005-10-13 18:11:39 +04:00
+++ 1.19/mysql-test/t/func_in.test 2006-04-25 23:33:26 +04:00
@@ -1,6 +1,6 @@
# Initialise
--disable_warnings
-drop table if exists t1;
+drop table if exists t1, t2;
--enable_warnings
#
# test of IN (NULL)
@@ -128,3 +128,95 @@
DROP VIEW v1;
DROP TABLE t1;
+
+# BUG#15872: Excessive memory consumption of range analysis of NOT IN
+create table t1 (a int);
+insert into t1 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
+create table t2 (a int, filler char(200), key(a));
+
+insert into t2 select C.a*2, 'no' from t1 A, t1 B, t1 C;
+insert into t2 select C.a*2+1, 'yes' from t1 C;
+
+explain
+select * from t2 where a NOT IN (0, 2,4,6,8,10,12,14,16,18);
+select * from t2 where a NOT IN (0, 2,4,6,8,10,12,14,16,18);
+
+explain select * from t2 force index(a) where a NOT IN (2,2,2,2,2,2);
+explain select * from t2 force index(a) where a <> 2;
+
+drop table t2;
+
+#
+# Repeat the test for DATETIME
+#
+create table t2 (a datetime, filler char(200), key(a));
+
+insert into t2 select '2006-04-25 10:00:00' + interval C.a minute,
+ 'no' from t1 A, t1 B, t1 C where C.a % 2 = 0;
+
+insert into t2 select '2006-04-25 10:00:00' + interval C.a*2+1 minute,
+ 'yes' from t1 C;
+
+explain
+select * from t2 where a NOT IN (
+ '2006-04-25 10:00:00','2006-04-25 10:02:00','2006-04-25 10:04:00',
+ '2006-04-25 10:06:00', '2006-04-25 10:08:00');
+select * from t2 where a NOT IN (
+ '2006-04-25 10:00:00','2006-04-25 10:02:00','2006-04-25 10:04:00',
+ '2006-04-25 10:06:00', '2006-04-25 10:08:00');
+drop table t2;
+
+#
+# Repeat the test for CHAR(N)
+#
+create table t2 (a varchar(10), filler char(200), key(a));
+
+insert into t2 select 'foo', 'no' from t1 A, t1 B;
+insert into t2 select 'barbar', 'no' from t1 A, t1 B;
+insert into t2 select 'bazbazbaz', 'no' from t1 A, t1 B;
+
+insert into t2 values ('fon', '1'), ('fop','1'), ('barbaq','1'),
+ ('barbas','1'), ('bazbazbay', '1'),('zz','1');
+
+explain select * from t2 where a not in('foo','barbar', 'bazbazbaz');
+
+drop table t2;
+
+#
+# Repeat for DECIMAL
+#
+create table t2 (a decimal(10,5), filler char(200), key(a));
+
+insert into t2 select 345.67890, 'no' from t1 A, t1 B;
+insert into t2 select 43245.34, 'no' from t1 A, t1 B;
+insert into t2 select 64224.56344, 'no' from t1 A, t1 B;
+
+insert into t2 values (0, '1'), (22334.123,'1'), (33333,'1'),
+ (55555,'1'), (77777, '1');
+
+explain
+select * from t2 where a not in (345.67890, 43245.34, 64224.56344);
+select * from t2 where a not in (345.67890, 43245.34, 64224.56344);
+
+drop table t2;
+
+# Try a very big IN-list
+create table t2 (a int, key(a), b int);
+insert into t2 values (1,1),(2,2);
+
+set @cnt= 1;
+set @str="update t2 set b=1 where a not in (";
+select count(*) from (
+ select @str:=concat(@str, @cnt:=@cnt+1, ",")
+ from t1 A, t1 B, t1 C, t1 D) Z;
+
+set @str:=concat(@str, "10000)");
+select substr(@str, 1, 50);
+prepare s from @str;
+execute s;
+deallocate prepare s;
+set @str=NULL;
+
+drop table t2;
+drop table t1;
+
| Thread |
|---|
| • bk commit into 5.0 tree (sergefp:1.2093) BUG#15872 | Sergey Petrunia | 25 Apr |