3874 Roy Lyseng 2010-04-16
Bug#51110: Wrong result with COUNT(*) and semijoin
The duplicate weedout semijoin strategy is shipping certain column
combinations to a temporary table, in order to filter out any duplicates
caused by reading duplicate rows from semijoined tables.
The temporary table starts out as a memory table, but if the size reaches
a maximum value, it is converted to a disk-based table.
When such conversion occurs, the number of rows output from the query is
reported as one less than it should be.
The reason for this is that when do_sj_dups_weedout() creates the
disk-based table, the return value is erroneously reported as 1
instead of 0, meaning that the row in question is always registered
as a duplicate. The problem is fixed by setting proper return values.
The function create_internal_tmp_table_from_heap(), which converts the
memory table to a disk-based table, also copies the offending row
(the "last row") into the new table. There is an argument ignore_last_dup
that manages whether duplicate insertions of this row was allowed, but
duplicate weedout also needed to know whether a duplicate was reported
or not. This problem was fixed by adding an optional output argument
is_duplicate to the function.
mysql-test/r/subselect3.result
Fixed result for bug#51110.
mysql-test/r/subselect3_jcl6.result
Fixed result for bug#51110.
mysql-test/t/subselect3.test
Extended previously failing test to be performed with and without
temporary table overflow. Also added test so that overflow happens
with and without duplicate row.
sql/sql_select.cc
do_sj_dups_weedout() returns 1 if create_internal_tmp_table_from_heap()
is called successfully.
Added output argument "is_duplicate" to
create_internal_tmp_table_from_heap, added Doxygen header.
sql/sql_select.h
Updated prototype of create_internal_tmp_table_from_heap()
sql/sql_show.cc
Added NULL argument to create_internal_tmp_table_from_heap()
sql/sql_union.cc
Added NULL argument to create_internal_tmp_table_from_heap()
sql/sql_update.cc
Added NULL argument to create_internal_tmp_table_from_heap()
modified:
mysql-test/r/subselect3.result
mysql-test/r/subselect3_jcl6.result
mysql-test/t/subselect3.test
sql/sql_select.cc
sql/sql_select.h
sql/sql_show.cc
sql/sql_union.cc
sql/sql_update.cc
3873 Vladislav Vaintroub 2010-04-16 [merge]
merge
modified:
cmake/os/WindowsCache.cmake
sql/sys_vars.cc
=== modified file 'mysql-test/r/subselect3.result'
--- a/mysql-test/r/subselect3.result 2010-03-25 09:02:07 +0000
+++ b/mysql-test/r/subselect3.result 2010-04-16 14:20:09 +0000
@@ -1171,26 +1171,54 @@ id select_type table type possible_keys
1 PRIMARY t4 eq_ref PRIMARY PRIMARY 4 test.t1.c 1 Using index; FirstMatch(t1)
1 PRIMARY t3 ALL NULL NULL NULL NULL 100 Using where; Using join buffer
drop table t1, t3, t4;
-create table t1 (a int) as select * from t0 where a < 5;
+create table t1 (a int);
+insert into t1 values (0),(0),(0),(1),(1),(1),(2),(2),(2),(3),(3),(3);
set @save_max_heap_table_size=@@max_heap_table_size;
+set @save_optimizer_join_cache_level=@@optimizer_join_cache_level;
set @@optimizer_switch='firstmatch=off,materialization=off';
set @@max_heap_table_size= 16384;
-explain select count(*) from t0 A, t0 B, t0 C, t0 D where D.a in (select a from t1 E);
+# Attempt to make one test that overflows the heap table when a
+# non-duplicate row is inserted and one test that overflows the
+# heap table when a duplicate record is inserted. Debugging showed
+# that these situations occurred with max_heap_table_size=16384
+# and optimizer_join_cache_level equals 1 and 0, respectively.
+# Finally execute a test that does not overflow the heap table.
+explain
+select count(*) from t0 A, t0 B, t0 C
+where C.a in (select a from t1 D);
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY E ALL NULL NULL NULL NULL 5 Start temporary
-1 PRIMARY A ALL NULL NULL NULL NULL 10 Using join buffer
+1 PRIMARY A ALL NULL NULL NULL NULL 10 Start temporary
1 PRIMARY B ALL NULL NULL NULL NULL 10 Using join buffer
1 PRIMARY C ALL NULL NULL NULL NULL 10 Using join buffer
-1 PRIMARY D ALL NULL NULL NULL NULL 10 Using where; End temporary; Using join buffer
+1 PRIMARY D ALL NULL NULL NULL NULL 12 Using where; End temporary; Using join buffer
flush status;
-select count(*) from t0 A, t0 B, t0 C, t0 D where D.a in (select a from t1 E);
+select count(*) from t0 A, t0 B, t0 C
+where C.a in (select a from t1 D);
count(*)
-4999
+400
show status like 'Created_tmp_disk_tables';
Variable_name Value
Created_tmp_disk_tables 1
-set @save_max_heap_table_size=@@max_heap_table_size;
+set @@optimizer_join_cache_level= 0;
+flush status;
+select count(*) from t0 A, t0 B, t0 C
+where C.a in (select a from t1 D);
+count(*)
+400
+show status like 'Created_tmp_disk_tables';
+Variable_name Value
+Created_tmp_disk_tables 1
+set @@max_heap_table_size= @save_max_heap_table_size;
+flush status;
+select count(*) from t0 A, t0 B, t0 C
+where C.a in (select a from t1 D);
+count(*)
+400
+show status like 'Created_tmp_disk_tables';
+Variable_name Value
+Created_tmp_disk_tables 0
set @@optimizer_switch=default;
+set @@optimizer_join_cache_level=@save_optimizer_join_cache_level;
drop table t0, t1;
create table t0 (a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
=== modified file 'mysql-test/r/subselect3_jcl6.result'
--- a/mysql-test/r/subselect3_jcl6.result 2010-03-09 23:03:02 +0000
+++ b/mysql-test/r/subselect3_jcl6.result 2010-04-16 14:20:09 +0000
@@ -1176,26 +1176,54 @@ id select_type table type possible_keys
1 PRIMARY t4 eq_ref PRIMARY PRIMARY 4 test.t1.c 1 Using index; FirstMatch(t1)
1 PRIMARY t3 ALL NULL NULL NULL NULL 100 Using where; Using join buffer
drop table t1, t3, t4;
-create table t1 (a int) as select * from t0 where a < 5;
+create table t1 (a int);
+insert into t1 values (0),(0),(0),(1),(1),(1),(2),(2),(2),(3),(3),(3);
set @save_max_heap_table_size=@@max_heap_table_size;
+set @save_optimizer_join_cache_level=@@optimizer_join_cache_level;
set @@optimizer_switch='firstmatch=off,materialization=off';
set @@max_heap_table_size= 16384;
-explain select count(*) from t0 A, t0 B, t0 C, t0 D where D.a in (select a from t1 E);
+# Attempt to make one test that overflows the heap table when a
+# non-duplicate row is inserted and one test that overflows the
+# heap table when a duplicate record is inserted. Debugging showed
+# that these situations occurred with max_heap_table_size=16384
+# and optimizer_join_cache_level equals 1 and 0, respectively.
+# Finally execute a test that does not overflow the heap table.
+explain
+select count(*) from t0 A, t0 B, t0 C
+where C.a in (select a from t1 D);
id select_type table type possible_keys key key_len ref rows Extra
-1 PRIMARY E ALL NULL NULL NULL NULL 5 Start temporary
-1 PRIMARY A ALL NULL NULL NULL NULL 10 Using join buffer
+1 PRIMARY A ALL NULL NULL NULL NULL 10 Start temporary
1 PRIMARY B ALL NULL NULL NULL NULL 10 Using join buffer
1 PRIMARY C ALL NULL NULL NULL NULL 10 Using join buffer
-1 PRIMARY D ALL NULL NULL NULL NULL 10 Using where; End temporary; Using join buffer
+1 PRIMARY D ALL NULL NULL NULL NULL 12 Using where; End temporary; Using join buffer
flush status;
-select count(*) from t0 A, t0 B, t0 C, t0 D where D.a in (select a from t1 E);
+select count(*) from t0 A, t0 B, t0 C
+where C.a in (select a from t1 D);
count(*)
-4999
+400
show status like 'Created_tmp_disk_tables';
Variable_name Value
Created_tmp_disk_tables 1
-set @save_max_heap_table_size=@@max_heap_table_size;
+set @@optimizer_join_cache_level= 0;
+flush status;
+select count(*) from t0 A, t0 B, t0 C
+where C.a in (select a from t1 D);
+count(*)
+400
+show status like 'Created_tmp_disk_tables';
+Variable_name Value
+Created_tmp_disk_tables 1
+set @@max_heap_table_size= @save_max_heap_table_size;
+flush status;
+select count(*) from t0 A, t0 B, t0 C
+where C.a in (select a from t1 D);
+count(*)
+400
+show status like 'Created_tmp_disk_tables';
+Variable_name Value
+Created_tmp_disk_tables 0
set @@optimizer_switch=default;
+set @@optimizer_join_cache_level=@save_optimizer_join_cache_level;
drop table t0, t1;
create table t0 (a int);
insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);
=== modified file 'mysql-test/t/subselect3.test'
--- a/mysql-test/t/subselect3.test 2010-01-20 10:11:29 +0000
+++ b/mysql-test/t/subselect3.test 2010-04-16 14:20:09 +0000
@@ -973,18 +973,42 @@ drop table t1, t3, t4;
#
# Test if we handle duplicate elimination temptable overflowing to disk
#
-create table t1 (a int) as select * from t0 where a < 5;
+create table t1 (a int);
+insert into t1 values (0),(0),(0),(1),(1),(1),(2),(2),(2),(3),(3),(3);
set @save_max_heap_table_size=@@max_heap_table_size;
+set @save_optimizer_join_cache_level=@@optimizer_join_cache_level;
set @@optimizer_switch='firstmatch=off,materialization=off';
set @@max_heap_table_size= 16384;
-explain select count(*) from t0 A, t0 B, t0 C, t0 D where D.a in (select a from t1 E);
+--echo # Attempt to make one test that overflows the heap table when a
+--echo # non-duplicate row is inserted and one test that overflows the
+--echo # heap table when a duplicate record is inserted. Debugging showed
+--echo # that these situations occurred with max_heap_table_size=16384
+--echo # and optimizer_join_cache_level equals 1 and 0, respectively.
+--echo # Finally execute a test that does not overflow the heap table.
+explain
+select count(*) from t0 A, t0 B, t0 C
+where C.a in (select a from t1 D);
flush status;
-select count(*) from t0 A, t0 B, t0 C, t0 D where D.a in (select a from t1 E);
+select count(*) from t0 A, t0 B, t0 C
+where C.a in (select a from t1 D);
show status like 'Created_tmp_disk_tables';
-set @save_max_heap_table_size=@@max_heap_table_size;
+
+set @@optimizer_join_cache_level= 0;
+flush status;
+select count(*) from t0 A, t0 B, t0 C
+where C.a in (select a from t1 D);
+show status like 'Created_tmp_disk_tables';
+
+set @@max_heap_table_size= @save_max_heap_table_size;
+flush status;
+select count(*) from t0 A, t0 B, t0 C
+where C.a in (select a from t1 D);
+show status like 'Created_tmp_disk_tables';
+
set @@optimizer_switch=default;
+set @@optimizer_join_cache_level=@save_optimizer_join_cache_level;
drop table t0, t1;
#
=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc 2010-04-13 13:02:27 +0000
+++ b/sql/sql_select.cc 2010-04-16 14:20:09 +0000
@@ -160,7 +160,8 @@ create_internal_tmp_table_from_heap2(THD
ENGINE_COLUMNDEF *start_recinfo,
ENGINE_COLUMNDEF **recinfo,
int error,
- bool ignore_last_dupp_key_error,
+ bool ignore_last_dup,
+ bool *is_duplicate,
handlerton *hton,
const char *proc_info);
static int do_select(JOIN *join,List<Item> *fields,TABLE *tmp_table,
@@ -9875,9 +9876,10 @@ end_sj_materialize(JOIN *join, JOIN_TAB
{
/* create_myisam_from_heap will generate error if needed */
if (table->file->is_fatal_error(error, HA_CHECK_DUP) &&
- create_internal_tmp_table_from_heap(thd, table,
- sjm->sjm_table_param.start_recinfo,
- &sjm->sjm_table_param.recinfo, error, 1))
+ create_internal_tmp_table_from_heap(
+ thd, table,
+ sjm->sjm_table_param.start_recinfo,
+ &sjm->sjm_table_param.recinfo, error, TRUE, NULL))
DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
}
}
@@ -15869,26 +15871,57 @@ static bool create_internal_tmp_table(TA
/**
- If a HEAP table gets full, create a MyISAM table and copy all rows
+ If a MEMORY table gets full, create a disk-based table and copy all rows
to this.
+
+ @param thd THD reference
+ @param table Table reference
+ @param start_recinfo Engine's column descriptions
+ @param recinfo[in,out] End of engine's column descriptions
+ @param error Reason why inserting into MEMORY table failed.
+ @param ignore_last_dup If true, ignore duplicate key error for last
+ inserted key (see detailed description below).
+ @param is_duplicate[out] if non-NULL and ignore_last_dup is TRUE,
+ return TRUE if last key was a duplicate,
+ and FALSE otherwise.
+
+ @detail
+ Function can be called with any error code, but only HA_ERR_RECORD_FILE_FULL
+ will be handled, all other errors cause a fatal error to be thrown.
+ The function creates a disk-based temporary table, copies all records
+ from the MEMORY table into this new table, deletes the old table and
+ switches to use the new table within the table handle.
+ The function uses table->record[1] as a temporary buffer while copying.
+
+ The function assumes that table->record[0] contains the row that caused
+ the error when inserting into the MEMORY table (the "last row").
+ After all existing rows have been copied to the new table, the last row
+ is attempted to be inserted as well. If ignore_last_dup is true,
+ this row can be a duplicate of an existing row without throwing an error.
+ If is_duplicate is non-NULL, an indication of whether the last row was
+ a duplicate is returned.
*/
bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
ENGINE_COLUMNDEF *start_recinfo,
ENGINE_COLUMNDEF **recinfo,
- int error, bool ignore_last_dupp_key_error)
+ int error, bool ignore_last_dup,
+ bool *is_duplicate)
{
return create_internal_tmp_table_from_heap2(thd, table,
start_recinfo, recinfo, error,
- ignore_last_dupp_key_error,
+ ignore_last_dup,
+ is_duplicate,
myisam_hton,
"converting HEAP to MyISAM");
}
-/*
- If a HEAP table gets full, create a internal table in MyISAM or Maria
- and copy all rows to this
+/**
+ If a MEMORY table gets full, create a disk-based table
+ and copy all rows to this.
+
+ @details See documentation of create_internal_tmp_table_from_heap()
*/
@@ -15897,7 +15930,8 @@ create_internal_tmp_table_from_heap2(THD
ENGINE_COLUMNDEF *start_recinfo,
ENGINE_COLUMNDEF **recinfo,
int error,
- bool ignore_last_dupp_key_error,
+ bool ignore_last_dup,
+ bool *is_duplicate,
handlerton *hton,
const char *proc_info)
{
@@ -15978,8 +16012,15 @@ create_internal_tmp_table_from_heap2(THD
if ((write_err=new_table.file->ha_write_row(table->record[0])))
{
if (new_table.file->is_fatal_error(write_err, HA_CHECK_DUP) ||
- !ignore_last_dupp_key_error)
+ !ignore_last_dup)
goto err;
+ if (is_duplicate)
+ *is_duplicate= TRUE;
+ }
+ else
+ {
+ if (is_duplicate)
+ *is_duplicate= FALSE;
}
/* remove heap table and change to use myisam table */
@@ -16794,13 +16835,19 @@ int do_sj_dups_weedout(THD *thd, SJ_TMP_
error= sjtbl->tmp_table->file->ha_write_row(sjtbl->tmp_table->record[0]);
if (error)
{
- /* create_internal_tmp_table_from_heap will generate error if needed */
- if (sjtbl->tmp_table->file->is_fatal_error(error, HA_CHECK_DUP) &&
- create_internal_tmp_table_from_heap(thd, sjtbl->tmp_table,
+ /* If this is a duplicate error, return immediately */
+ if (!sjtbl->tmp_table->file->is_fatal_error(error, HA_CHECK_DUP))
+ DBUG_RETURN(1);
+ /*
+ Other error than duplicate error: Attempt to create a temporary table.
+ */
+ bool is_duplicate;
+ if (create_internal_tmp_table_from_heap(thd, sjtbl->tmp_table,
sjtbl->start_recinfo,
- &sjtbl->recinfo, error, 1))
+ &sjtbl->recinfo, error, TRUE,
+ &is_duplicate))
DBUG_RETURN(-1);
- DBUG_RETURN(1);
+ DBUG_RETURN(is_duplicate ? 1 : 0);
}
DBUG_RETURN(0);
}
@@ -17934,10 +17981,11 @@ end_write(JOIN *join, JOIN_TAB *join_tab
{
if (!table->file->is_fatal_error(error, HA_CHECK_DUP))
goto end;
- if (create_internal_tmp_table_from_heap(join->thd, table,
- join->tmp_table_param.start_recinfo,
- &join->tmp_table_param.recinfo,
- error, 1))
+ if (create_internal_tmp_table_from_heap(
+ join->thd, table,
+ join->tmp_table_param.start_recinfo,
+ &join->tmp_table_param.recinfo,
+ error, TRUE, NULL))
DBUG_RETURN(NESTED_LOOP_ERROR); // Not a table_is_full error
table->s->uniques=0; // To ensure rows are the same
}
@@ -18023,7 +18071,7 @@ end_update(JOIN *join, JOIN_TAB *join_ta
if (create_internal_tmp_table_from_heap(join->thd, table,
join->tmp_table_param.start_recinfo,
&join->tmp_table_param.recinfo,
- error, 0))
+ error, FALSE, NULL))
DBUG_RETURN(NESTED_LOOP_ERROR); // Not a table_is_full error
/* Change method to update rows */
table->file->ha_index_init(0, 0);
@@ -18118,10 +18166,11 @@ end_write_group(JOIN *join, JOIN_TAB *jo
{
int error= table->file->ha_write_row(table->record[0]);
if (error &&
- create_internal_tmp_table_from_heap(join->thd, table,
- join->tmp_table_param.start_recinfo,
- &join->tmp_table_param.recinfo,
- error, 0))
+ create_internal_tmp_table_from_heap(
+ join->thd, table,
+ join->tmp_table_param.start_recinfo,
+ &join->tmp_table_param.recinfo,
+ error, FALSE, NULL))
DBUG_RETURN(NESTED_LOOP_ERROR);
}
if (join->rollup.state != ROLLUP::STATE_NONE)
@@ -21831,7 +21880,7 @@ int JOIN::rollup_write_data(uint idx, TA
if (create_internal_tmp_table_from_heap(thd, table_arg,
tmp_table_param.start_recinfo,
&tmp_table_param.recinfo,
- write_error, 0))
+ write_error, FALSE, NULL))
return 1;
}
}
=== modified file 'sql/sql_select.h'
--- a/sql/sql_select.h 2010-04-08 10:50:40 +0000
+++ b/sql/sql_select.h 2010-04-16 14:20:09 +0000
@@ -1843,7 +1843,8 @@ void copy_funcs(Item **func_ptr);
bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
ENGINE_COLUMNDEF *start_recinfo,
ENGINE_COLUMNDEF **recinfo,
- int error, bool ignore_last_dupp_key_error);
+ int error, bool ignore_last_dup,
+ bool *is_duplicate);
uint find_shortest_key(TABLE *table, const key_map *usable_keys);
Field* create_tmp_field_from_field(THD *thd, Field* org_field,
const char *name, TABLE *table,
=== modified file 'sql/sql_show.cc'
--- a/sql/sql_show.cc 2010-04-10 09:11:47 +0000
+++ b/sql/sql_show.cc 2010-04-16 14:20:09 +0000
@@ -2475,7 +2475,8 @@ bool schema_table_store_record(THD *thd,
TMP_TABLE_PARAM *param= table->pos_in_table_list->schema_table_param;
if (create_internal_tmp_table_from_heap(thd, table, param->start_recinfo,
- ¶m->recinfo, error, 0))
+ ¶m->recinfo, error,
+ FALSE, NULL))
return 1;
}
return 0;
=== modified file 'sql/sql_union.cc'
--- a/sql/sql_union.cc 2010-04-09 08:22:10 +0000
+++ b/sql/sql_union.cc 2010-04-16 14:20:09 +0000
@@ -73,7 +73,7 @@ bool select_union::send_data(List<Item>
create_internal_tmp_table_from_heap(thd, table,
tmp_table_param.start_recinfo,
&tmp_table_param.recinfo, error,
- 1))
+ TRUE, NULL))
return 1;
}
return 0;
=== modified file 'sql/sql_update.cc'
--- a/sql/sql_update.cc 2010-04-10 09:11:47 +0000
+++ b/sql/sql_update.cc 2010-04-16 14:20:09 +0000
@@ -1821,7 +1821,7 @@ bool multi_update::send_data(List<Item>
create_internal_tmp_table_from_heap(thd, tmp_table,
tmp_table_param[offset].start_recinfo,
&tmp_table_param[offset].recinfo,
- error, 1))
+ error, TRUE, NULL))
{
do_update=0;
MYSQL_MULTI_UPDATE_DONE(1, 0, 0);
Attachment: [text/bzr-bundle] bzr/roy.lyseng@sun.com-20100416142009-8lw6ptxkgbguohy2.bundle
| Thread |
|---|
| • bzr push into mysql-6.0-codebase-bugfixing branch (roy.lyseng:3873 to 3874)Bug#51110 | Roy Lyseng | 16 Apr |