List:Commits« Previous MessageNext Message »
From:Roy Lyseng Date:April 16 2010 2:21pm
Subject:bzr push into mysql-6.0-codebase-bugfixing branch (roy.lyseng:3873 to 3874)
Bug#51110
View as plain text  
 3874 Roy Lyseng	2010-04-16
      Bug#51110: Wrong result with COUNT(*) and semijoin
      
      The duplicate weedout semijoin strategy is shipping certain column
      combinations to a temporary table, in order to filter out any duplicates
      caused by reading duplicate rows from semijoined tables.
      
      The temporary table starts out as a memory table, but if the size reaches
      a maximum value, it is converted to a disk-based table.
      When such conversion occurs, the number of rows output from the query is
      reported as one less than it should be.
      
      The reason for this is that when do_sj_dups_weedout() creates the
      disk-based table, the return value is erroneously reported as 1
      instead of 0, meaning that the row in question is always registered
      as a duplicate. The problem is fixed by setting proper return values.
      
      The function create_internal_tmp_table_from_heap(), which converts the
      memory table to a disk-based table, also copies the offending row
      (the "last row") into the new table. There is an argument ignore_last_dup
      that manages whether duplicate insertions of this row was allowed, but
      duplicate weedout also needed to know whether a duplicate was reported
      or not. This problem was fixed by adding an optional output argument
      is_duplicate to the function.
      
      mysql-test/r/subselect3.result
        Fixed result for bug#51110.
      
      mysql-test/r/subselect3_jcl6.result
        Fixed result for bug#51110.
      
      mysql-test/t/subselect3.test
        Extended previously failing test to be performed with and without
        temporary table overflow. Also added test so that overflow happens
        with and without duplicate row.
      
      sql/sql_select.cc
        do_sj_dups_weedout() returns 1 if create_internal_tmp_table_from_heap()
        is called successfully.
        Added output argument "is_duplicate" to
        create_internal_tmp_table_from_heap, added Doxygen header.
      
      sql/sql_select.h
        Updated prototype of create_internal_tmp_table_from_heap()
      
      sql/sql_show.cc
        Added NULL argument to create_internal_tmp_table_from_heap()
      
      sql/sql_union.cc
        Added NULL argument to create_internal_tmp_table_from_heap()
      
      sql/sql_update.cc
        Added NULL argument to create_internal_tmp_table_from_heap()

    modified:
      mysql-test/r/subselect3.result
      mysql-test/r/subselect3_jcl6.result
      mysql-test/t/subselect3.test
      sql/sql_select.cc
      sql/sql_select.h
      sql/sql_show.cc
      sql/sql_union.cc
      sql/sql_update.cc
 3873 Vladislav Vaintroub	2010-04-16 [merge]
      merge

    modified:
      cmake/os/WindowsCache.cmake
      sql/sys_vars.cc
=== modified file 'mysql-test/r/subselect3.result'
--- a/mysql-test/r/subselect3.result	2010-03-25 09:02:07 +0000
+++ b/mysql-test/r/subselect3.result	2010-04-16 14:20:09 +0000
@@ -1171,26 +1171,54 @@ id	select_type	table	type	possible_keys	
 1	PRIMARY	t4	eq_ref	PRIMARY	PRIMARY	4	test.t1.c	1	Using index; FirstMatch(t1)
 1	PRIMARY	t3	ALL	NULL	NULL	NULL	NULL	100	Using where; Using join buffer
 drop table t1, t3, t4;
-create table t1 (a int) as select * from t0 where a < 5;
+create table t1 (a int);
+insert into t1 values (0),(0),(0),(1),(1),(1),(2),(2),(2),(3),(3),(3);
 set @save_max_heap_table_size=@@max_heap_table_size;
+set @save_optimizer_join_cache_level=@@optimizer_join_cache_level;
 set @@optimizer_switch='firstmatch=off,materialization=off';
 set @@max_heap_table_size= 16384;
-explain select count(*) from t0 A, t0 B, t0 C, t0 D where D.a in (select a from t1 E);
+# Attempt to make one test that overflows the heap table when a
+# non-duplicate row is inserted and one test that overflows the
+# heap table when a duplicate record is inserted. Debugging showed
+# that these situations occurred with max_heap_table_size=16384
+# and optimizer_join_cache_level equals 1 and 0, respectively.
+# Finally execute a test that does not overflow the heap table.
+explain
+select count(*) from t0 A, t0 B, t0 C
+where C.a in (select a from t1 D);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	E	ALL	NULL	NULL	NULL	NULL	5	Start temporary
-1	PRIMARY	A	ALL	NULL	NULL	NULL	NULL	10	Using join buffer
+1	PRIMARY	A	ALL	NULL	NULL	NULL	NULL	10	Start temporary
 1	PRIMARY	B	ALL	NULL	NULL	NULL	NULL	10	Using join buffer
 1	PRIMARY	C	ALL	NULL	NULL	NULL	NULL	10	Using join buffer
-1	PRIMARY	D	ALL	NULL	NULL	NULL	NULL	10	Using where; End temporary; Using join buffer
+1	PRIMARY	D	ALL	NULL	NULL	NULL	NULL	12	Using where; End temporary; Using join buffer
 flush status;
-select count(*) from t0 A, t0 B, t0 C, t0 D where D.a in (select a from t1 E);
+select count(*) from t0 A, t0 B, t0 C
+where C.a in (select a from t1 D);
 count(*)
-4999
+400
 show status like 'Created_tmp_disk_tables';
 Variable_name	Value
 Created_tmp_disk_tables	1
-set @save_max_heap_table_size=@@max_heap_table_size;
+set @@optimizer_join_cache_level= 0;
+flush status;
+select count(*) from t0 A, t0 B, t0 C
+where C.a in (select a from t1 D);
+count(*)
+400
+show status like 'Created_tmp_disk_tables';
+Variable_name	Value
+Created_tmp_disk_tables	1
+set @@max_heap_table_size= @save_max_heap_table_size;
+flush status;
+select count(*) from t0 A, t0 B, t0 C
+where C.a in (select a from t1 D);
+count(*)
+400
+show status like 'Created_tmp_disk_tables';
+Variable_name	Value
+Created_tmp_disk_tables	0
 set @@optimizer_switch=default;
+set @@optimizer_join_cache_level=@save_optimizer_join_cache_level;
 drop table t0, t1;
 create table t0 (a int);
 insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);

=== modified file 'mysql-test/r/subselect3_jcl6.result'
--- a/mysql-test/r/subselect3_jcl6.result	2010-03-09 23:03:02 +0000
+++ b/mysql-test/r/subselect3_jcl6.result	2010-04-16 14:20:09 +0000
@@ -1176,26 +1176,54 @@ id	select_type	table	type	possible_keys	
 1	PRIMARY	t4	eq_ref	PRIMARY	PRIMARY	4	test.t1.c	1	Using index; FirstMatch(t1)
 1	PRIMARY	t3	ALL	NULL	NULL	NULL	NULL	100	Using where; Using join buffer
 drop table t1, t3, t4;
-create table t1 (a int) as select * from t0 where a < 5;
+create table t1 (a int);
+insert into t1 values (0),(0),(0),(1),(1),(1),(2),(2),(2),(3),(3),(3);
 set @save_max_heap_table_size=@@max_heap_table_size;
+set @save_optimizer_join_cache_level=@@optimizer_join_cache_level;
 set @@optimizer_switch='firstmatch=off,materialization=off';
 set @@max_heap_table_size= 16384;
-explain select count(*) from t0 A, t0 B, t0 C, t0 D where D.a in (select a from t1 E);
+# Attempt to make one test that overflows the heap table when a
+# non-duplicate row is inserted and one test that overflows the
+# heap table when a duplicate record is inserted. Debugging showed
+# that these situations occurred with max_heap_table_size=16384
+# and optimizer_join_cache_level equals 1 and 0, respectively.
+# Finally execute a test that does not overflow the heap table.
+explain
+select count(*) from t0 A, t0 B, t0 C
+where C.a in (select a from t1 D);
 id	select_type	table	type	possible_keys	key	key_len	ref	rows	Extra
-1	PRIMARY	E	ALL	NULL	NULL	NULL	NULL	5	Start temporary
-1	PRIMARY	A	ALL	NULL	NULL	NULL	NULL	10	Using join buffer
+1	PRIMARY	A	ALL	NULL	NULL	NULL	NULL	10	Start temporary
 1	PRIMARY	B	ALL	NULL	NULL	NULL	NULL	10	Using join buffer
 1	PRIMARY	C	ALL	NULL	NULL	NULL	NULL	10	Using join buffer
-1	PRIMARY	D	ALL	NULL	NULL	NULL	NULL	10	Using where; End temporary; Using join buffer
+1	PRIMARY	D	ALL	NULL	NULL	NULL	NULL	12	Using where; End temporary; Using join buffer
 flush status;
-select count(*) from t0 A, t0 B, t0 C, t0 D where D.a in (select a from t1 E);
+select count(*) from t0 A, t0 B, t0 C
+where C.a in (select a from t1 D);
 count(*)
-4999
+400
 show status like 'Created_tmp_disk_tables';
 Variable_name	Value
 Created_tmp_disk_tables	1
-set @save_max_heap_table_size=@@max_heap_table_size;
+set @@optimizer_join_cache_level= 0;
+flush status;
+select count(*) from t0 A, t0 B, t0 C
+where C.a in (select a from t1 D);
+count(*)
+400
+show status like 'Created_tmp_disk_tables';
+Variable_name	Value
+Created_tmp_disk_tables	1
+set @@max_heap_table_size= @save_max_heap_table_size;
+flush status;
+select count(*) from t0 A, t0 B, t0 C
+where C.a in (select a from t1 D);
+count(*)
+400
+show status like 'Created_tmp_disk_tables';
+Variable_name	Value
+Created_tmp_disk_tables	0
 set @@optimizer_switch=default;
+set @@optimizer_join_cache_level=@save_optimizer_join_cache_level;
 drop table t0, t1;
 create table t0 (a int);
 insert into t0 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9);

=== modified file 'mysql-test/t/subselect3.test'
--- a/mysql-test/t/subselect3.test	2010-01-20 10:11:29 +0000
+++ b/mysql-test/t/subselect3.test	2010-04-16 14:20:09 +0000
@@ -973,18 +973,42 @@ drop table t1, t3, t4;
 #
 # Test if we handle duplicate elimination temptable overflowing to disk
 #
-create table t1 (a int) as select * from t0 where a < 5;
+create table t1 (a int);
+insert into t1 values (0),(0),(0),(1),(1),(1),(2),(2),(2),(3),(3),(3);
 
 set @save_max_heap_table_size=@@max_heap_table_size;
+set @save_optimizer_join_cache_level=@@optimizer_join_cache_level;
 set @@optimizer_switch='firstmatch=off,materialization=off';
 set @@max_heap_table_size= 16384;
 
-explain select count(*) from t0 A, t0 B, t0 C, t0 D where D.a in (select a from t1 E);
+--echo # Attempt to make one test that overflows the heap table when a
+--echo # non-duplicate row is inserted and one test that overflows the
+--echo # heap table when a duplicate record is inserted. Debugging showed
+--echo # that these situations occurred with max_heap_table_size=16384
+--echo # and optimizer_join_cache_level equals 1 and 0, respectively.
+--echo # Finally execute a test that does not overflow the heap table.
+explain
+select count(*) from t0 A, t0 B, t0 C
+where C.a in (select a from t1 D);
 flush status;
-select count(*) from t0 A, t0 B, t0 C, t0 D where D.a in (select a from t1 E);
+select count(*) from t0 A, t0 B, t0 C
+where C.a in (select a from t1 D);
 show status like 'Created_tmp_disk_tables';
-set @save_max_heap_table_size=@@max_heap_table_size;
+
+set @@optimizer_join_cache_level= 0;
+flush status;
+select count(*) from t0 A, t0 B, t0 C
+where C.a in (select a from t1 D);
+show status like 'Created_tmp_disk_tables';
+
+set @@max_heap_table_size= @save_max_heap_table_size;
+flush status;
+select count(*) from t0 A, t0 B, t0 C
+where C.a in (select a from t1 D);
+show status like 'Created_tmp_disk_tables';
+
 set @@optimizer_switch=default;
+set @@optimizer_join_cache_level=@save_optimizer_join_cache_level;
 drop table t0, t1;
 
 #

=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc	2010-04-13 13:02:27 +0000
+++ b/sql/sql_select.cc	2010-04-16 14:20:09 +0000
@@ -160,7 +160,8 @@ create_internal_tmp_table_from_heap2(THD
                                      ENGINE_COLUMNDEF *start_recinfo,
                                      ENGINE_COLUMNDEF **recinfo, 
                                      int error,
-                                     bool ignore_last_dupp_key_error,
+                                     bool ignore_last_dup,
+                                     bool *is_duplicate,
                                      handlerton *hton,
                                      const char *proc_info);
 static int do_select(JOIN *join,List<Item> *fields,TABLE *tmp_table,
@@ -9875,9 +9876,10 @@ end_sj_materialize(JOIN *join, JOIN_TAB 
     {
       /* create_myisam_from_heap will generate error if needed */
       if (table->file->is_fatal_error(error, HA_CHECK_DUP) &&
-          create_internal_tmp_table_from_heap(thd, table,
-                                              sjm->sjm_table_param.start_recinfo, 
-                                              &sjm->sjm_table_param.recinfo, error, 1))
+          create_internal_tmp_table_from_heap(
+                             thd, table,
+                             sjm->sjm_table_param.start_recinfo, 
+                             &sjm->sjm_table_param.recinfo, error, TRUE, NULL))
         DBUG_RETURN(NESTED_LOOP_ERROR); /* purecov: inspected */
     }
   }
@@ -15869,26 +15871,57 @@ static bool create_internal_tmp_table(TA
 
 
 /**
-  If a HEAP table gets full, create a MyISAM table and copy all rows
+  If a MEMORY table gets full, create a disk-based table and copy all rows
   to this.
+
+  @param thd             THD reference
+  @param table           Table reference
+  @param start_recinfo   Engine's column descriptions
+  @param recinfo[in,out] End of engine's column descriptions
+  @param error           Reason why inserting into MEMORY table failed. 
+  @param ignore_last_dup If true, ignore duplicate key error for last
+                         inserted key (see detailed description below).
+  @param is_duplicate[out] if non-NULL and ignore_last_dup is TRUE,
+                         return TRUE if last key was a duplicate,
+                         and FALSE otherwise.
+
+  @detail
+    Function can be called with any error code, but only HA_ERR_RECORD_FILE_FULL
+    will be handled, all other errors cause a fatal error to be thrown.
+    The function creates a disk-based temporary table, copies all records
+    from the MEMORY table into this new table, deletes the old table and
+    switches to use the new table within the table handle.
+    The function uses table->record[1] as a temporary buffer while copying.
+
+    The function assumes that table->record[0] contains the row that caused
+    the error when inserting into the MEMORY table (the "last row").
+    After all existing rows have been copied to the new table, the last row
+    is attempted to be inserted as well. If ignore_last_dup is true,
+    this row can be a duplicate of an existing row without throwing an error.
+    If is_duplicate is non-NULL, an indication of whether the last row was
+    a duplicate is returned.
 */
 
 bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
                                          ENGINE_COLUMNDEF *start_recinfo,
                                          ENGINE_COLUMNDEF **recinfo, 
-                                         int error, bool ignore_last_dupp_key_error)
+                                         int error, bool ignore_last_dup,
+                                         bool *is_duplicate)
 {
   return create_internal_tmp_table_from_heap2(thd, table,
                                               start_recinfo, recinfo, error,
-                                              ignore_last_dupp_key_error,
+                                              ignore_last_dup,
+                                              is_duplicate,
                                               myisam_hton,
                                               "converting HEAP to MyISAM");
 }
 
 
-/*
-  If a HEAP table gets full, create a internal table in MyISAM or Maria
-  and copy all rows to this
+/**
+  If a MEMORY table gets full, create a disk-based table
+  and copy all rows to this.
+
+  @details See documentation of create_internal_tmp_table_from_heap()
 */
 
 
@@ -15897,7 +15930,8 @@ create_internal_tmp_table_from_heap2(THD
                                      ENGINE_COLUMNDEF *start_recinfo,
                                      ENGINE_COLUMNDEF **recinfo, 
                                      int error,
-                                     bool ignore_last_dupp_key_error,
+                                     bool ignore_last_dup,
+                                     bool *is_duplicate,
                                      handlerton *hton,
                                      const char *proc_info)
 {
@@ -15978,8 +16012,15 @@ create_internal_tmp_table_from_heap2(THD
   if ((write_err=new_table.file->ha_write_row(table->record[0])))
   {
     if (new_table.file->is_fatal_error(write_err, HA_CHECK_DUP) ||
-	!ignore_last_dupp_key_error)
+	!ignore_last_dup)
       goto err;
+    if (is_duplicate)
+      *is_duplicate= TRUE;
+  }
+  else
+  {
+    if (is_duplicate)
+      *is_duplicate= FALSE;
   }
 
   /* remove heap table and change to use myisam table */
@@ -16794,13 +16835,19 @@ int do_sj_dups_weedout(THD *thd, SJ_TMP_
   error= sjtbl->tmp_table->file->ha_write_row(sjtbl->tmp_table->record[0]);
   if (error)
   {
-    /* create_internal_tmp_table_from_heap will generate error if needed */
-    if (sjtbl->tmp_table->file->is_fatal_error(error, HA_CHECK_DUP) &&
-        create_internal_tmp_table_from_heap(thd, sjtbl->tmp_table,
+    /* If this is a duplicate error, return immediately */
+    if (!sjtbl->tmp_table->file->is_fatal_error(error, HA_CHECK_DUP))
+      DBUG_RETURN(1);
+    /*
+      Other error than duplicate error: Attempt to create a temporary table.
+    */
+    bool is_duplicate;
+    if (create_internal_tmp_table_from_heap(thd, sjtbl->tmp_table,
                                             sjtbl->start_recinfo,
-                                            &sjtbl->recinfo, error, 1))
+                                            &sjtbl->recinfo, error, TRUE,
+                                            &is_duplicate))
       DBUG_RETURN(-1);
-    DBUG_RETURN(1);
+    DBUG_RETURN(is_duplicate ? 1 : 0);
   }
   DBUG_RETURN(0);
 }
@@ -17934,10 +17981,11 @@ end_write(JOIN *join, JOIN_TAB *join_tab
       {
         if (!table->file->is_fatal_error(error, HA_CHECK_DUP))
 	  goto end;
-	if (create_internal_tmp_table_from_heap(join->thd, table,
-                                                join->tmp_table_param.start_recinfo,
-                                                &join->tmp_table_param.recinfo,
-                                                error, 1))
+	if (create_internal_tmp_table_from_heap(
+                                       join->thd, table,
+                                       join->tmp_table_param.start_recinfo,
+                                       &join->tmp_table_param.recinfo,
+                                       error, TRUE, NULL))
 	  DBUG_RETURN(NESTED_LOOP_ERROR);        // Not a table_is_full error
 	table->s->uniques=0;			// To ensure rows are the same
       }
@@ -18023,7 +18071,7 @@ end_update(JOIN *join, JOIN_TAB *join_ta
     if (create_internal_tmp_table_from_heap(join->thd, table,
                                             join->tmp_table_param.start_recinfo,
                                             &join->tmp_table_param.recinfo,
-                                            error, 0))
+                                            error, FALSE, NULL))
       DBUG_RETURN(NESTED_LOOP_ERROR);            // Not a table_is_full error
     /* Change method to update rows */
     table->file->ha_index_init(0, 0);
@@ -18118,10 +18166,11 @@ end_write_group(JOIN *join, JOIN_TAB *jo
 	{
           int error= table->file->ha_write_row(table->record[0]);
           if (error &&
-              create_internal_tmp_table_from_heap(join->thd, table,
-                                                  join->tmp_table_param.start_recinfo,
-                                                  &join->tmp_table_param.recinfo,
-                                                  error, 0))
+              create_internal_tmp_table_from_heap(
+                                          join->thd, table,
+                                          join->tmp_table_param.start_recinfo,
+                                          &join->tmp_table_param.recinfo,
+                                          error, FALSE, NULL))
 	    DBUG_RETURN(NESTED_LOOP_ERROR);
         }
         if (join->rollup.state != ROLLUP::STATE_NONE)
@@ -21831,7 +21880,7 @@ int JOIN::rollup_write_data(uint idx, TA
 	if (create_internal_tmp_table_from_heap(thd, table_arg, 
                                                 tmp_table_param.start_recinfo,
                                                 &tmp_table_param.recinfo,
-                                                write_error, 0))
+                                                write_error, FALSE, NULL))
 	  return 1;		     
       }
     }

=== modified file 'sql/sql_select.h'
--- a/sql/sql_select.h	2010-04-08 10:50:40 +0000
+++ b/sql/sql_select.h	2010-04-16 14:20:09 +0000
@@ -1843,7 +1843,8 @@ void copy_funcs(Item **func_ptr);
 bool create_internal_tmp_table_from_heap(THD *thd, TABLE *table,
                                          ENGINE_COLUMNDEF *start_recinfo,
                                          ENGINE_COLUMNDEF **recinfo, 
-                                         int error, bool ignore_last_dupp_key_error);
+                                         int error, bool ignore_last_dup,
+                                         bool *is_duplicate);
 uint find_shortest_key(TABLE *table, const key_map *usable_keys);
 Field* create_tmp_field_from_field(THD *thd, Field* org_field,
                                    const char *name, TABLE *table,

=== modified file 'sql/sql_show.cc'
--- a/sql/sql_show.cc	2010-04-10 09:11:47 +0000
+++ b/sql/sql_show.cc	2010-04-16 14:20:09 +0000
@@ -2475,7 +2475,8 @@ bool schema_table_store_record(THD *thd,
     TMP_TABLE_PARAM *param= table->pos_in_table_list->schema_table_param;
 
     if (create_internal_tmp_table_from_heap(thd, table, param->start_recinfo, 
-                                            &param->recinfo, error, 0))
+                                            &param->recinfo, error,
+                                            FALSE, NULL))
       return 1;
   }
   return 0;

=== modified file 'sql/sql_union.cc'
--- a/sql/sql_union.cc	2010-04-09 08:22:10 +0000
+++ b/sql/sql_union.cc	2010-04-16 14:20:09 +0000
@@ -73,7 +73,7 @@ bool select_union::send_data(List<Item> 
         create_internal_tmp_table_from_heap(thd, table,
                                             tmp_table_param.start_recinfo, 
                                             &tmp_table_param.recinfo, error,
-                                            1))
+                                            TRUE, NULL))
       return 1;
   }
   return 0;

=== modified file 'sql/sql_update.cc'
--- a/sql/sql_update.cc	2010-04-10 09:11:47 +0000
+++ b/sql/sql_update.cc	2010-04-16 14:20:09 +0000
@@ -1821,7 +1821,7 @@ bool multi_update::send_data(List<Item> 
             create_internal_tmp_table_from_heap(thd, tmp_table,
                                          tmp_table_param[offset].start_recinfo,
                                          &tmp_table_param[offset].recinfo,
-                                         error, 1))
+                                         error, TRUE, NULL))
         {
           do_update=0;
           MYSQL_MULTI_UPDATE_DONE(1, 0, 0);


Attachment: [text/bzr-bundle] bzr/roy.lyseng@sun.com-20100416142009-8lw6ptxkgbguohy2.bundle
Thread
bzr push into mysql-6.0-codebase-bugfixing branch (roy.lyseng:3873 to 3874)Bug#51110Roy Lyseng16 Apr