List:Internals« Previous MessageNext Message »
From:Eric Jensen Date:June 6 2009 11:43pm
Subject:Re: proposed design for UNION Order By optimization
View as plain text  
this is already faster than the previous behavior of pulling all the  
component selects' results and re-sorting them, even though it doesn't  
seem to be using the index on the temporary table properly and is  
therefore doing an unnecessary filesort for each iteration of the  
merge loop.  my sql-bench script and its output is below.

eric

(ej@lakitu) ~/mysql-5.1.30/sql-bench> ./test-kway-merge --fast -- 
host=127.0.0.1:9306 --user=root
Testing server 'MySQL 5.1.30 debug log' at 2009-06-06 16:36:31

Testing the speed of k-way merge vs traditional union
The  test-tables have 2*1000 rows each, queries have 100 unions, and  
request a limit of 100 results.  They will be run in a loop 100 times.

Creating tables
Inserting 1000 rows
Time to insert (1000): 59 wallclock secs ( 3.00 usr  3.82 sys +  0.00  
cusr  0.00 csys =  6.82 CPU)

Testing unions with k-way merge descending
Time for union_with_kway_desc (100:1000): 18 wallclock secs ( 0.04  
usr  0.01 sys +  0.00 cusr  0.00 csys =  0.05 CPU)
Testing unions without k-way merge descending
Time for union_without_kway_desc (100:1000): 19 wallclock secs ( 0.05  
usr  0.01 sys +  0.00 cusr  0.00 csys =  0.06 CPU)

Inserting 1000 more rows
Time to insert again (1000): 55 wallclock secs ( 2.84 usr  3.74 sys +   
0.00 cusr  0.00 csys =  6.58 CPU)

Testing unions with k-way merge ascending
Time for union_with_kway_asc (100:10000): 53 wallclock secs ( 0.10  
usr  0.03 sys +  0.00 cusr  0.00 csys =  0.13 CPU)
Testing unions without k-way merge ascending
Time for union_without_kway_asc (100:10000): 69 wallclock secs ( 0.09  
usr  0.03 sys +  0.00 cusr  0.00 csys =  0.12 CPU)
Total time: 277 wallclock secs ( 6.16 usr  7.67 sys +  0.00 cusr  0.00  
csys = 13.83 CPU)

(ej@lakitu) ~/mysql-5.1.30/sql-bench> cat test-kway-merge
#!/usr/bin/perl
# Copyright (C) 2000-2001, 2003 MySQL AB
#
# This library is free software; you can redistribute it and/or
# modify it under the terms of the GNU Library General Public
# License as published by the Free Software Foundation; version 2
# of the License.
#
# This library is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Library General Public License for more details.
#
# You should have received a copy of the GNU Library General Public
# License along with this library; if not, write to the Free
# Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
# MA 02111-1307, USA
#
# Test of k-way merge in union
#
##################### Standard benchmark inits  
##############################

use Cwd;
use DBI;
use Getopt::Long;
use Benchmark;

# fix sequence
srand(0);

$opt_loop_count=1000;
$opt_small_loop_count=100;
$opt_unions=100;
$opt_limit=100;

$pwd = cwd(); $pwd = "." if ($pwd eq '');
require "$pwd/bench-init.pl" || die "Can't read Configuration file: $! 
\n";

$opt_unions=min($opt_unions,($limits->{'query_size'}-50)/24);

if ($opt_small_test)
{
   $opt_loop_count/=10;
   $opt_small_loop_count/=10;
   $opt_unions/=10;
   $opt_limit/=10;
}

print "Testing the speed of k-way merge vs traditional union\n";
print "The $opt_union test-tables have 2*$opt_loop_count rows each,  
queries have $opt_unions unions, and request a limit of $opt_limit  
results.  They will be run in a loop $opt_small_loop_count times.\n\n";

####
####  Connect and start timeing
####

$dbh = $server->connect();
$start_time=new Benchmark;

####
#### Create needed tables
####

goto select_test if ($opt_skip_create);

print "Creating tables\n";

for ($i = 0; $i < $opt_unions; $i++) {
   $dbh->do("drop table bench$i" . $server->{'drop_attr'});

   do_many($dbh,$server->create("bench$i",
                              ["id int NOT NULL",
                               "i1 int NOT NULL",
                               "i2 int",
                               "i3 int",
                               "vc1 varchar(150) NOT NULL",
                               "vc2 varchar(100) NOT NULL",
                               "vc3 varchar(50)"],
                              ["primary key (id)",
                               "index (i1)",
                               "index (i2)"]));
}

if ($opt_lock_tables)
{
   $query = "LOCK TABLES ";
   for ($i = 0; $i < $opt_unions; $i++) {
     $query .= "bench$i WRITE";
     $query .= ", " if ($i < $opt_unions - 1);
   }
   do_query($dbh,$query);
}

if ($opt_fast && defined($server->{vacuum}))
{
   $server->vacuum(1,\$dbh);
}

####
#### Insert $opt_loop_count records with
#### id:        distributed values 0 - > $opt_loop_count/100
####

print "Inserting $opt_loop_count rows\n";

$loop_time=new Benchmark;

if ($opt_fast && $server->{transactions})
{
   $dbh->{AutoCommit} = 0;
}

for ($i = 0; $i < $opt_unions; $i++) {
   $query="insert ignore into bench$i values";
   for ($j=0; $j < $opt_loop_count ; $j++)
   {
     $id = $i1 = $i2 = $i3 = int(rand($opt_loop_count/100)) + 1;
     $vc1 = "repeat('$id',75)";
     $vc2 = "repeat('$id',50)";
     $vc3 = "repeat('$id',25)";
     do_query($dbh,"$query ($id,$i1,$i2,$i3,$vc1,$vc2,$vc3)");
   }
}

if ($opt_fast && $server->{transactions})
{
   $dbh->commit;
   $dbh->{AutoCommit} = 1;
}

$end_time=new Benchmark;
print "Time to insert ($opt_loop_count): " .
     timestr(timediff($end_time, $loop_time),"all") . "\n\n";

if ($opt_lock_tables)
{
   do_query($dbh,"UNLOCK TABLES");
}

if ($opt_fast && defined($server->{vacuum}))
{
   for ($i = 0; $i < $opt_unions; $i++) {
     $server->vacuum(0,\$dbh,"bench$i");
   }
}

if ($opt_lock_tables)
{
   $query = "LOCK TABLES ";
   for ($i = 0; $i < $opt_unions; $i++) {
     $query .= "bench$i WRITE";
     $query .= ", " if ($i < $opt_unions - 1);
   }
   do_query($dbh,$query);
}


####
#### Do some selects on the tables
####

select_test:

if ($opt_fast) {
print "Testing unions with k-way merge descending\n";
$loop_time=new Benchmark;
$rows=0;
for ($i=0 ; $i < $opt_small_loop_count ; $i++)
{
   $query = "";
   for ($j = 0; $j < $opt_unions; $j++) {
     $query .= ($j == 0 ? "(select SQL_ASSUME_SORTED_UNION " :  
"(select ");
     $query .= "* from bench$j order by id desc limit $opt_limit)";
     $query .= " union " if ($j < $opt_unions - 1);
   }
   $rows+=fetch_all_rows($dbh,"$query order by id desc limit  
$opt_limit");
}
$count=$opt_small_loop_count;

$end_time=new Benchmark;
print "Time for union_with_kway_desc ($count:$rows): " .
     timestr(timediff($end_time, $loop_time),"all") . "\n";
}

print "Testing unions without k-way merge descending\n";
$loop_time=new Benchmark;
$rows=0;
for ($i=0 ; $i < $opt_small_loop_count ; $i++)
{
   $query = "";
   for ($j = 0; $j < $opt_unions; $j++) {
     $query .= "(select * from bench$j order by id desc limit  
$opt_limit)";
     $query .= " union " if ($j < $opt_unions - 1);
   }
   $rows+=fetch_all_rows($dbh,"$query order by id desc limit  
$opt_limit");
}
$count=$opt_small_loop_count;

$end_time=new Benchmark;
print "Time for union_without_kway_desc ($count:$rows): " .
     timestr(timediff($end_time, $loop_time),"all") . "\n";

####
#### Insert $opt_loop_count records with
#### id:        distributed values 0 - > $opt_loop_count
####

print "\nInserting $opt_loop_count more rows\n";

$loop_time=new Benchmark;

if ($opt_fast && $server->{transactions})
{
   $dbh->{AutoCommit} = 0;
}

for ($i = 0; $i < $opt_unions; $i++) {
   $query="insert ignore into bench$i values";
   for ($j=0; $j < $opt_loop_count ; $j++)
   {
     $id = $i1 = $i2 = $i3 = int(rand($opt_loop_count)) + 1;
     $vc1 = "repeat('$id',75)";
     $vc2 = "repeat('$id',50)";
     $vc3 = "repeat('$id',25)";
     do_query($dbh,"$query ($id,$i1,$i2,$i3,$vc1,$vc2,$vc3)");
   }
}

if ($opt_fast && $server->{transactions})
{
   $dbh->commit;
   $dbh->{AutoCommit} = 1;
}

$end_time=new Benchmark;
print "Time to insert again ($opt_loop_count): " .
     timestr(timediff($end_time, $loop_time),"all") . "\n\n";

if ($opt_fast) {
print "Testing unions with k-way merge ascending\n";
$loop_time=new Benchmark;
$rows=0;
for ($i=0 ; $i < $opt_small_loop_count ; $i++)
{
   $query = "";
   for ($j = 0; $j < $opt_unions; $j++) {
     $query .= ($j == 0 ? "(select SQL_ASSUME_SORTED_UNION " :  
"(select ");
     $query .= "* from bench$j order by id asc limit $opt_limit)";
     $query .= " union " if ($j < $opt_unions - 1);
   }
   $rows+=fetch_all_rows($dbh,"$query order by id asc limit  
$opt_limit");
}
$count=$opt_small_loop_count;

$end_time=new Benchmark;
print "Time for union_with_kway_asc ($count:$rows): " .
     timestr(timediff($end_time, $loop_time),"all") . "\n";
}

print "Testing unions without k-way merge ascending\n";
$loop_time=new Benchmark;
$rows=0;
for ($i=0 ; $i < $opt_small_loop_count ; $i++)
{
   $query = "";
   for ($j = 0; $j < $opt_unions; $j++) {
     $query .= "(select * from bench$j order by id asc limit  
$opt_limit)";
     $query .= " union " if ($j < $opt_unions - 1);
   }
   $rows+=fetch_all_rows($dbh,"$query order by id asc limit  
$opt_limit");
}
$count=$opt_small_loop_count;

$end_time=new Benchmark;
print "Time for union_without_kway_asc ($count:$rows): " .
     timestr(timediff($end_time, $loop_time),"all") . "\n";

####
#### End of benchmark
####

if ($opt_lock_tables)
{
   do_query($dbh,"UNLOCK TABLES");
}
if (!$opt_skip_delete)
{
   for ($i = 0; $i < $opt_unions; $i++) {
     do_query($dbh,"drop table bench$i" . $server->{'drop_attr'});
   }
}

if ($opt_fast && defined($server->{vacuum}))
{
   $server->vacuum(0,\$dbh);
}

$dbh->disconnect;                               # close connection

end_benchmark($start_time);

Thread
proposed design for UNION Order By optimizationEric Jensen13 Mar
  • RE: proposed design for UNION Order By optimizationRick James13 Mar
  • Re: proposed design for UNION Order By optimizationEric Jensen6 Jun
    • RE: proposed design for UNION Order By optimizationRick James6 Jun
    • Re: proposed design for UNION Order By optimizationEric Jensen7 Jun
    • Re: proposed design for UNION Order By optimizationEric Jensen5 Jul
      • Re: proposed design for UNION Order By optimizationEric Jensen5 Jul
        • Re: proposed design for UNION Order By optimizationEric Jensen6 Jul
Re: proposed design for UNION Order By optimizationEric Jensen6 Jun
Re: proposed design for UNION Order By optimizationEric Jensen12 Jul