#At file:///export/home/didrik/repo/next-mr-opt-team-wl1393-merge/ based on revid:tor.didriksen@stripped
3252 Tor Didriksen 2010-12-01
WL#1393 Optimizing filesort with small limit
More review comments.
added:
sql/filesort_utils.h
renamed:
sql/bounded_queue.cc => sql/filesort_utils.cc
modified:
libmysqld/CMakeLists.txt
sql/CMakeLists.txt
sql/bounded_queue.h
sql/filesort.cc
unittest/gunit/bounded_queue-t.cc
sql/filesort_utils.cc
=== modified file 'libmysqld/CMakeLists.txt'
--- a/libmysqld/CMakeLists.txt 2010-11-29 11:21:22 +0000
+++ b/libmysqld/CMakeLists.txt 2010-12-01 14:18:38 +0000
@@ -44,7 +44,7 @@ SET(SQL_EMBEDDED_SOURCES emb_qcache.cc l
../sql-common/client_plugin.c
../sql/password.c ../sql/discover.cc ../sql/derror.cc
../sql/field.cc ../sql/field_conv.cc
- ../sql/bounded_queue.cc
+ ../sql/filesort_utils.cc
../sql/filesort.cc ../sql/gstream.cc
../sql/handler.cc ../sql/hash_filo.cc ../sql/hostname.cc
../sql/init.cc ../sql/item_buff.cc ../sql/item_cmpfunc.cc
=== modified file 'sql/CMakeLists.txt'
--- a/sql/CMakeLists.txt 2010-11-29 11:21:22 +0000
+++ b/sql/CMakeLists.txt 2010-12-01 14:18:38 +0000
@@ -40,7 +40,7 @@ ENDIF()
SET (SQL_SOURCE
../sql-common/client.c derror.cc des_key_file.cc
discover.cc ../libmysql/errmsg.c field.cc field_conv.cc
- bounded_queue.cc
+ filesort_utils.cc
filesort.cc gstream.cc sha2.cc
handler.cc hash_filo.h sql_plugin_services.h
hostname.cc init.cc item.cc item_buff.cc item_cmpfunc.cc
@@ -112,7 +112,7 @@ SET (SLAVE_SOURCE rpl_slave.cc rpl_repor
ADD_LIBRARY(slave ${SLAVE_SOURCE})
ADD_DEPENDENCIES(slave GenError)
ADD_LIBRARY(sqlgunitlib
- bounded_queue.cc mdl.cc sql_list.cc sql_string.cc thr_malloc.cc)
+ filesort_utils.cc mdl.cc sql_list.cc sql_string.cc thr_malloc.cc)
ADD_DEPENDENCIES(sqlgunitlib GenError)
=== modified file 'sql/bounded_queue.h'
--- a/sql/bounded_queue.h 2010-11-30 11:54:00 +0000
+++ b/sql/bounded_queue.h 2010-12-01 14:18:38 +0000
@@ -74,7 +74,11 @@ public:
@param max_elements The size of the queue.
@param offset_to_key Offset to key in elements stored in the queue.
- @param max_at_top Set to 1 if you want biggest element on top.
+ @param max_at_top Set to true if you want biggest element on top.
+ false: We keep the n largest elements.
+ pop() will return the smallest key in the result set.
+ true: We keep the n smallest elements.
+ pop() will return the largest key in the result set.
@param compare Compare function for elements, takes 3 arguments.
If NULL, we use get_ptr_compare(compare_length).
@param compare_length Length of the data (i.e. the keys) used for sorting.
@@ -183,25 +187,4 @@ void Bounded_queue<Element_type, Key_typ
}
}
-
-/*
- Calculate cost of merge sort
-
- @param num_rows Total number of rows.
- @param num_keys_per_buffer Number of keys per buffer.
- @param elem_size Size of each element.
-
- Calculates cost of merge sort by simulating call to merge_many_buff().
-
- @retval
- computed cost of merge sort
-
- @note
- Declared here in order to be able to unit test it,
- since library dependencies have not been sorted out yet.
-*/
-double get_merge_many_buffs_cost_fast(ha_rows num_rows,
- ha_rows num_keys_per_buffer,
- uint elem_size);
-
#endif // BOUNDED_QUEUE_INCLUDED
=== modified file 'sql/filesort.cc'
--- a/sql/filesort.cc 2010-11-30 11:54:00 +0000
+++ b/sql/filesort.cc 2010-12-01 14:18:38 +0000
@@ -33,6 +33,7 @@
#include "sql_test.h" // TEST_filesort
#include "opt_range.h" // SQL_SELECT
#include "bounded_queue.h"
+#include "filesort_utils.h"
#include "sql_select.h"
#ifndef THREAD
@@ -213,9 +214,7 @@ ha_rows filesort(THD *thd, TABLE *table,
else
status_var_increment(thd->status_var.filesort_scan_count);
- /*
- If number of rows is not known, use as much of sort buffer as possible.
- */
+ // If number of rows is not known, use as much of sort buffer as possible.
num_rows= table->file->estimate_rows_upper_bound();
if (multi_byte_charset &&
@@ -326,9 +325,7 @@ ha_rows filesort(THD *thd, TABLE *table,
if (num_rows > param.max_rows)
{
- /*
- If find_all_keys() produced more results than the query LIMIT.
- */
+ // If find_all_keys() produced more results than the query LIMIT.
num_rows= param.max_rows;
}
error= 0;
@@ -368,8 +365,9 @@ ha_rows filesort(THD *thd, TABLE *table,
DBUG_POP(); /* Ok to DBUG */
#endif
table->sort= table_sort;
- DBUG_PRINT("exit",("num_rows: %ld examined_rows: %ld",
- (long) num_rows, (long) *examined_rows));
+ DBUG_PRINT("exit",
+ ("num_rows: %ld examined_rows: %ld found_rows: %ld",
+ (long) num_rows, (long) *examined_rows, (long) *found_rows));
MYSQL_FILESORT_DONE(error, num_rows);
DBUG_RETURN(error ? HA_POS_ERROR : num_rows);
} /* filesort */
=== renamed file 'sql/bounded_queue.cc' => 'sql/filesort_utils.cc'
--- a/sql/bounded_queue.cc 2010-11-30 11:54:00 +0000
+++ b/sql/filesort_utils.cc 2010-12-01 14:18:38 +0000
@@ -13,7 +13,7 @@
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */
-#include "bounded_queue.h"
+#include "filesort_utils.h"
#include "sql_const.h"
#include "sql_sort.h"
@@ -46,40 +46,40 @@ double get_merge_many_buffs_cost_fast(ha
ha_rows last_n_elems= num_rows % num_keys_per_buffer;
double total_cost;
- /* Calculate CPU cost of sorting buffers. */
+ // Calculate CPU cost of sorting buffers.
total_cost=
( num_buffers * num_keys_per_buffer * log(1.0 + num_keys_per_buffer) +
last_n_elems * log(1.0 + last_n_elems) )
/ TIME_FOR_COMPARE_ROWID;
- /* Simulate behavior of merge_many_buff(). */
+ // Simulate behavior of merge_many_buff().
while (num_buffers >= MERGEBUFF2)
{
- /* Calculate # of calls to merge_buffers() */
+ // Calculate # of calls to merge_buffers().
const ha_rows loop_limit= num_buffers - MERGEBUFF*3/2;
const ha_rows num_merge_calls= 1 + loop_limit/MERGEBUFF;
- const ha_rows i_end_value= num_merge_calls * MERGEBUFF;
+ const ha_rows num_remaining_buffs=
+ num_buffers - num_merge_calls * MERGEBUFF;
- /* Cost of merge sort 'num_merge_calls' */
+ // Cost of merge sort 'num_merge_calls'.
total_cost+=
num_merge_calls *
get_merge_cost(num_keys_per_buffer * MERGEBUFF, MERGEBUFF, elem_size);
- /* # of records in remaining buffers. */
- last_n_elems+= (num_buffers - i_end_value) * num_keys_per_buffer;
+ // # of records in remaining buffers.
+ last_n_elems+= num_remaining_buffs * num_keys_per_buffer;
- /* Cost of merge sort of remaining buffers. */
- total_cost+= get_merge_cost(last_n_elems,
- num_buffers - i_end_value, elem_size);
+ // Cost of merge sort of remaining buffers.
+ total_cost+=
+ get_merge_cost(last_n_elems, 1 + num_remaining_buffs, elem_size);
num_buffers= num_merge_calls;
num_keys_per_buffer*= MERGEBUFF;
}
- /* Simulate final merge_buff call. */
+ // Simulate final merge_buff call.
last_n_elems+= num_keys_per_buffer * num_buffers;
- total_cost+= get_merge_cost(last_n_elems, num_buffers, elem_size);
-
+ total_cost+= get_merge_cost(last_n_elems, 1 + num_buffers, elem_size);
return total_cost;
}
=== added file 'sql/filesort_utils.h'
--- a/sql/filesort_utils.h 1970-01-01 00:00:00 +0000
+++ b/sql/filesort_utils.h 2010-12-01 14:18:38 +0000
@@ -0,0 +1,27 @@
+#ifndef FILESORT_UTILS_INCLUDED
+
+#include "my_base.h"
+
+/*
+ Calculate cost of merge sort
+
+ @param num_rows Total number of rows.
+ @param num_keys_per_buffer Number of keys per buffer.
+ @param elem_size Size of each element.
+
+ Calculates cost of merge sort by simulating call to merge_many_buff().
+
+ @retval
+ computed cost of merge sort
+
+ @note
+ Declared here in order to be able to unit test it,
+ since library dependencies have not been sorted out yet.
+*/
+
+#define FILESORT_UTILS_INCLUDED
+double get_merge_many_buffs_cost_fast(ha_rows num_rows,
+ ha_rows num_keys_per_buffer,
+ uint elem_size);
+
+#endif // FILESORT_UTILS_INCLUDED
=== modified file 'unittest/gunit/bounded_queue-t.cc'
--- a/unittest/gunit/bounded_queue-t.cc 2010-11-30 11:54:00 +0000
+++ b/unittest/gunit/bounded_queue-t.cc 2010-12-01 14:18:38 +0000
@@ -21,6 +21,7 @@
#include <stddef.h>
#include "bounded_queue.h"
+#include "filesort_utils.h"
#include "my_sys.h"
namespace {
@@ -130,6 +131,12 @@ protected:
std::random_shuffle(&m_test_data[0], &m_test_data[array_size(m_test_data)]);
}
+ void insert_test_data()
+ {
+ for (int ix= 0; ix < array_size(m_test_data); ++ix)
+ m_queue.push(&m_test_data[ix]);
+ }
+
// Key pointers and data, used by the queue_xxx() functions.
Key_container<num_elements / 2, Test_key> m_keys;
@@ -209,10 +216,7 @@ TEST_F(BoundedQueueTest, zero_size_queue
EXPECT_EQ(0, m_queue.init(0, 0, true, test_key_compare,
m_key_size,
&test_keymaker, NULL, m_keys.key_ptrs));
- for (int ix= 0; ix < array_size(m_test_data); ++ix)
- {
- m_queue.push(&m_test_data[ix]);
- }
+ insert_test_data();
EXPECT_EQ(1U, m_queue.num_elements());
}
@@ -220,16 +224,36 @@ TEST_F(BoundedQueueTest, zero_size_queue
/*
Verifies that push and bounded size works, and that pop() gives sorted order.
*/
-TEST_F(BoundedQueueTest, push_and_pop)
+TEST_F(BoundedQueueTest, push_and_pop_keep_largest)
{
- EXPECT_EQ(0, m_queue.init(num_elements/2, 0, true, test_key_compare,
+ EXPECT_EQ(0, m_queue.init(num_elements/2, 0, false, test_key_compare,
m_key_size,
&test_keymaker, NULL, m_keys.key_ptrs));
- for (int ix= 0; ix < array_size(m_test_data); ++ix)
+ insert_test_data();
+ // We expect the queue to contain [7 .. 13]
+ const int max_key_val= array_size(m_test_data) - 1;
+ while (m_queue.num_elements() > 0)
{
- m_queue.push(&m_test_data[ix]);
+ Test_key **top= m_queue.pop();
+ int expected_key_val= max_key_val - m_queue.num_elements();
+ int key_val= (*top)->key;
+ EXPECT_EQ(expected_key_val, key_val);
+ Test_element *element= (*top)->element;
+ EXPECT_EQ(expected_key_val, element->val);
}
- // We expect the queue to contain [0 .. n-1]
+}
+
+
+/*
+ Verifies that push and bounded size works, and that pop() gives sorted order.
+ */
+TEST_F(BoundedQueueTest, push_and_pop_keep_smallest)
+{
+ EXPECT_EQ(0, m_queue.init(num_elements/2, 0, true, test_key_compare,
+ m_key_size,
+ &test_keymaker, NULL, m_keys.key_ptrs));
+ insert_test_data();
+ // We expect the queue to contain [6 .. 0]
while (m_queue.num_elements() > 0)
{
Test_key **top= m_queue.pop();
@@ -250,11 +274,7 @@ TEST_F(BoundedQueueTest, insert_and_sort
EXPECT_EQ(0, m_queue.init(num_elements/2, 0, true, test_key_compare,
m_key_size,
&test_keymaker, NULL, m_keys.key_ptrs));
- for (int ix= 0; ix < array_size(m_test_data); ++ix)
- {
- m_queue.push(&m_test_data[ix]);
- }
-
+ insert_test_data();
uchar *base= (uchar*) &m_keys.key_ptrs[0];
size_t size= sizeof(Test_key);
// We sort our keys as strings, so erase all the element pointers first.
Attachment: [text/bzr-bundle] bzr/tor.didriksen@oracle.com-20101201141838-xzyffh20fx9gttij.bundle
| Thread |
|---|
| • bzr commit into mysql-trunk-bugfixing branch (tor.didriksen:3252) WL#1393 | Tor Didriksen | 1 Dec |