List: Commits
From: Tor Didriksen
Date: December 1 2010 2:18pm
Subject: bzr commit into mysql-trunk-bugfixing branch (tor.didriksen:3252) WL#1393
#At file:///export/home/didrik/repo/next-mr-opt-team-wl1393-merge/ based on revid:tor.didriksen@stripped

 3252 Tor Didriksen	2010-12-01
      WL#1393 Optimizing filesort with small limit
      
      More review comments.

    added:
      sql/filesort_utils.h
    renamed:
      sql/bounded_queue.cc => sql/filesort_utils.cc
    modified:
      libmysqld/CMakeLists.txt
      sql/CMakeLists.txt
      sql/bounded_queue.h
      sql/filesort.cc
      unittest/gunit/bounded_queue-t.cc
      sql/filesort_utils.cc
=== modified file 'libmysqld/CMakeLists.txt'
--- a/libmysqld/CMakeLists.txt	2010-11-29 11:21:22 +0000
+++ b/libmysqld/CMakeLists.txt	2010-12-01 14:18:38 +0000
@@ -44,7 +44,7 @@ SET(SQL_EMBEDDED_SOURCES emb_qcache.cc l
            ../sql-common/client_plugin.c
            ../sql/password.c ../sql/discover.cc ../sql/derror.cc 
            ../sql/field.cc ../sql/field_conv.cc
-           ../sql/bounded_queue.cc
+           ../sql/filesort_utils.cc
            ../sql/filesort.cc ../sql/gstream.cc
            ../sql/handler.cc ../sql/hash_filo.cc ../sql/hostname.cc 
            ../sql/init.cc ../sql/item_buff.cc ../sql/item_cmpfunc.cc 

=== modified file 'sql/CMakeLists.txt'
--- a/sql/CMakeLists.txt	2010-11-29 11:21:22 +0000
+++ b/sql/CMakeLists.txt	2010-12-01 14:18:38 +0000
@@ -40,7 +40,7 @@ ENDIF()
 SET (SQL_SOURCE
               ../sql-common/client.c derror.cc des_key_file.cc
                discover.cc ../libmysql/errmsg.c field.cc  field_conv.cc 
-               bounded_queue.cc
+               filesort_utils.cc
                filesort.cc gstream.cc sha2.cc
                handler.cc hash_filo.h sql_plugin_services.h
                hostname.cc init.cc item.cc item_buff.cc item_cmpfunc.cc 
@@ -112,7 +112,7 @@ SET (SLAVE_SOURCE rpl_slave.cc rpl_repor
 ADD_LIBRARY(slave ${SLAVE_SOURCE})
 ADD_DEPENDENCIES(slave GenError)
 ADD_LIBRARY(sqlgunitlib
-  bounded_queue.cc mdl.cc sql_list.cc sql_string.cc thr_malloc.cc)
+  filesort_utils.cc mdl.cc sql_list.cc sql_string.cc thr_malloc.cc)
 ADD_DEPENDENCIES(sqlgunitlib GenError)
 
 

=== modified file 'sql/bounded_queue.h'
--- a/sql/bounded_queue.h	2010-11-30 11:54:00 +0000
+++ b/sql/bounded_queue.h	2010-12-01 14:18:38 +0000
@@ -74,7 +74,11 @@ public:
 
     @param max_elements   The size of the queue.
     @param offset_to_key  Offset to key in elements stored in the queue.
-    @param max_at_top     Set to 1 if you want biggest element on top.
+    @param max_at_top     Set to true if you want biggest element on top.
+           false: We keep the n largest elements.
+                  pop() will return the smallest key in the result set.
+           true:  We keep the n smallest elements.
+                  pop() will return the largest key in the result set.
     @param compare        Compare function for elements, takes 3 arguments.
                           If NULL, we use get_ptr_compare(compare_length).
     @param compare_length Length of the data (i.e. the keys) used for sorting.
@@ -183,25 +187,4 @@ void Bounded_queue<Element_type, Key_typ
   }
 }
 
-
-/*
-  Calculate cost of merge sort
-
-    @param num_rows            Total number of rows.
-    @param num_keys_per_buffer Number of keys per buffer.
-    @param elem_size           Size of each element.
-
-    Calculates cost of merge sort by simulating call to merge_many_buff().
-
-  @retval
-    computed cost of merge sort
-
-  @note
-    Declared here in order to be able to unit test it,
-    since library dependencies have not been sorted out yet.
-*/
-double get_merge_many_buffs_cost_fast(ha_rows num_rows,
-                                      ha_rows num_keys_per_buffer,
-                                      uint    elem_size);
-
 #endif  // BOUNDED_QUEUE_INCLUDED
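
The max_at_top semantics documented above can be illustrated independently of Bounded_queue with a plain std::priority_queue. The stand-alone sketch below is only an illustration (the function name keep_n_smallest and the sample data are invented, and it does not use the Bounded_queue API itself): keeping the n smallest keys, as an ORDER BY ... LIMIT n query would, uses a max-heap so that the current worst (largest) kept key sits on top and is the one evicted when a smaller key arrives; popping afterwards yields keys in descending order, matching the [6 .. 0] expectation in the keep-smallest unit test further down.

#include <cstdio>
#include <queue>
#include <vector>

// Keep the 'limit' smallest values seen so far -- the analogue of
// max_at_top == true: a max-heap keeps the largest retained value on top,
// so it is the one replaced when a smaller value is pushed.
static std::vector<int> keep_n_smallest(const std::vector<int> &input,
                                        size_t limit)
{
  std::priority_queue<int> heap;                 // max-heap
  for (int val : input)
  {
    if (heap.size() < limit)
      heap.push(val);
    else if (val < heap.top())                   // beats the current worst key
    {
      heap.pop();
      heap.push(val);
    }
  }
  std::vector<int> result;
  while (!heap.empty())                          // pop() gives the largest key first
  {
    result.push_back(heap.top());
    heap.pop();
  }
  return result;
}

int main()
{
  const std::vector<int> data= {9, 3, 11, 0, 7, 5, 13, 2, 8, 1, 12, 4, 10, 6};
  for (int v : keep_n_smallest(data, 7))
    printf("%d ", v);                            // prints: 6 5 4 3 2 1 0
  printf("\n");
  return 0;
}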

=== modified file 'sql/filesort.cc'
--- a/sql/filesort.cc	2010-11-30 11:54:00 +0000
+++ b/sql/filesort.cc	2010-12-01 14:18:38 +0000
@@ -33,6 +33,7 @@
 #include "sql_test.h"                           // TEST_filesort
 #include "opt_range.h"                          // SQL_SELECT
 #include "bounded_queue.h"
+#include "filesort_utils.h"
 #include "sql_select.h"
 
 #ifndef THREAD
@@ -213,9 +214,7 @@ ha_rows filesort(THD *thd, TABLE *table,
   else
     status_var_increment(thd->status_var.filesort_scan_count);
 
-  /*
-    If number of rows is not known, use as much of sort buffer as possible. 
-  */
+  // If number of rows is not known, use as much of sort buffer as possible. 
   num_rows= table->file->estimate_rows_upper_bound();
 
   if (multi_byte_charset &&
@@ -326,9 +325,7 @@ ha_rows filesort(THD *thd, TABLE *table,
 
   if (num_rows > param.max_rows)
   {
-    /*
-      If find_all_keys() produced more results than the query LIMIT.
-     */
+    // If find_all_keys() produced more results than the query LIMIT.
     num_rows= param.max_rows;
   }
   error= 0;
@@ -368,8 +365,9 @@ ha_rows filesort(THD *thd, TABLE *table,
   DBUG_POP();			/* Ok to DBUG */
 #endif
   table->sort= table_sort;
-  DBUG_PRINT("exit",("num_rows: %ld examined_rows: %ld",
-                     (long) num_rows, (long) *examined_rows));
+  DBUG_PRINT("exit",
+             ("num_rows: %ld examined_rows: %ld found_rows: %ld",
+              (long) num_rows, (long) *examined_rows, (long) *found_rows));
   MYSQL_FILESORT_DONE(error, num_rows);
   DBUG_RETURN(error ? HA_POS_ERROR : num_rows);
 } /* filesort */

=== renamed file 'sql/bounded_queue.cc' => 'sql/filesort_utils.cc'
--- a/sql/bounded_queue.cc	2010-11-30 11:54:00 +0000
+++ b/sql/filesort_utils.cc	2010-12-01 14:18:38 +0000
@@ -13,7 +13,7 @@
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
 
-#include "bounded_queue.h"
+#include "filesort_utils.h"
 #include "sql_const.h"
 #include "sql_sort.h"
 
@@ -46,40 +46,40 @@ double get_merge_many_buffs_cost_fast(ha
   ha_rows last_n_elems= num_rows % num_keys_per_buffer;
   double total_cost;
 
-  /* Calculate CPU cost of sorting buffers. */
+  // Calculate CPU cost of sorting buffers.
   total_cost=
     ( num_buffers * num_keys_per_buffer * log(1.0 + num_keys_per_buffer) +
       last_n_elems * log(1.0 + last_n_elems) )
     / TIME_FOR_COMPARE_ROWID;
   
-  /* Simulate behavior of merge_many_buff(). */
+  // Simulate behavior of merge_many_buff().
   while (num_buffers >= MERGEBUFF2)
   {
-    /* Calculate # of calls to merge_buffers() */
+    // Calculate # of calls to merge_buffers().
     const ha_rows loop_limit= num_buffers - MERGEBUFF*3/2;
     const ha_rows num_merge_calls= 1 + loop_limit/MERGEBUFF;
-    const ha_rows i_end_value= num_merge_calls * MERGEBUFF;
+    const ha_rows num_remaining_buffs=
+      num_buffers - num_merge_calls * MERGEBUFF;
 
-    /* Cost of merge sort 'num_merge_calls' */
+    // Cost of merge sort 'num_merge_calls'.
     total_cost+=
       num_merge_calls *
       get_merge_cost(num_keys_per_buffer * MERGEBUFF, MERGEBUFF, elem_size);
 
-    /* # of records in remaining buffers. */
-    last_n_elems+= (num_buffers - i_end_value) * num_keys_per_buffer;
+    // # of records in remaining buffers.
+    last_n_elems+= num_remaining_buffs * num_keys_per_buffer;
 
-    /* Cost of merge sort of remaining buffers. */
-    total_cost+= get_merge_cost(last_n_elems,
-                                num_buffers - i_end_value, elem_size);
+    // Cost of merge sort of remaining buffers.
+    total_cost+=
+      get_merge_cost(last_n_elems, 1 + num_remaining_buffs, elem_size);
 
     num_buffers= num_merge_calls;
     num_keys_per_buffer*= MERGEBUFF;
   }
 
-  /* Simulate final merge_buff call. */
+  // Simulate final merge_buff call.
   last_n_elems+= num_keys_per_buffer * num_buffers;
-  total_cost+= get_merge_cost(last_n_elems, num_buffers, elem_size);
-
+  total_cost+= get_merge_cost(last_n_elems, 1 + num_buffers, elem_size);
   return total_cost;
 }
 

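Because the cost function now lives in its own translation unit with a matching header, it can be exercised directly, which is the stated reason for keeping the declaration visible to unit tests. Below is a minimal driver sketch, assuming it is built inside the server tree and linked against sqlgunitlib (which now contains filesort_utils.cc); the workload numbers are invented for illustration, only the declaration and the ha_rows/uint types come from the headers.

#include "my_global.h"        // basic server types (uint, and my_base.h for ha_rows)
#include "filesort_utils.h"   // get_merge_many_buffs_cost_fast()
#include <cstdio>

int main()
{
  // Hypothetical workload: one million rows to sort, 10000 keys fitting in the
  // sort buffer per chunk, 64 bytes per key packet. That gives 100 full
  // buffers, well above MERGEBUFF2, so the merge simulation loop has work to do.
  const ha_rows num_rows=        1000000;
  const ha_rows keys_per_buffer= 10000;
  const uint    elem_size=       64;

  const double cost=
    get_merge_many_buffs_cost_fast(num_rows, keys_per_buffer, elem_size);
  printf("estimated merge sort cost: %g\n", cost);
  return 0;
}

Moving the declaration from bounded_queue.h into filesort_utils.h also keeps this cost-model dependency out of the queue template: test code that only needs the merge-cost estimate no longer has to pull in Bounded_queue.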
=== added file 'sql/filesort_utils.h'
--- a/sql/filesort_utils.h	1970-01-01 00:00:00 +0000
+++ b/sql/filesort_utils.h	2010-12-01 14:18:38 +0000
@@ -0,0 +1,27 @@
+#ifndef FILESORT_UTILS_INCLUDED
+
+#include "my_base.h"
+
+/*
+  Calculate cost of merge sort
+
+    @param num_rows            Total number of rows.
+    @param num_keys_per_buffer Number of keys per buffer.
+    @param elem_size           Size of each element.
+
+    Calculates cost of merge sort by simulating call to merge_many_buff().
+
+  @retval
+    computed cost of merge sort
+
+  @note
+    Declared here in order to be able to unit test it,
+    since library dependencies have not been sorted out yet.
+*/
+
+#define FILESORT_UTILS_INCLUDED
+double get_merge_many_buffs_cost_fast(ha_rows num_rows,
+                                      ha_rows num_keys_per_buffer,
+                                      uint    elem_size);
+
+#endif  // FILESORT_UTILS_INCLUDED

=== modified file 'unittest/gunit/bounded_queue-t.cc'
--- a/unittest/gunit/bounded_queue-t.cc	2010-11-30 11:54:00 +0000
+++ b/unittest/gunit/bounded_queue-t.cc	2010-12-01 14:18:38 +0000
@@ -21,6 +21,7 @@
 #include <stddef.h>
 
 #include "bounded_queue.h"
+#include "filesort_utils.h"
 #include "my_sys.h"
 
 namespace {
@@ -130,6 +131,12 @@ protected:
     std::random_shuffle(&m_test_data[0], &m_test_data[array_size(m_test_data)]);
   }
 
+  void insert_test_data()
+  {
+    for (int ix= 0; ix < array_size(m_test_data); ++ix)
+      m_queue.push(&m_test_data[ix]);
+  }
+
   // Key pointers and data, used by the queue_xxx() functions.
   Key_container<num_elements / 2, Test_key> m_keys;
 
@@ -209,10 +216,7 @@ TEST_F(BoundedQueueTest, zero_size_queue
   EXPECT_EQ(0, m_queue.init(0, 0, true, test_key_compare,
                             m_key_size,
                             &test_keymaker, NULL, m_keys.key_ptrs));
-  for (int ix= 0; ix < array_size(m_test_data); ++ix)
-  {
-    m_queue.push(&m_test_data[ix]);
-  }
+  insert_test_data();
   EXPECT_EQ(1U, m_queue.num_elements());
 }
 
@@ -220,16 +224,36 @@ TEST_F(BoundedQueueTest, zero_size_queue
 /*
   Verifies that push and bounded size works, and that pop() gives sorted order.
  */
-TEST_F(BoundedQueueTest, push_and_pop)
+TEST_F(BoundedQueueTest, push_and_pop_keep_largest)
 {
-  EXPECT_EQ(0, m_queue.init(num_elements/2, 0, true, test_key_compare,
+  EXPECT_EQ(0, m_queue.init(num_elements/2, 0, false, test_key_compare,
                             m_key_size,
                             &test_keymaker, NULL, m_keys.key_ptrs));
-  for (int ix= 0; ix < array_size(m_test_data); ++ix)
+  insert_test_data();
+  // We expect the queue to contain [7 .. 13]
+  const int max_key_val= array_size(m_test_data) - 1;
+  while (m_queue.num_elements() > 0)
   {
-    m_queue.push(&m_test_data[ix]);
+    Test_key **top= m_queue.pop();
+    int expected_key_val= max_key_val - m_queue.num_elements();
+    int key_val= (*top)->key;
+    EXPECT_EQ(expected_key_val, key_val);
+    Test_element *element= (*top)->element;
+    EXPECT_EQ(expected_key_val, element->val);
   }
-  // We expect the queue to contain [0 .. n-1]
+}
+
+
+/*
+  Verifies that push and bounded size works, and that pop() gives sorted order.
+ */
+TEST_F(BoundedQueueTest, push_and_pop_keep_smallest)
+{
+  EXPECT_EQ(0, m_queue.init(num_elements/2, 0, true, test_key_compare,
+                            m_key_size,
+                            &test_keymaker, NULL, m_keys.key_ptrs));
+  insert_test_data();
+  // We expect the queue to contain [6 .. 0]
   while (m_queue.num_elements() > 0)
   {
     Test_key **top= m_queue.pop();
@@ -250,11 +274,7 @@ TEST_F(BoundedQueueTest, insert_and_sort
   EXPECT_EQ(0, m_queue.init(num_elements/2, 0, true, test_key_compare,
                             m_key_size,
                             &test_keymaker, NULL, m_keys.key_ptrs));
-  for (int ix= 0; ix < array_size(m_test_data); ++ix)
-  {
-    m_queue.push(&m_test_data[ix]);
-  }
-
+  insert_test_data();
   uchar *base=  (uchar*) &m_keys.key_ptrs[0];
   size_t size=  sizeof(Test_key);
   // We sort our keys as strings, so erase all the element pointers first.


Attachment: [text/bzr-bundle] bzr/tor.didriksen@oracle.com-20101201141838-xzyffh20fx9gttij.bundle