From: Jorgen Loland Date: November 24 2010 1:30pm Subject: Re: bzr commit into mysql-trunk branch (tor.didriksen:3247) WL#1393 List-Archive: http://lists.mysql.com/commits/124875 Message-Id: <4CED137E.7070709@oracle.com> MIME-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 8bit Tor, Since you have committed a new version, this is a partial review of the things I have already found. I'll continue by reviewing the latest patch. Inline comments are tagged with JL > === added file 'sql/bounded_queue.cc' > --- a/sql/bounded_queue.cc 1970-01-01 00:00:00 +0000 > +++ b/sql/bounded_queue.cc 2010-11-22 11:45:40 +0000 > @@ -0,0 +1,67 @@ > +double get_merge_many_buffs_cost_fast(ha_rows num_buffers, ha_rows max_n_elems, > + ha_rows last_n_elems, uint elem_size) > +{ > + double total_cost; > + const double MERGEBUFF_double= (double) MERGEBUFF; > + > + /* Calc CPU cost of sorting each buffer */ JL: Calc CPU cost of sorting each buffer using quicksort? > + total_cost= ( num_buffers * max_n_elems * log(1.0 + max_n_elems) + > + last_n_elems * log(1.0 + last_n_elems) ) > + / TIME_FOR_COMPARE_ROWID; > + > + /* Simulate behavior of merge_many_buff(). */ > + while (num_buffers>= MERGEBUFF2) > + { > + /* Calculate # of calls to merge_buffers() */ > + ha_rows loop_limit= num_buffers - MERGEBUFF*3/2; > + ha_rows num_merge_calls= 1 + loop_limit/MERGEBUFF; > + ha_rows i_end_value= num_merge_calls * MERGEBUFF; > + > + /* Cost of merge sort 'num_merge_calls' */ > + total_cost+= num_merge_calls * > + ( 2.0 * (max_n_elems * MERGEBUFF_double * elem_size) / IO_SIZE > + + max_n_elems * MERGEBUFF_double * log(MERGEBUFF_double) / > + (TIME_FOR_COMPARE_ROWID * M_LN2)); > + /* # of records in last buffer */ JL: A more accurate comment would be "# of records in remaining buffers" > + last_n_elems+= (num_buffers - i_end_value) * max_n_elems; > + > + /* cost of merge sort of last buffer */ > + total_cost+= 2.0 * ((double)last_n_elems * elem_size) / IO_SIZE > + + double(last_n_elems) * log(MERGEBUFF_double) / > + (TIME_FOR_COMPARE_ROWID * M_LN2); JL: You should do log(num_buffers - i_end_value) instead of log(MERGEBUFF_double) > === added file 'sql/bounded_queue.h' > --- a/sql/bounded_queue.h 1970-01-01 00:00:00 +0000 > +++ b/sql/bounded_queue.h 2010-11-22 11:45:40 +0000 > @@ -0,0 +1,189 @@ > +/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved. > + > + This program is free software; you can redistribute it and/or modify > + it under the terms of the GNU General Public License as published by > + the Free Software Foundation; version 2 of the License. > + > + This program is distributed in the hope that it will be useful, > + but WITHOUT ANY WARRANTY; without even the implied warranty of > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > + GNU General Public License for more details. > + > + You should have received a copy of the GNU General Public License > + along with this program; if not, write to the Free Software > + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ > + > +#ifndef BOUNDED_QUEUE_INCLUDED > +#define BOUNDED_QUEUE_INCLUDED > + > +#include > +#include "my_global.h" > +#include "my_base.h" > +#include "my_sys.h" > +#include "queues.h" > + > +class Sort_param; > + > +/** > + A priority queue with a fixed, limited size. > + > + This is a wrapper on top of QUEUE and the queue_xxx() functions. > + It keeps the top N elements which are inserted. > + */ > +template > +class Bounded_queue > +{ > +public: > + Bounded_queue() > + { > + memset(&m_queue, 0, sizeof(m_queue)); > + } > + > + ~Bounded_queue() > + { > + delete_queue(&m_queue); > + } > + > + /** > + Function for making sort-key from input data. > + @param param Sort parameters. > + @param to Where to put the key. > + @param from The input data. > + */ > + typedef void (*keymaker_function)(Sort_param *param, > + Key_type *to, > + Element_type *from); > + > + /** > + Function for comparing two keys. > + @param n Pointer to number of bytes to compare. JL: n? what about param v? > + @param a First key. > + @parm b Second key. JL: typo: parm > + @retval a<=> b. > + */ > + typedef int (*compare_function)(size_t *v, Key_type **a, Key_type **b); > + > + /** > + Initialize the queue. > + > + @param max_elements The size of the queue. > + @param offset_to_key Offset to key in elements stored in the queue. > + @param max_at_top Set to 1 if you want biggest element on top. > + @param compare Compare function for elements, takes 3 arguments. > + If NULL, we use get_ptr_compare(compare_length). > + @parem compare_length Lenght of the data used for sorting. > + @param keymaker Function which generates keys for elements. > + @param sort_param Sort parameters. > + @param sort_keys Array of pointers to keys to sort. > + > + @retval 0 OK, 1 Could not allocate memory. > + > + We do *not* take ownership of any of the input pointer arguments. > + */ > + int init(ha_rows max_elements, uint offset_to_key, pbool max_at_top, > + compare_function compare, size_t compare_length, > + keymaker_function keymaker, Sort_param *sort_param, > + Key_type **sort_keys); JL: Const pointers? > + > + /** > + Pushes an element on the queue. > + If the queue is already full, we discard the smallest (or biggest) element. > + > + @param element The element to be pushed. > + */ > + void push(Element_type *element); > + > + /** > + Removes an element from the queue. > + > + @retval Pointer to the removed element. > + */ > + Key_type **pop() > + { > + return (Key_type**) queue_remove(&m_queue, 0); > + } > + > + /** > + The number of elements in the queue. > + */ > + uint num_elements() const { return m_queue.elements; } > + > + /** > + Is the queue initialized? > + */ > + bool is_initialized() const { return m_queue.max_elements> 0; } > + > +private: > + Key_type **m_sort_keys; > + size_t m_compare_length; > + keymaker_function m_keymaker; > + Sort_param *m_sort_param; > + st_queue m_queue; > +}; > + > + > +template > +int Bounded_queue::init(ha_rows max_elements, > + uint offset_to_key, > + pbool max_at_top, > + compare_function compare, > + size_t compare_length, > + keymaker_function keymaker, > + Sort_param *sort_param, > + Key_type **sort_keys) > +{ > + DBUG_ASSERT(sort_keys != NULL); > + > + m_sort_keys= sort_keys; > + m_compare_length= compare_length; > + m_keymaker= keymaker; > + m_sort_param= sort_param; > + // init_queue() takes an uint, and also does (max_elements + 1) > + if (max_elements>= (UINT_MAX - 1)) > + return 1; > + if (compare == NULL) > + compare= reinterpret_cast(get_ptr_compare(compare_length)); > + // We allocate space for one extra element, for replace when queue is full. > + return init_queue(&m_queue, (uint) max_elements + 1, offset_to_key, max_at_top, > + reinterpret_cast(compare),&m_compare_length); JL: Long line > +} > + > + > +template > +void Bounded_queue::push(Element_type *element) > +{ > + DBUG_ASSERT(is_initialized()); > + if (queue_is_full((&m_queue))) > + { JL: Comments like these would be nice: // Generate key and put it on top of the heap > + Key_type **pq_top= reinterpret_cast(queue_top(&m_queue)); > + (*m_keymaker)(m_sort_param, *pq_top, element); // Move key to the right location in the heap. > + queue_replaced(&m_queue); > + } else { > + (*m_keymaker)(m_sort_param, m_sort_keys[m_queue.elements], element); > + queue_insert(&m_queue, > + reinterpret_cast(&m_sort_keys[m_queue.elements])); > + } > +} > + > + > +/* > + Calculate cost of merge sort > + > + @param num_buffers # of merge buffers JL: Isn't num_buffers the number of *full* merge buffers, and then there's a last buffer that is partially filled as well? > + @param max_n_elems max # of keys in memory buffer > + @param last_n_elems # of keys in last buffer > + @param elem_size Size of each element. > + JL: In addition, now that you have worked with queue: feel free to document the rest of st_queue if you have found something that is not explicitly documented. -- Jørgen Løland | Senior Software Engineer | +47 73842138 Oracle MySQL Trondheim, Norway