From: Tor Didriksen Date: November 24 2010 3:39pm Subject: Re: bzr commit into mysql-trunk branch (tor.didriksen:3247) WL#1393 List-Archive: http://lists.mysql.com/commits/124891 Message-Id: <4CED31BF.5050705@oracle.com> MIME-Version: 1.0 Content-Type: text/plain; charset=ISO-8859-1; format=flowed Content-Transfer-Encoding: 7bit On 2010-11-24 14:30, Jorgen Loland wrote: > Tor, > > Since you have committed a new version, this is a partial review of > the things I have already found. I'll continue by reviewing the latest > patch. Inline comments are tagged with JL > > > === added file 'sql/bounded_queue.cc' > > --- a/sql/bounded_queue.cc 1970-01-01 00:00:00 +0000 > > +++ b/sql/bounded_queue.cc 2010-11-22 11:45:40 +0000 > > @@ -0,0 +1,67 @@ > > +double get_merge_many_buffs_cost_fast(ha_rows num_buffers, ha_rows > max_n_elems, > > + ha_rows last_n_elems, uint > elem_size) > > +{ > > + double total_cost; > > + const double MERGEBUFF_double= (double) MERGEBUFF; > > + > > + /* Calc CPU cost of sorting each buffer */ > > JL: Calc CPU cost of sorting each buffer using quicksort? Whatever my_string_ptr_sort decides to do, radix or qsort > > > + total_cost= ( num_buffers * max_n_elems * log(1.0 + max_n_elems) + > > + last_n_elems * log(1.0 + last_n_elems) ) > > + / TIME_FOR_COMPARE_ROWID; > > + > > + /* Simulate behavior of merge_many_buff(). */ > > + while (num_buffers>= MERGEBUFF2) > > + { > > + /* Calculate # of calls to merge_buffers() */ > > + ha_rows loop_limit= num_buffers - MERGEBUFF*3/2; > > + ha_rows num_merge_calls= 1 + loop_limit/MERGEBUFF; > > + ha_rows i_end_value= num_merge_calls * MERGEBUFF; > > + > > + /* Cost of merge sort 'num_merge_calls' */ > > + total_cost+= num_merge_calls * > > + ( 2.0 * (max_n_elems * MERGEBUFF_double * elem_size) / IO_SIZE > > + + max_n_elems * MERGEBUFF_double * log(MERGEBUFF_double) / > > + (TIME_FOR_COMPARE_ROWID * M_LN2)); > > + /* # of records in last buffer */ > > JL: A more accurate comment would be "# of records in remaining buffers" yes > > > + last_n_elems+= (num_buffers - i_end_value) * max_n_elems; > > + > > + /* cost of merge sort of last buffer */ > > + total_cost+= 2.0 * ((double)last_n_elems * elem_size) / IO_SIZE > > + + double(last_n_elems) * log(MERGEBUFF_double) / > > + (TIME_FOR_COMPARE_ROWID * M_LN2); > > JL: You should do log(num_buffers - i_end_value) instead of > log(MERGEBUFF_double) indeed > > > === added file 'sql/bounded_queue.h' > > --- a/sql/bounded_queue.h 1970-01-01 00:00:00 +0000 > > +++ b/sql/bounded_queue.h 2010-11-22 11:45:40 +0000 > > @@ -0,0 +1,189 @@ > > +/* Copyright (c) 2010, Oracle and/or its affiliates. All rights > reserved. > > + > > + This program is free software; you can redistribute it and/or > modify > > + it under the terms of the GNU General Public License as > published by > > + the Free Software Foundation; version 2 of the License. > > + > > + This program is distributed in the hope that it will be useful, > > + but WITHOUT ANY WARRANTY; without even the implied warranty of > > + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the > > + GNU General Public License for more details. > > + > > + You should have received a copy of the GNU General Public License > > + along with this program; if not, write to the Free Software > > + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA > 02111-1307 USA */ > > + > > +#ifndef BOUNDED_QUEUE_INCLUDED > > +#define BOUNDED_QUEUE_INCLUDED > > + > > +#include > > +#include "my_global.h" > > +#include "my_base.h" > > +#include "my_sys.h" > > +#include "queues.h" > > + > > +class Sort_param; > > + > > +/** > > + A priority queue with a fixed, limited size. > > + > > + This is a wrapper on top of QUEUE and the queue_xxx() functions. > > + It keeps the top N elements which are inserted. > > + */ > > +template > > +class Bounded_queue > > +{ > > +public: > > + Bounded_queue() > > + { > > + memset(&m_queue, 0, sizeof(m_queue)); > > + } > > + > > + ~Bounded_queue() > > + { > > + delete_queue(&m_queue); > > + } > > + > > + /** > > + Function for making sort-key from input data. > > + @param param Sort parameters. > > + @param to Where to put the key. > > + @param from The input data. > > + */ > > + typedef void (*keymaker_function)(Sort_param *param, > > + Key_type *to, > > + Element_type *from); > > + > > + /** > > + Function for comparing two keys. > > + @param n Pointer to number of bytes to compare. > > JL: n? what about param v? typo, yes > > > + @param a First key. > > + @parm b Second key. > > JL: typo: parm > > > + @retval a<=> b. > > + */ > > + typedef int (*compare_function)(size_t *v, Key_type **a, Key_type > **b); > > + > > + /** > > + Initialize the queue. > > + > > + @param max_elements The size of the queue. > > + @param offset_to_key Offset to key in elements stored in the > queue. > > + @param max_at_top Set to 1 if you want biggest element on top. > > + @param compare Compare function for elements, takes 3 > arguments. > > + If NULL, we use > get_ptr_compare(compare_length). > > + @parem compare_length Lenght of the data used for sorting. > > + @param keymaker Function which generates keys for elements. > > + @param sort_param Sort parameters. > > + @param sort_keys Array of pointers to keys to sort. > > + > > + @retval 0 OK, 1 Could not allocate memory. > > + > > + We do *not* take ownership of any of the input pointer arguments. > > + */ > > + int init(ha_rows max_elements, uint offset_to_key, pbool max_at_top, > > + compare_function compare, size_t compare_length, > > + keymaker_function keymaker, Sort_param *sort_param, > > + Key_type **sort_keys); > > JL: Const pointers? Introducing const-correctness after-the-fact is very hard. We are passing this on to legacy code. > > > + > > + /** > > + Pushes an element on the queue. > > + If the queue is already full, we discard the smallest (or > biggest) element. > > + > > + @param element The element to be pushed. > > + */ > > + void push(Element_type *element); > > + > > + /** > > + Removes an element from the queue. > > + > > + @retval Pointer to the removed element. > > + */ > > + Key_type **pop() > > + { > > + return (Key_type**) queue_remove(&m_queue, 0); > > + } > > + > > + /** > > + The number of elements in the queue. > > + */ > > + uint num_elements() const { return m_queue.elements; } > > + > > + /** > > + Is the queue initialized? > > + */ > > + bool is_initialized() const { return m_queue.max_elements> 0; } > > + > > +private: > > + Key_type **m_sort_keys; > > + size_t m_compare_length; > > + keymaker_function m_keymaker; > > + Sort_param *m_sort_param; > > + st_queue m_queue; > > +}; > > + > > + > > +template > > +int Bounded_queue::init(ha_rows max_elements, > > + uint offset_to_key, > > + pbool max_at_top, > > + compare_function > compare, > > + size_t compare_length, > > + keymaker_function > keymaker, > > + Sort_param > *sort_param, > > + Key_type **sort_keys) > > +{ > > + DBUG_ASSERT(sort_keys != NULL); > > + > > + m_sort_keys= sort_keys; > > + m_compare_length= compare_length; > > + m_keymaker= keymaker; > > + m_sort_param= sort_param; > > + // init_queue() takes an uint, and also does (max_elements + 1) > > + if (max_elements>= (UINT_MAX - 1)) > > + return 1; > > + if (compare == NULL) > > + compare= > reinterpret_cast(get_ptr_compare(compare_length)); > > + // We allocate space for one extra element, for replace when > queue is full. > > + return init_queue(&m_queue, (uint) max_elements + 1, > offset_to_key, max_at_top, > > + > reinterpret_cast(compare),&m_compare_length); > > JL: Long line ahh, 81 rather than 80 ... > > > +} > > + > > + > > +template > > +void Bounded_queue::push(Element_type > *element) > > +{ > > + DBUG_ASSERT(is_initialized()); > > + if (queue_is_full((&m_queue))) > > + { > > JL: Comments like these would be nice: > > // Generate key and put it on top of the heap > > > + Key_type **pq_top= reinterpret_cast **>(queue_top(&m_queue)); > > + (*m_keymaker)(m_sort_param, *pq_top, element); > > // Move key to the right location in the heap. I don't agree, that's alomost like; ++it; // Increment iterator. we are calling queue_replaced() and queue_insert() respectively. > > > + queue_replaced(&m_queue); > > + } else { > > + (*m_keymaker)(m_sort_param, m_sort_keys[m_queue.elements], > element); > > + queue_insert(&m_queue, > > + > reinterpret_cast(&m_sort_keys[m_queue.elements])); > > + } > > +} > > + > > + > > +/* > > + Calculate cost of merge sort > > + > > + @param num_buffers # of merge buffers > > JL: Isn't num_buffers the number of *full* merge buffers, and > then there's a last buffer that is partially filled as well? This has been re-written now. > > > + @param max_n_elems max # of keys in memory buffer > > + @param last_n_elems # of keys in last buffer > > + @param elem_size Size of each element. > > + > > JL: In addition, now that you have worked with queue: feel free to > document the rest of st_queue if you have found something that is not > explicitly documented. > thank you very much.