From: Bjorn Munch Date: February 24 2011 2:53pm Subject: bzr commit into mysql-trunk-mtr branch (bjorn.munch:3032) List-Archive: http://lists.mysql.com/commits/132030 Message-Id: <201102241453.p1OErgPW019479@khepri15.norway.sun.com> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit #At file:///home/bm136801/my/mtr-tr/ based on revid:bjorn.munch@stripped 3032 Bjorn Munch 2011-02-24 [merge] merge from trunk removed: storage/innobase/include/ut0bh.h storage/innobase/ut/ut0bh.c added: storage/innobase/include/ut0bh.h storage/innobase/include/ut0bh.ic storage/innobase/ut/ut0bh.c modified: mysql-test/r/filesort_debug.result mysql-test/suite/sys_vars/r/all_vars.result mysql-test/t/filesort_debug.test sql/filesort.cc storage/innobase/CMakeLists.txt storage/innobase/handler/ha_innodb.cc storage/innobase/include/buf0flu.h storage/innobase/include/srv0srv.h storage/innobase/include/sync0sync.h storage/innobase/include/trx0purge.h storage/innobase/include/trx0rseg.h storage/innobase/include/trx0sys.h storage/innobase/include/trx0sys.ic storage/innobase/srv/srv0srv.c storage/innobase/srv/srv0start.c storage/innobase/sync/sync0sync.c storage/innobase/trx/trx0purge.c storage/innobase/trx/trx0rseg.c storage/innobase/trx/trx0sys.c storage/innobase/trx/trx0trx.c storage/innobase/ut/ut0ut.c === modified file 'mysql-test/r/filesort_debug.result' --- a/mysql-test/r/filesort_debug.result 2011-02-02 13:41:10 +0000 +++ b/mysql-test/r/filesort_debug.result 2011-02-24 07:18:03 +0000 @@ -26,3 +26,20 @@ DELETE FROM t1 ORDER BY (f1(10)) LIMIT 1 ERROR 42000: Incorrect number of arguments for FUNCTION test.f1; expected 0, got 1 DROP TABLE t1; DROP FUNCTION f1; +# +# Bug #11747102 +# 30771: LOG MORE INFO ABOUT THREADS KILL'D AND SORT ABORTED MESSAGES +# +# connection 1 +CREATE TABLE t1(f0 int auto_increment primary key, f1 int); +INSERT INTO t1(f1) VALUES (0),(1),(2),(3),(4),(5); +SET DEBUG_SYNC='filesort_start SIGNAL filesort_started WAIT_FOR filesort_killed'; +# Sending: (not reaped since connection is killed later) +SELECT * FROM t1 ORDER BY f1 ASC, f0; +# connection 2 +SET DEBUG_SYNC='now WAIT_FOR filesort_started'; +KILL @id; +SET DEBUG_SYNC='now SIGNAL filesort_killed'; +# connection default +SET DEBUG_SYNC= "RESET"; +DROP TABLE t1; === modified file 'mysql-test/suite/sys_vars/r/all_vars.result' --- a/mysql-test/suite/sys_vars/r/all_vars.result 2011-02-17 14:06:09 +0000 +++ b/mysql-test/suite/sys_vars/r/all_vars.result 2011-02-23 14:57:29 +0000 @@ -13,6 +13,7 @@ select variable_name as `There should be left join t1 on variable_name=test_name where test_name is null; There should be *no* variables listed below: INNODB_STATS_TRANSIENT_SAMPLE_PAGES +INNODB_ROLLBACK_SEGMENTS INNODB_STATS_PERSISTENT_SAMPLE_PAGES RELAY_LOG_BASENAME LOG_BIN_BASENAME @@ -26,6 +27,7 @@ INNODB_MONITOR_DISABLE INNODB_FILE_FORMAT_MAX INNODB_MONITOR_ENABLE INNODB_STATS_TRANSIENT_SAMPLE_PAGES +INNODB_ROLLBACK_SEGMENTS INNODB_STATS_PERSISTENT_SAMPLE_PAGES RELAY_LOG_BASENAME LOG_BIN_BASENAME === modified file 'mysql-test/t/filesort_debug.test' --- a/mysql-test/t/filesort_debug.test 2011-02-02 13:41:10 +0000 +++ b/mysql-test/t/filesort_debug.test 2011-02-24 07:18:03 +0000 @@ -1,4 +1,6 @@ --source include/have_debug.inc +--source include/have_debug_sync.inc +--source include/count_sessions.inc SET @old_debug= @@session.debug; @@ -38,3 +40,37 @@ DELETE FROM t1 ORDER BY (f1(10)) LIMIT 1 DROP TABLE t1; DROP FUNCTION f1; + +--echo # +--echo # Bug #11747102 +--echo # 30771: LOG MORE INFO ABOUT THREADS KILL'D AND SORT ABORTED MESSAGES +--echo # + +connect (con1, localhost, root); +connect (con2, localhost, root); + +--echo # connection 1 +connection con1; +CREATE TABLE t1(f0 int auto_increment primary key, f1 int); +INSERT INTO t1(f1) VALUES (0),(1),(2),(3),(4),(5); + +let $ID= `SELECT @id := CONNECTION_ID()`; + +SET DEBUG_SYNC='filesort_start SIGNAL filesort_started WAIT_FOR filesort_killed'; +--echo # Sending: (not reaped since connection is killed later) +--send SELECT * FROM t1 ORDER BY f1 ASC, f0 + +--echo # connection 2 +connection con2; +let $ignore= `SELECT @id := $ID`; +SET DEBUG_SYNC='now WAIT_FOR filesort_started'; +KILL @id; +SET DEBUG_SYNC='now SIGNAL filesort_killed'; + +--echo # connection default +connection default; +disconnect con1; +disconnect con2; +--source include/wait_until_count_sessions.inc +SET DEBUG_SYNC= "RESET"; +DROP TABLE t1; === modified file 'sql/filesort.cc' --- a/sql/filesort.cc 2011-02-02 13:41:10 +0000 +++ b/sql/filesort.cc 2011-02-24 07:18:03 +0000 @@ -35,6 +35,7 @@ #include "bounded_queue.h" #include "filesort_utils.h" #include "sql_select.h" +#include "debug_sync.h" #ifdef HAVE_EXPLICIT_TEMPLATE_INSTANTIATION template class Bounded_queue; @@ -164,6 +165,7 @@ ha_rows filesort(THD *thd, TABLE *table, Item_subselect *subselect= tab ? tab->containing_subselect() : 0; MYSQL_FILESORT_START(table->s->db.str, table->s->table_name.str); + DEBUG_SYNC(thd, "filesort_start"); /* Release InnoDB's adaptive hash index latch (if holding) before @@ -357,12 +359,13 @@ ha_rows filesort(THD *thd, TABLE *table, } if (error) { - DBUG_ASSERT(thd->is_error()); + int kill_errno= thd->killed_errno(); + DBUG_ASSERT(thd->is_error() || kill_errno); my_printf_error(ER_FILSORT_ABORT, "%s: %s", MYF(ME_ERROR + ME_WAITTANG), ER_THD(thd, ER_FILSORT_ABORT), - thd->stmt_da->message()); + kill_errno ? ER(kill_errno) : thd->stmt_da->message()); if (global_system_variables.log_warnings > 1) { === modified file 'storage/innobase/CMakeLists.txt' --- a/storage/innobase/CMakeLists.txt 2011-01-21 16:14:47 +0000 +++ b/storage/innobase/CMakeLists.txt 2011-02-22 05:11:15 +0000 @@ -1,4 +1,4 @@ -# Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved. +# Copyright (c) 2006, 2011, Oracle and/or its affiliates. All rights reserved. # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by === modified file 'storage/innobase/handler/ha_innodb.cc' --- a/storage/innobase/handler/ha_innodb.cc 2011-02-18 13:00:57 +0000 +++ b/storage/innobase/handler/ha_innodb.cc 2011-02-23 17:35:07 +0000 @@ -272,7 +272,7 @@ static PSI_mutex_info all_innodb_mutexes # endif /* UNIV_MEM_DEBUG */ {&mem_pool_mutex_key, "mem_pool_mutex", 0}, {&mutex_list_mutex_key, "mutex_list_mutex", 0}, - {&purge_sys_mutex_key, "purge_sys_mutex", 0}, + {&purge_sys_bh_mutex_key, "purge_sys_bh_mutex", 0}, {&recv_sys_mutex_key, "recv_sys_mutex", 0}, {&rseg_mutex_key, "rseg_mutex", 0}, # ifdef UNIV_SYNC_DEBUG @@ -12064,13 +12064,20 @@ static MYSQL_SYSVAR_ULONG(io_capacity, s static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size, PLUGIN_VAR_OPCMDARG, - "Number of UNDO logs to purge in one batch from the history list. " - "Default is 20", + "Number of UNDO log pages to purge in one batch from the history list.", NULL, NULL, 20, /* Default setting */ 1, /* Minimum value */ 5000, 0); /* Maximum value */ +static MYSQL_SYSVAR_ULONG(rollback_segments, srv_rollback_segments, + PLUGIN_VAR_OPCMDARG, + "Number of UNDO logs to use.", + NULL, NULL, + 128, /* Default setting */ + 1, /* Minimum value */ + TRX_SYS_N_RSEGS, 0); /* Maximum value */ + static MYSQL_SYSVAR_ULONG(purge_threads, srv_n_purge_threads, PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY, "Purge threads can be from 0 to 32. Default is 0.", @@ -12494,6 +12501,7 @@ static struct st_mysql_sys_var* innobase MYSQL_SYSVAR(page_hash_locks), #endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */ MYSQL_SYSVAR(print_all_deadlocks), + MYSQL_SYSVAR(rollback_segments), NULL }; === modified file 'storage/innobase/include/buf0flu.h' --- a/storage/innobase/include/buf0flu.h 2010-12-14 04:45:32 +0000 +++ b/storage/innobase/include/buf0flu.h 2011-02-22 05:11:15 +0000 @@ -141,7 +141,18 @@ UNIV_INTERN void buf_flush_wait_batch_end( /*=====================*/ - buf_pool_t* buf_pool, /*!< buffer pool instance */ + buf_pool_t* buf_pool, /*!< in: buffer pool instance */ + enum buf_flush type); /*!< in: BUF_FLUSH_LRU + or BUF_FLUSH_LIST */ +/******************************************************************//** +Waits until a flush batch of the given type ends. This is called by +a thread that only wants to wait for a flush to end but doesn't do +any flushing itself. */ +UNIV_INTERN +void +buf_flush_wait_batch_end_wait_only( +/*===============================*/ + buf_pool_t* buf_pool, /*!< in: buffer pool instance */ enum buf_flush type); /*!< in: BUF_FLUSH_LRU or BUF_FLUSH_LIST */ /********************************************************************//** === modified file 'storage/innobase/include/srv0srv.h' --- a/storage/innobase/include/srv0srv.h 2011-02-10 15:01:01 +0000 +++ b/storage/innobase/include/srv0srv.h 2011-02-22 05:11:15 +0000 @@ -297,9 +297,12 @@ extern ulint srv_log_waits; /* the number of purge threads to use from the worker pool (currently 0 or 1) */ extern ulong srv_n_purge_threads; -/* the number of records to purge in one batch */ +/* the number of pages to purge in one batch */ extern ulong srv_purge_batch_size; +/* the number of rollback segments to use */ +extern ulong srv_rollback_segments; + /* variable that counts amount of data read in total (in bytes) */ extern ulint srv_data_read; === modified file 'storage/innobase/include/sync0sync.h' --- a/storage/innobase/include/sync0sync.h 2011-02-15 09:40:34 +0000 +++ b/storage/innobase/include/sync0sync.h 2011-02-22 05:11:15 +0000 @@ -90,7 +90,7 @@ extern mysql_pfs_key_t mem_hash_mutex_ke # endif /* UNIV_MEM_DEBUG */ extern mysql_pfs_key_t mem_pool_mutex_key; extern mysql_pfs_key_t mutex_list_mutex_key; -extern mysql_pfs_key_t purge_sys_mutex_key; +extern mysql_pfs_key_t purge_sys_bh_mutex_key; extern mysql_pfs_key_t recv_sys_mutex_key; extern mysql_pfs_key_t rseg_mutex_key; # ifdef UNIV_SYNC_DEBUG @@ -660,7 +660,6 @@ or row lock! */ #define SYNC_TREE_NODE_NEW 892 #define SYNC_TREE_NODE_FROM_HASH 891 #define SYNC_TREE_NODE 890 -#define SYNC_PURGE_SYS 810 #define SYNC_PURGE_LATCH 800 #define SYNC_TRX_UNDO 700 #define SYNC_RSEG 600 @@ -686,6 +685,7 @@ or row lock! */ #define SYNC_THREADS 295 #define SYNC_REC_LOCK 294 #define SYNC_TRX_SYS_HEADER 290 +#define SYNC_PURGE_QUEUE 200 #define SYNC_LOG 170 #define SYNC_LOG_FLUSH_ORDER 147 #define SYNC_RECV 168 === modified file 'storage/innobase/include/trx0purge.h' --- a/storage/innobase/include/trx0purge.h 2011-01-05 22:49:20 +0000 +++ b/storage/innobase/include/trx0purge.h 2011-02-22 05:11:15 +0000 @@ -1,6 +1,6 @@ /***************************************************************************** -Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved. +Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software @@ -70,7 +70,8 @@ UNIV_INTERN void trx_purge_sys_create( /*=================*/ - ulint n_purge_threads); /*!< in: number of purge threads */ + ulint n_purge_threads,/*!< in: number of purge threads */ + ib_bh_t* ib_bh); /*!< in/own: UNDO log min binary heap*/ /********************************************************************//** Frees the global purge system control structure. */ UNIV_INTERN @@ -140,9 +141,10 @@ struct trx_purge_struct{ worker threads */ ulint n_completed; /*!< Count of total tasks completed */ - mutex_t mutex; /*!< Mutex protecting the fields - below */ - /*-----------------------------*/ + /*------------------------------*/ + /* The following two fields form the 'purge pointer' which advances + during a purge, and which is used in history list truncation */ + purge_iter_t iter; /* Limit up to which we have read and parsed the UNDO log records. Not necessarily purged from the indexes. @@ -173,6 +175,11 @@ struct trx_purge_struct{ mem_heap_t* heap; /*!< Temporary storage used during a purge: can be emptied after purge completes */ + /*-----------------------------*/ + ib_bh_t* ib_bh; /*!< Binary min-heap, ordered on + rseg_queue_t::trx_no. It is protected + by the bh_mutex */ + mutex_t bh_mutex; /*!< Mutex protecting ib_bh */ }; /** Info required to purge a record */ @@ -183,19 +190,6 @@ struct trx_purge_rec_struct { typedef struct trx_purge_rec_struct trx_purge_rec_t; -/** Test if purge mutex is owned. */ -#define purge_mutex_own() mutex_own(&purge_sys->mutex) - -/** Acquire the flush list mutex. */ -#define purge_mutex_enter() do { \ - mutex_enter(&purge_sys->mutex); \ -} while (0) - -/** Release the purge mutex. */ -# define purge_mutex_exit() do { \ - mutex_exit(&purge_sys->mutex); \ -} while (0) - #ifndef UNIV_NONINL #include "trx0purge.ic" #endif === modified file 'storage/innobase/include/trx0rseg.h' --- a/storage/innobase/include/trx0rseg.h 2011-01-18 21:14:22 +0000 +++ b/storage/innobase/include/trx0rseg.h 2011-02-22 05:11:15 +0000 @@ -113,6 +113,7 @@ void trx_rseg_array_init( /*================*/ trx_sysf_t* sys_header, /*!< in/out: trx system header */ + ib_bh_t* ib_bh, /*!< in: rseg queue */ mtr_t* mtr); /*!< in/out: mtr */ /*************************************************************************** Free's an instance of the rollback segment in memory. */ === modified file 'storage/innobase/include/trx0sys.h' --- a/storage/innobase/include/trx0sys.h 2011-02-10 01:56:07 +0000 +++ b/storage/innobase/include/trx0sys.h 2011-02-22 05:11:15 +0000 @@ -38,6 +38,7 @@ Created 3/26/1996 Heikki Tuuri #include "mem0mem.h" #include "sync0sync.h" #include "ut0lst.h" +#include "ut0bh.h" #include "read0types.h" #include "page0types.h" #include "ut0bh.h" @@ -124,9 +125,10 @@ trx_sys_hdr_page( ulint page_no);/*!< in: page number */ /*****************************************************************//** Creates and initializes the central memory structures for the transaction -system. This is called when the database is started. */ +system. This is called when the database is started. +@return min binary heap of rsegs to purge */ UNIV_INTERN -void +ib_bh_t* trx_sys_init_at_db_start(void); /*==========================*/ /*****************************************************************//** @@ -696,9 +698,6 @@ struct trx_sys_struct{ list (update undo logs for committed transactions), protected by rseg->mutex */ - ib_bh_t* ib_bh; /*!< Binary min-heap, ordered on - rseg_queue_t::trx_no. It is protected - by the purge mutex */ mutex_t read_view_mutex;/*!< Protects the view_list */ UT_LIST_BASE_NODE_T(read_view_t) view_list; /*!< List of read views sorted === modified file 'storage/innobase/include/trx0sys.ic' --- a/storage/innobase/include/trx0sys.ic 2011-01-24 11:39:09 +0000 +++ b/storage/innobase/include/trx0sys.ic 2011-02-22 05:11:15 +0000 @@ -504,5 +504,4 @@ trx_sys_get_n_trx(void) return(n_trx); } - #endif /* !UNIV_HOTBACKUP */ === added file 'storage/innobase/include/ut0bh.h' --- a/storage/innobase/include/ut0bh.h 1970-01-01 00:00:00 +0000 +++ b/storage/innobase/include/ut0bh.h 2011-02-23 06:48:15 +0000 @@ -0,0 +1,152 @@ +/***************************************************************************//** + +Copyright (c) 2011, Oracle Corpn. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/ut0bh.h +Binary min-heap interface. + +Created 2010-05-28 by Sunny Bains +*******************************************************/ + +#ifndef INNOBASE_UT0BH_H +#define INNOBASE_UT0BH_H + +#include "univ.i" + +/** Comparison function for objects in the binary heap. */ +typedef int (*ib_bh_cmp_t)(const void* p1, const void* p2); + +typedef struct ib_bh_struct ib_bh_t; + +/**********************************************************************//** +Get the number of elements in the binary heap. +@return number of elements */ +UNIV_INLINE +ulint +ib_bh_size( +/*=======*/ + const ib_bh_t* ib_bh); /*!< in: instance */ + +/**********************************************************************//** +Test if binary heap is empty. +@return TRUE if empty. */ +UNIV_INLINE +ibool +ib_bh_is_empty( +/*===========*/ + const ib_bh_t* ib_bh); /*!< in: instance */ + +/**********************************************************************//** +Test if binary heap is full. +@return TRUE if full. */ +UNIV_INLINE +ibool +ib_bh_is_full( +/*===========*/ + const ib_bh_t* ib_bh); /*!< in: instance */ + +/**********************************************************************//** +Get a pointer to the element. +@return pointer to element */ +UNIV_INLINE +void* +ib_bh_get( +/*=======*/ + ib_bh_t* ib_bh, /*!< in: instance */ + ulint i); /*!< in: index */ + +/**********************************************************************//** +Copy an element to the binary heap. +@return pointer to copied element */ +UNIV_INLINE +void* +ib_bh_set( +/*======*/ + ib_bh_t* ib_bh, /*!< in/out: instance */ + ulint i, /*!< in: index */ + const void* elem); /*!< in: element to add */ + +/**********************************************************************//** +Return the first element from the binary heap. +@return pointer to first element or NULL if empty. */ +UNIV_INLINE +void* +ib_bh_first( +/*========*/ + ib_bh_t* ib_bh); /*!< in: instance */ + +/**********************************************************************//** +Return the last element from the binary heap. +@return pointer to last element or NULL if empty. */ +UNIV_INLINE +void* +ib_bh_last( +/*========*/ + ib_bh_t* ib_bh); /*!< in/out: instance */ + +/**********************************************************************//** +Create a binary heap. +@return a new binary heap */ +UNIV_INTERN +ib_bh_t* +ib_bh_create( +/*=========*/ + ib_bh_cmp_t compare, /*!< in: comparator */ + ulint sizeof_elem, /*!< in: size of one element */ + ulint max_elems); /*!< in: max elements allowed */ + +/**********************************************************************//** +Free a binary heap. +@return a new binary heap */ +UNIV_INTERN +void +ib_bh_free( +/*=======*/ + ib_bh_t* ib_bh); /*!< in,own: instance */ + +/**********************************************************************//** +Add an element to the binary heap. Note: The element is copied. +@return pointer to added element or NULL if full. */ +UNIV_INTERN +void* +ib_bh_push( +/*=======*/ + ib_bh_t* ib_bh, /*!< in/out: instance */ + const void* elem); /*!< in: element to add */ + +/**********************************************************************//** +Remove the first element from the binary heap. */ +UNIV_INTERN +void +ib_bh_pop( +/*======*/ + ib_bh_t* ib_bh); /*!< in/out: instance */ + +/** Binary heap data structure */ +struct ib_bh_struct { + ulint max_elems; /*!< max elements allowed */ + ulint n_elems; /*!< current size */ + ulint sizeof_elem; /*!< sizeof element */ + ib_bh_cmp_t compare; /*!< comparator */ +}; + +#ifndef UNIV_NONINL +#include "ut0bh.ic" +#endif + +#endif /* INNOBASE_UT0BH_H */ === removed file 'storage/innobase/include/ut0bh.h' --- a/storage/innobase/include/ut0bh.h 2010-06-15 03:48:34 +0000 +++ b/storage/innobase/include/ut0bh.h 1970-01-01 00:00:00 +0000 @@ -1,145 +0,0 @@ -/***************************************************************************//** - -Copyright (c) 2010, Oracle Corpn. All Rights Reserved. - -Portions of this file contain modifications contributed and copyrighted by -Sun Microsystems, Inc. Those modifications are gratefully acknowledged and -are described briefly in the InnoDB documentation. The contributions by -Sun Microsystems are incorporated with their permission, and subject to the -conditions contained in the file COPYING.Sun_Microsystems. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA - -*****************************************************************************/ - -/******************************************************************//** -@file include/ut0bh.h -Binary min-heap interface. - -Created 2010-05-28 by Sunny Bains -*******************************************************/ - -#ifndef INNOBASE_UT0BH_H -#define INNOBASE_UT0BH_H - -#include "univ.i" - -typedef int (*ib_bh_cmp_t)(const void* p1, const void* p2); - -typedef struct ib_bh_struct ib_bh_t; - -/**********************************************************************//** -Get the number of elements in the binary heap. -@return number of elements */ -UNIV_INTERN -ulint -ib_bh_size( -/*=======*/ - const ib_bh_t* ib_bh); /*!< in: instance */ - -/**********************************************************************//** -Test if binary heap is empty. -@return TRUE if empty. */ -UNIV_INTERN -ibool -ib_bh_is_empty( -/*===========*/ - const ib_bh_t* ib_bh); /*!< in: instance */ - -/**********************************************************************//** -Test if binary heap is full. -@return TRUE if full. */ -UNIV_INTERN -ibool -ib_bh_is_full( -/*===========*/ - const ib_bh_t* ib_bh); /*!< in: instance */ - -/**********************************************************************//** -Get a pointer to the element. -@return pointer to element */ -UNIV_INTERN -void* -ib_bh_get( -/*=======*/ - ib_bh_t* ib_bh, /*!< in: instance */ - ulint i); /*!< in: index */ - -/**********************************************************************//** -Copy an element to the binary heap. -@return pointer to copied element */ -UNIV_INTERN -void* -ib_bh_set( -/*======*/ - ib_bh_t* ib_bh, /*!< in,out: instance */ - ulint i, /*!< in: index */ - const void* elem); /*!< in: element to add */ - -/**********************************************************************//** -Create a binary heap. -@return a new binary heap */ -UNIV_INTERN -ib_bh_t* -ib_bh_create( -/*=========*/ - ib_bh_cmp_t compare, /*!< in: comparator */ - ulint sizeof_elem, /*!< in: size of one element */ - ulint max_elems); /*!< in: max elements allowed */ - -/**********************************************************************//** -Free a binary heap. -@return a new binary heap */ -UNIV_INTERN -void -ib_bh_free( -/*=======*/ - ib_bh_t* ib_bh); /*!< in,own: instance */ - -/**********************************************************************//** -Add an element to the binary heap. Note: The element is copied. -@return pointer to added element or NULL if full. */ -UNIV_INTERN -void* -ib_bh_push( -/*=======*/ - ib_bh_t* ib_bh, /*!< in,out: instance */ - const void* elem); /*!< in: element to add */ - -/**********************************************************************//** -Return the first element from the binary heap. -@return pointer to first element or NULL if empty. */ -UNIV_INTERN -void* -ib_bh_first( -/*========*/ - ib_bh_t* ib_bh); /*!< in,out: instance */ - -/**********************************************************************//** -Return the last element from the binary heap. -@return pointer to last element or NULL if empty. */ -UNIV_INTERN -void* -ib_bh_last( -/*========*/ - ib_bh_t* ib_bh); /*!< in,out: instance */ - -/**********************************************************************//** -Remove the first element from the binary heap. */ -UNIV_INTERN -void -ib_bh_pop( -/*======*/ - ib_bh_t* ib_bh); /*!< in,out: instance */ - -#endif /* INNOBASE_UT0BH_H */ === added file 'storage/innobase/include/ut0bh.ic' --- a/storage/innobase/include/ut0bh.ic 1970-01-01 00:00:00 +0000 +++ b/storage/innobase/include/ut0bh.ic 2011-02-22 20:24:34 +0000 @@ -0,0 +1,125 @@ +/***************************************************************************//** +Copyright (c) 2011, Oracle Corpn. All Rights Reserved. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/******************************************************************//** +@file include/ut0bh.ic +Binary min-heap implementation. + +Created 2011-01-15 by Sunny Bains +*******************************************************/ + +#include "ut0bh.h" +#include "ut0mem.h" /* For ut_memcpy() */ + +/**********************************************************************//** +Get the number of elements in the binary heap. +@return number of elements */ +UNIV_INLINE +ulint +ib_bh_size( +/*=======*/ + const ib_bh_t* ib_bh) /*!< in: instance */ +{ + return(ib_bh->n_elems); +} + +/**********************************************************************//** +Test if binary heap is empty. +@return TRUE if empty. */ +UNIV_INLINE +ibool +ib_bh_is_empty( +/*===========*/ + const ib_bh_t* ib_bh) /*!< in: instance */ +{ + return(ib_bh_size(ib_bh) == 0); +} + +/**********************************************************************//** +Test if binary heap is full. +@return TRUE if full. */ +UNIV_INLINE +ibool +ib_bh_is_full( +/*===========*/ + const ib_bh_t* ib_bh) /*!< in: instance */ +{ + return(ib_bh_size(ib_bh) >= ib_bh->max_elems); +} + +/**********************************************************************//** +Get a pointer to the element. +@return pointer to element */ +UNIV_INLINE +void* +ib_bh_get( +/*=======*/ + ib_bh_t* ib_bh, /*!< in: instance */ + ulint i) /*!< in: index */ +{ + byte* ptr = (byte*) (ib_bh + 1); + + ut_a(i < ib_bh_size(ib_bh)); + + return(ptr + (ib_bh->sizeof_elem * i)); +} + +/**********************************************************************//** +Copy an element to the binary heap. +@return pointer to copied element */ +UNIV_INLINE +void* +ib_bh_set( +/*======*/ + ib_bh_t* ib_bh, /*!< in/out: instance */ + ulint i, /*!< in: index */ + const void* elem) /*!< in: element to add */ +{ + void* ptr = ib_bh_get(ib_bh, i); + + ut_memcpy(ptr, elem, ib_bh->sizeof_elem); + + return(ptr); +} + +/**********************************************************************//** +Return the first element from the binary heap. +@return pointer to first element or NULL if empty. */ +UNIV_INLINE +void* +ib_bh_first( +/*========*/ + ib_bh_t* ib_bh) /*!< in: instance */ +{ + return(ib_bh_is_empty(ib_bh) ? NULL : ib_bh_get(ib_bh, 0)); +} + +/**********************************************************************//** +Return the last element from the binary heap. +@return pointer to last element or NULL if empty. */ +UNIV_INLINE +void* +ib_bh_last( +/*========*/ + ib_bh_t* ib_bh) /*!< in/out: instance */ +{ + return(ib_bh_is_empty(ib_bh) + ? NULL + : ib_bh_get(ib_bh, ib_bh_size(ib_bh) - 1)); +} + + === modified file 'storage/innobase/srv/srv0srv.c' --- a/storage/innobase/srv/srv0srv.c 2011-02-17 14:06:09 +0000 +++ b/storage/innobase/srv/srv0srv.c 2011-02-22 10:00:07 +0000 @@ -248,9 +248,12 @@ UNIV_INTERN ulong srv_max_buf_pool_modif /* the number of purge threads to use from the worker pool (currently 0 or 1).*/ UNIV_INTERN ulong srv_n_purge_threads = 0; -/* the number of UNDO log pages to purge in one batch */ +/* the number of pages to purge in one batch */ UNIV_INTERN ulong srv_purge_batch_size = 20; +/* the number of rollback segments to use */ +UNIV_INTERN ulong srv_rollback_segments = TRX_SYS_N_RSEGS; + /* variable counts amount of data read in total (in bytes) */ UNIV_INTERN ulint srv_data_read = 0; @@ -2639,7 +2642,7 @@ srv_task_execute(void) ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND); - os_atomic_inc_ulint(&purge_sys->mutex, &purge_sys->n_executing, 1); + os_atomic_inc_ulint(&purge_sys->bh_mutex, &purge_sys->n_executing, 1); mutex_enter(&srv_sys->tasks_mutex); @@ -2659,10 +2662,10 @@ srv_task_execute(void) que_run_threads(thr); os_atomic_inc_ulint( - &purge_sys->mutex, &purge_sys->n_completed, 1); + &purge_sys->bh_mutex, &purge_sys->n_completed, 1); } - os_atomic_dec_ulint(&purge_sys->mutex, &purge_sys->n_executing, 1); + os_atomic_dec_ulint(&purge_sys->bh_mutex, &purge_sys->n_executing, 1); return(thr != NULL); } === modified file 'storage/innobase/srv/srv0start.c' --- a/storage/innobase/srv/srv0start.c 2011-02-15 09:40:34 +0000 +++ b/storage/innobase/srv/srv0start.c 2011-02-22 05:11:15 +0000 @@ -1015,6 +1015,8 @@ innobase_start_or_create_for_mysql(void) my_bool srv_file_per_table_original_value = srv_file_per_table; mtr_t mtr; + ib_bh_t* ib_bh; + #ifdef HAVE_DARWIN_THREADS # ifdef F_FULLFSYNC /* This executable has been compiled on Mac OS X 10.3 or later. @@ -1628,12 +1630,12 @@ innobase_start_or_create_for_mysql(void) after the double write buffer has been created. */ trx_sys_create_sys_pages(); - trx_sys_init_at_db_start(); + ib_bh = trx_sys_init_at_db_start(); /* The purge system needs to create the purge view and therefore requires that the trx_sys is inited. */ - trx_purge_sys_create(srv_n_purge_threads); + trx_purge_sys_create(srv_n_purge_threads, ib_bh); dict_create(); @@ -1657,12 +1659,12 @@ innobase_start_or_create_for_mysql(void) dict_boot(); - trx_sys_init_at_db_start(); + ib_bh = trx_sys_init_at_db_start(); /* The purge system needs to create the purge view and therefore requires that the trx_sys is inited. */ - trx_purge_sys_create(srv_n_purge_threads); + trx_purge_sys_create(srv_n_purge_threads, ib_bh); srv_startup_is_before_trx_rollback_phase = FALSE; @@ -1720,12 +1722,12 @@ innobase_start_or_create_for_mysql(void) dict_boot(); - trx_sys_init_at_db_start(); + ib_bh = trx_sys_init_at_db_start(); /* The purge system needs to create the purge view and therefore requires that the trx_sys is inited. */ - trx_purge_sys_create(srv_n_purge_threads); + trx_purge_sys_create(srv_n_purge_threads, ib_bh); /* Initialize the fsp free limit global variable in the log system */ === modified file 'storage/innobase/sync/sync0sync.c' --- a/storage/innobase/sync/sync0sync.c 2011-02-15 09:40:34 +0000 +++ b/storage/innobase/sync/sync0sync.c 2011-02-22 05:11:15 +0000 @@ -1176,7 +1176,7 @@ sync_thread_add_level( case SYNC_RSEG: case SYNC_TRX_UNDO: case SYNC_PURGE_LATCH: - case SYNC_PURGE_SYS: + case SYNC_PURGE_QUEUE: case SYNC_DICT_AUTOINC_MUTEX: case SYNC_DICT_OPERATION: case SYNC_DICT_HEADER: @@ -1258,10 +1258,16 @@ sync_thread_add_level( || sync_thread_levels_g(array, SYNC_FSP, TRUE)); break; case SYNC_TRX_UNDO_PAGE: + /* Purge is allowed to read in as many UNDO pages as it likes, + there was a bogus rule here earlier that forced the caller to + acquire the purge_sys_t::mutex. The purge mutex did not really + protect anything because it was only ever acquired by the + single purge thread. The purge thread can read the UNDO pages + without any covering mutex. */ + ut_a(sync_thread_levels_contain(array, SYNC_TRX_UNDO) || sync_thread_levels_contain(array, SYNC_RSEG) - || sync_thread_levels_contain(array, SYNC_PURGE_SYS) - || sync_thread_levels_g(array, SYNC_TRX_UNDO_PAGE, TRUE)); + || sync_thread_levels_g(array, level - 1, TRUE)); break; case SYNC_RSEG_HEADER: ut_a(sync_thread_levels_contain(array, SYNC_RSEG)); === modified file 'storage/innobase/trx/trx0purge.c' --- a/storage/innobase/trx/trx0purge.c 2011-02-17 14:06:09 +0000 +++ b/storage/innobase/trx/trx0purge.c 2011-02-22 05:11:15 +0000 @@ -62,8 +62,8 @@ UNIV_INTERN mysql_pfs_key_t trx_purge_la #endif /* UNIV_PFS_RWLOCK */ #ifdef UNIV_PFS_MUTEX -/* Key to register purge_sys_mutex with performance schema */ -UNIV_INTERN mysql_pfs_key_t purge_sys_mutex_key; +/* Key to register purge_sys_bh_mutex with performance schema */ +UNIV_INTERN mysql_pfs_key_t purge_sys_bh_mutex_key; #endif /* UNIV_PFS_MUTEX */ /********************************************************************//** @@ -137,15 +137,22 @@ UNIV_INTERN void trx_purge_sys_create( /*=================*/ - ulint n_purge_threads) /*!< in: number of purge threads */ + ulint n_purge_threads, /*!< in: number of purge + threads */ + ib_bh_t* ib_bh) /*!< in, own: UNDO log min + binary heap */ { purge_sys = mem_zalloc(sizeof(*purge_sys)); + /* Take ownership of ib_bh, we are responsible for freeing it. */ + purge_sys->ib_bh = ib_bh; + rw_lock_create(trx_purge_latch_key, &purge_sys->latch, SYNC_PURGE_LATCH); - mutex_create(purge_sys_mutex_key, - &purge_sys->mutex, SYNC_PURGE_SYS); + mutex_create( + purge_sys_bh_mutex_key, &purge_sys->bh_mutex, + SYNC_PURGE_QUEUE); purge_sys->heap = mem_heap_create(256); @@ -199,9 +206,12 @@ trx_purge_sys_close(void) } rw_lock_free(&purge_sys->latch); - mutex_free(&purge_sys->mutex); + mutex_free(&purge_sys->bh_mutex); mem_heap_free(purge_sys->heap); + + ib_bh_free(purge_sys->ib_bh); + mem_free(purge_sys); purge_sys = NULL; @@ -224,30 +234,28 @@ trx_purge_add_update_undo_to_history( trx_undo_t* undo; trx_rseg_t* rseg; trx_rsegf_t* rseg_header; - trx_usegf_t* seg_header; trx_ulogf_t* undo_header; trx_upagef_t* page_header; - ulint hist_size; undo = trx->update_undo; - - ut_ad(undo); - rseg = undo->rseg; - ut_ad(mutex_own(&(rseg->mutex))); - - rseg_header = trx_rsegf_get(rseg->space, rseg->zip_size, - rseg->page_no, mtr); + rseg_header = trx_rsegf_get( + undo->rseg->space, undo->rseg->zip_size, undo->rseg->page_no, + mtr); undo_header = undo_page + undo->hdr_offset; - seg_header = undo_page + TRX_UNDO_SEG_HDR; page_header = undo_page + TRX_UNDO_PAGE_HDR; if (undo->state != TRX_UNDO_CACHED) { + ulint hist_size; +#ifdef UNIV_DEBUG + trx_usegf_t* seg_header = undo_page + TRX_UNDO_SEG_HDR; +#endif /* UNIV_DEBUG */ + /* The undo log segment will not be reused */ - if (undo->id >= TRX_RSEG_N_SLOTS) { + if (UNIV_UNLIKELY(undo->id >= TRX_RSEG_N_SLOTS)) { fprintf(stderr, "InnoDB: Error: undo->id is %lu\n", (ulong) undo->id); @@ -258,13 +266,15 @@ trx_purge_add_update_undo_to_history( MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_USED); - hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE, - MLOG_4BYTES, mtr); + hist_size = mtr_read_ulint( + rseg_header + TRX_RSEG_HISTORY_SIZE, MLOG_4BYTES, mtr); + ut_ad(undo->size == flst_get_len( seg_header + TRX_UNDO_PAGE_LIST, mtr)); - mlog_write_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE, - hist_size + undo->size, MLOG_4BYTES, mtr); + mlog_write_ulint( + rseg_header + TRX_RSEG_HISTORY_SIZE, + hist_size + undo->size, MLOG_4BYTES, mtr); } /* Add the log as the first in the history list */ @@ -283,6 +293,7 @@ trx_purge_add_update_undo_to_history( /* Write the trx number to the undo log header */ mlog_write_ull(undo_header + TRX_UNDO_TRX_NO, trx->no, mtr); + /* Write information about delete markings to the undo log header */ if (!undo->del_marks) { @@ -291,23 +302,10 @@ trx_purge_add_update_undo_to_history( } if (rseg->last_page_no == FIL_NULL) { - void* ptr; - rseg_queue_t rseg_queue; - rseg->last_page_no = undo->hdr_page_no; rseg->last_offset = undo->hdr_offset; rseg->last_trx_no = trx->no; rseg->last_del_marks = undo->del_marks; - - rseg_queue.rseg = rseg; - rseg_queue.trx_no = rseg->last_trx_no; - - rw_lock_x_lock(&trx_sys->lock); - - ptr = ib_bh_push(trx_sys->ib_bh, &rseg_queue); - ut_a(ptr); - - rw_lock_x_unlock(&trx_sys->lock); } } @@ -334,8 +332,6 @@ trx_purge_free_segment( /* fputs("Freeing an update undo log segment\n", stderr); */ - ut_ad(mutex_own(&(purge_sys->mutex))); - for (;;) { page_t* undo_page; @@ -373,6 +369,7 @@ trx_purge_free_segment( } mutex_exit(&rseg->mutex); + mtr_commit(&mtr); } @@ -444,8 +441,6 @@ trx_purge_truncate_rseg_history( mtr_t mtr; trx_id_t undo_trx_no; - ut_ad(mutex_own(&(purge_sys->mutex))); - mtr_start(&mtr); mutex_enter(&(rseg->mutex)); @@ -546,8 +541,6 @@ trx_purge_truncate_history( { ulint i; - ut_ad(purge_mutex_own()); - /* We play safe and set the truncate limit at most to the purge view low_limit number, though this is not necessary */ @@ -568,6 +561,7 @@ trx_purge_truncate_history( } } + /***********************************************************************//** Updates the last not yet purged history log info in rseg when we have purged a whole undo log. Advances also purge_sys->purge_trx_no past the purged log. */ @@ -579,7 +573,7 @@ trx_purge_rseg_get_next_history_log( ulint* n_pages_handled)/*!< in/out: number of UNDO pages handled */ { - void* ptr; + const void* ptr; page_t* undo_page; trx_ulogf_t* log_hdr; trx_usegf_t* seg_hdr; @@ -589,8 +583,6 @@ trx_purge_rseg_get_next_history_log( mtr_t mtr; rseg_queue_t rseg_queue; - ut_ad(mutex_own(&(purge_sys->mutex))); - mutex_enter(&(rseg->mutex)); ut_a(rseg->last_page_no != FIL_NULL); @@ -601,8 +593,9 @@ trx_purge_rseg_get_next_history_log( mtr_start(&mtr); - undo_page = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size, - rseg->last_page_no, &mtr); + undo_page = trx_undo_page_get_s_latched( + rseg->space, rseg->zip_size, rseg->last_page_no, &mtr); + log_hdr = undo_page + rseg->last_offset; seg_hdr = undo_page + TRX_UNDO_SEG_HDR; @@ -627,11 +620,11 @@ trx_purge_rseg_get_next_history_log( on the MySQL mailing list on Nov 9, 2004. The fut0lst.c file-based list was corrupt. The prev node pointer was FIL_NULL, even though the list length was over 8 million nodes! - We assume that purge truncates the history list in moderate + We assume that purge truncates the history list in large size pieces, and if we here reach the head of the list, the - list cannot be longer than 20 000 undo logs now. */ + list cannot be longer than 2000 000 undo logs now. */ - if (trx_sys->rseg_history_len > 20000) { + if (trx_sys->rseg_history_len > 2000000) { ut_print_timestamp(stderr); fprintf(stderr, " InnoDB: Warning: purge reached the" @@ -648,7 +641,8 @@ trx_purge_rseg_get_next_history_log( return; } - mutex_exit(&(rseg->mutex)); + mutex_exit(&rseg->mutex); + mtr_commit(&mtr); /* Read the trx number and del marks from the previous log header */ @@ -674,19 +668,25 @@ trx_purge_rseg_get_next_history_log( rseg_queue.rseg = rseg; rseg_queue.trx_no = rseg->last_trx_no; - mutex_exit(&(rseg->mutex)); + /* Purge can also produce events, however these are already ordered + in the rollback segment and any user generated event will be greater + than the events that Purge produces. ie. Purge can never produce + events from an empty rollback segment. */ - rw_lock_x_lock(&trx_sys->lock); + mutex_enter(&purge_sys->bh_mutex); - ptr = ib_bh_push(trx_sys->ib_bh, &rseg_queue); + ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue); ut_a(ptr != NULL); - rw_lock_x_unlock(&trx_sys->lock); + mutex_exit(&purge_sys->bh_mutex); + + mutex_exit(&rseg->mutex); } /***********************************************************************//** Chooses the rollback segment with the smallest trx_id. -@return zip_size if log is for a compressed table */ +@return zip_size if log is for a compressed table, ULINT_UNDEFINED if + no rollback segments to purge, 0 for non compressed tables. */ static ulint trx_purge_get_rseg_with_min_trx_id( @@ -695,51 +695,46 @@ trx_purge_get_rseg_with_min_trx_id( { ulint zip_size = 0; - trx_id_t last_trx_no = IB_ULONGLONG_MAX; - - ut_ad(mutex_own(&(purge_sys->mutex))); - rw_lock_s_lock(&trx_sys->lock); + mutex_enter(&purge_sys->bh_mutex); - if (!ib_bh_is_empty(trx_sys->ib_bh)) { - rseg_queue_t* rseg_queue; + /* Only purge consumes events from the binary heap, user + threads only produce the events. */ - rseg_queue = ib_bh_first(trx_sys->ib_bh); + if (!ib_bh_is_empty(purge_sys->ib_bh)) { + trx_rseg_t* rseg; - purge_sys->rseg = rseg_queue->rseg; - last_trx_no = rseg_queue->trx_no; + rseg = ((rseg_queue_t*) ib_bh_first(purge_sys->ib_bh))->rseg; + ib_bh_pop(purge_sys->ib_bh); - ib_bh_pop(trx_sys->ib_bh); + mutex_exit(&purge_sys->bh_mutex); + purge_sys->rseg = rseg; } else { - purge_sys->rseg = NULL; - } + mutex_exit(&purge_sys->bh_mutex); - rw_lock_s_unlock(&trx_sys->lock); - - if (purge_sys->rseg != NULL) { - trx_rseg_t* rseg = purge_sys->rseg; - - mutex_enter(&rseg->mutex); + purge_sys->rseg = NULL; - ut_a(rseg->last_page_no != FIL_NULL); + return(ULINT_UNDEFINED); + } - /* We assume in purge of externally stored fields that - space id == 0 */ - ut_a(rseg->space == 0); + ut_a(purge_sys->rseg != NULL); - zip_size = rseg->zip_size; + mutex_enter(&purge_sys->rseg->mutex); - ut_a(last_trx_no == rseg->last_trx_no); + ut_a(purge_sys->rseg->last_page_no != FIL_NULL); - purge_sys->iter.trx_no = rseg->last_trx_no; + /* We assume in purge of externally stored fields that space id == 0 */ + ut_a(purge_sys->rseg->space == 0); + zip_size = purge_sys->rseg->zip_size; - purge_sys->hdr_offset = rseg->last_offset; + ut_a(purge_sys->iter.trx_no <= purge_sys->rseg->last_trx_no); - purge_sys->hdr_page_no = rseg->last_page_no; + purge_sys->iter.trx_no = purge_sys->rseg->last_trx_no; + purge_sys->hdr_offset = purge_sys->rseg->last_offset; + purge_sys->hdr_page_no = purge_sys->rseg->last_page_no; - mutex_exit(&rseg->mutex); - } + mutex_exit(&purge_sys->rseg->mutex); return(zip_size); } @@ -805,13 +800,15 @@ trx_purge_choose_next_log(void) { ulint zip_size; - ut_ad(mutex_own(&(purge_sys->mutex))); ut_ad(purge_sys->next_stored == FALSE); zip_size = trx_purge_get_rseg_with_min_trx_id(purge_sys); if (purge_sys->rseg != NULL) { trx_purge_read_undo_rec(purge_sys, zip_size); + } else { + /* There is nothing to do yet. */ + os_thread_yield(); } } @@ -837,7 +834,6 @@ trx_purge_get_next_rec( ulint zip_size; mtr_t mtr; - ut_ad(mutex_own(&(purge_sys->mutex))); ut_ad(purge_sys->next_stored); space = purge_sys->rseg->space; @@ -875,9 +871,9 @@ trx_purge_get_next_rec( /* Try first to find the next record which requires a purge operation from the same page of the same undo log */ - next_rec = trx_undo_page_get_next_rec(rec2, - purge_sys->hdr_page_no, - purge_sys->hdr_offset); + next_rec = trx_undo_page_get_next_rec( + rec2, purge_sys->hdr_page_no, purge_sys->hdr_offset); + if (next_rec == NULL) { rec2 = trx_undo_get_next_rec( rec2, purge_sys->hdr_page_no, @@ -959,8 +955,6 @@ trx_purge_fetch_next_rec( const read_view_t* view = purge_sys->view; const purge_iter_t iter = purge_sys->iter; - ut_ad(mutex_own(&purge_sys->mutex)); - if (!purge_sys->next_stored) { trx_purge_choose_next_log(); @@ -996,10 +990,10 @@ trx_purge_fetch_next_rec( return(trx_purge_get_next_rec(n_pages_handled, heap)); } -/***********************************************************//** -Fetches an undo log record into the purge nodes in the query graph. -@return the number of pages processed. */ -static +/*******************************************************************//** +This function runs a purge batch. +@return number of undo log pages handled in the batch */ +UNIV_INTERN ulint trx_purge_attach_undo_recs( /*=======================*/ @@ -1013,8 +1007,6 @@ trx_purge_attach_undo_recs( ulint n_pages_handled = 0; ulint n_thrs = UT_LIST_GET_LEN(purge_sys->query->thrs); - ut_ad(mutex_own(&purge_sys->mutex)); - *limit = purge_sys->iter; /* Debug code to validate some pre-requisites and reset done flag. */ @@ -1203,10 +1195,14 @@ void trx_purge_truncate(void) /*====================*/ { - ut_ad(mutex_own(&purge_sys->mutex)); + static ulint count; ut_ad(trx_purge_check_limit()); + if (++count % TRX_SYS_N_RSEGS) { + return; + } + if (purge_sys->limit.trx_no == 0) { trx_purge_truncate_history(&purge_sys->iter, purge_sys->view); } else { @@ -1231,8 +1227,6 @@ trx_purge( srv_dml_needed_delay = trx_purge_dml_delay(); - mutex_enter(&purge_sys->mutex); - /* The number of tasks submitted should be completed. */ ut_a(purge_sys->n_submitted == purge_sys->n_completed); @@ -1252,8 +1246,6 @@ trx_purge( n_pages_handled = trx_purge_attach_undo_recs( n_purge_threads, purge_sys, &purge_sys->limit, batch_size); - mutex_exit(&purge_sys->mutex); - /* Do we do an asynchronous purge or not ? */ if (n_purge_threads > 0) { ulint i = 0; @@ -1290,7 +1282,7 @@ run_synchronously: que_run_threads(thr); os_atomic_inc_ulint( - &purge_sys->mutex, &purge_sys->n_completed, 1); + &purge_sys->bh_mutex, &purge_sys->n_completed, 1); if (n_purge_threads > 1) { trx_purge_wait_for_workers_to_complete(purge_sys); @@ -1301,16 +1293,12 @@ run_synchronously: to have already exited via os_event_wait(). We only truncate the UNDO log when we are 100% sure that all submitted tasks have completed. */ - mutex_enter(&purge_sys->mutex); - if (purge_sys->n_submitted == purge_sys->n_completed) { trx_purge_truncate(); } MONITOR_INC_VALUE(MONITOR_PURGE_N_PAGE_HANDLED, n_pages_handled); - mutex_exit(&purge_sys->mutex); - if (srv_print_thread_releases) { fprintf(stderr, "Purge ends; pages handled %lu\n", n_pages_handled); === modified file 'storage/innobase/trx/trx0rseg.c' --- a/storage/innobase/trx/trx0rseg.c 2011-01-18 21:14:22 +0000 +++ b/storage/innobase/trx/trx0rseg.c 2011-02-22 05:11:15 +0000 @@ -167,12 +167,15 @@ static trx_rseg_t* trx_rseg_mem_create( /*================*/ - ulint id, /*!< in: rollback segment id */ - ulint space, /*!< in: space where the segment placed */ - ulint zip_size, /*!< in: compressed page size in bytes - or 0 for uncompressed pages */ - ulint page_no, /*!< in: page number of the segment header */ - mtr_t* mtr) /*!< in: mtr */ + ulint id, /*!< in: rollback segment id */ + ulint space, /*!< in: space where the segment + placed */ + ulint zip_size, /*!< in: compressed page size in bytes + or 0 for uncompressed pages */ + ulint page_no, /*!< in: page number of the segment + header */ + ib_bh_t* ib_bh, /*!< in/out: rseg queue */ + mtr_t* mtr) /*!< in: mtr */ { ulint len; trx_rseg_t* rseg; @@ -194,51 +197,56 @@ trx_rseg_mem_create( rseg_header = trx_rsegf_get_new(space, zip_size, page_no, mtr); - rseg->max_size = mtr_read_ulint(rseg_header + TRX_RSEG_MAX_SIZE, - MLOG_4BYTES, mtr); + rseg->max_size = mtr_read_ulint( + rseg_header + TRX_RSEG_MAX_SIZE, MLOG_4BYTES, mtr); /* Initialize the undo log lists according to the rseg header */ sum_of_undo_sizes = trx_undo_lists_init(rseg); - rseg->curr_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE, - MLOG_4BYTES, mtr) + rseg->curr_size = mtr_read_ulint( + rseg_header + TRX_RSEG_HISTORY_SIZE, MLOG_4BYTES, mtr) + 1 + sum_of_undo_sizes; len = flst_get_len(rseg_header + TRX_RSEG_HISTORY, mtr); + if (len > 0) { - void* ptr; rseg_queue_t rseg_queue; trx_sys->rseg_history_len += len; node_addr = trx_purge_get_log_from_hist( flst_get_last(rseg_header + TRX_RSEG_HISTORY, mtr)); + rseg->last_page_no = node_addr.page; rseg->last_offset = node_addr.boffset; - undo_log_hdr = trx_undo_page_get(rseg->space, rseg->zip_size, - node_addr.page, - mtr) + node_addr.boffset; + undo_log_hdr = trx_undo_page_get( + rseg->space, rseg->zip_size, node_addr.page, + mtr) + node_addr.boffset; rseg->last_trx_no = mach_read_from_8( undo_log_hdr + TRX_UNDO_TRX_NO); + rseg->last_del_marks = mtr_read_ulint( undo_log_hdr + TRX_UNDO_DEL_MARKS, MLOG_2BYTES, mtr); rseg_queue.rseg = rseg; rseg_queue.trx_no = rseg->last_trx_no; - /* There is no need to cover this operation by the purge - mutex because we are still bootstrapping. */ + if (rseg->last_page_no != FIL_NULL) { + const void* ptr; - ptr = ib_bh_push(trx_sys->ib_bh, &rseg_queue); - ut_a(ptr != NULL); + /* There is no need to cover this operation by the purge + mutex because we are still bootstrapping. */ + + ptr = ib_bh_push(ib_bh, &rseg_queue); + ut_a(ptr != NULL); + } } else { rseg->last_page_no = FIL_NULL; } - return(rseg); } @@ -249,8 +257,9 @@ static void trx_rseg_create_instance( /*=====================*/ - trx_sysf_t* sys_header, /*!< in/out: trx system header */ - mtr_t* mtr) /*!< in/out: mtr */ + trx_sysf_t* sys_header, /*!< in: trx system header */ + ib_bh_t* ib_bh, /*!< in/out: rseg queue */ + mtr_t* mtr) /*!< in: mtr */ { ulint i; @@ -273,7 +282,7 @@ trx_rseg_create_instance( zip_size = space ? fil_space_get_zip_size(space) : 0; rseg = trx_rseg_mem_create( - i, space, zip_size, page_no, mtr); + i, space, zip_size, page_no, ib_bh, mtr); ut_a(rseg->id == i); } @@ -318,7 +327,8 @@ trx_rseg_create(void) zip_size = space ? fil_space_get_zip_size(space) : 0; rseg = trx_rseg_mem_create( - slot_no, space, zip_size, page_no, &mtr); + slot_no, space, zip_size, page_no, + purge_sys->ib_bh, &mtr); } mtr_commit(&mtr); @@ -334,9 +344,10 @@ void trx_rseg_array_init( /*================*/ trx_sysf_t* sys_header, /* in/out: trx system header */ - mtr_t* mtr) /* in/out: mtr */ + ib_bh_t* ib_bh, /*!< in: rseg queue */ + mtr_t* mtr) /*!< in: mtr */ { trx_sys->rseg_history_len = 0; - trx_rseg_create_instance(sys_header, mtr); + trx_rseg_create_instance(sys_header, ib_bh, mtr); } === modified file 'storage/innobase/trx/trx0sys.c' --- a/storage/innobase/trx/trx0sys.c 2011-01-24 10:28:28 +0000 +++ b/storage/innobase/trx/trx0sys.c 2011-02-22 05:11:15 +0000 @@ -950,23 +950,58 @@ trx_sysf_create( } /*****************************************************************//** +Compare two trx_rseg_t instances on last_trx_no. */ +static +int +trx_rseg_compare_last_trx_no( +/*=========================*/ + const void* p1, /*!< in: elem to compare */ + const void* p2) /*!< in: elem to compare */ +{ + ib_int64_t cmp; + + const rseg_queue_t* rseg_q1 = (const rseg_queue_t*) p1; + const rseg_queue_t* rseg_q2 = (const rseg_queue_t*) p2; + + cmp = rseg_q1->trx_no - rseg_q2->trx_no; + + if (cmp < 0) { + return(-1); + } else if (cmp > 0) { + return(1); + } + + return(0); +} + +/*****************************************************************//** Creates and initializes the central memory structures for the transaction -system. This is called when the database is started. */ +system. This is called when the database is started. +@return min binary heap of rsegs to purge */ UNIV_INTERN -void +ib_bh_t* trx_sys_init_at_db_start(void) /*==========================*/ { + mtr_t mtr; + ib_bh_t* ib_bh; trx_sysf_t* sys_header; ib_uint64_t rows_to_undo = 0; const char* unit = ""; - mtr_t mtr; + + /* We create the min binary heap here and pass ownership to + purge when we init the purge sub-system. Purge is responsible + for freeing the binary heap. */ + + ib_bh = ib_bh_create( + trx_rseg_compare_last_trx_no, + sizeof(rseg_queue_t), TRX_SYS_N_RSEGS); mtr_start(&mtr); sys_header = trx_sysf_get(&mtr); - trx_rseg_array_init(sys_header, &mtr); + trx_rseg_array_init(sys_header, ib_bh, &mtr); /* VERY important: after the database is started, max_trx_id value is divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in @@ -986,6 +1021,10 @@ trx_sys_init_at_db_start(void) trx_lists_init_at_db_start(); + /* This S lock is not strictly required, it is here only to satisfy + the debug code (assertions). We are still running in single threaded + bootstrap mode. */ + rw_lock_s_lock(&trx_sys->lock); if (UT_LIST_GET_LEN(trx_sys->trx_list) > 0) { @@ -1022,31 +1061,8 @@ trx_sys_init_at_db_start(void) UT_LIST_INIT(trx_sys->view_list); mtr_commit(&mtr); -} -/*****************************************************************//** -Compare two trx_rseg_t instances on last_trx_no. */ -static -int -trx_rseg_compare_last_trx_no( -/*=========================*/ - const void* p1, /*!< in: elem to compare */ - const void* p2) /*!< in: elem to compare */ -{ - ib_int64_t cmp; - - const rseg_queue_t* rseg_q1 = (const rseg_queue_t*) p1; - const rseg_queue_t* rseg_q2 = (const rseg_queue_t*) p2; - - cmp = rseg_q1->trx_no - rseg_q2->trx_no; - - if (cmp < 0) { - return(-1); - } else if (cmp > 0) { - return(1); - } - - return(0); + return(ib_bh); } /*****************************************************************//** @@ -1061,10 +1077,6 @@ trx_sys_create(void) trx_sys = mem_zalloc(sizeof(*trx_sys)); - trx_sys->ib_bh = ib_bh_create( - trx_rseg_compare_last_trx_no, - sizeof(rseg_queue_t), TRX_SYS_N_RSEGS * 128); - rw_lock_create(trx_sys_rw_lock_key, &trx_sys->lock, SYNC_TRX_SYS); mutex_create( @@ -1694,8 +1706,6 @@ trx_sys_close(void) rw_lock_free(&trx_sys->lock); - ib_bh_free(trx_sys->ib_bh); - mem_free(trx_sys); trx_sys = NULL; === modified file 'storage/innobase/trx/trx0trx.c' --- a/storage/innobase/trx/trx0trx.c 2011-02-17 14:06:09 +0000 +++ b/storage/innobase/trx/trx0trx.c 2011-02-22 05:11:15 +0000 @@ -43,6 +43,7 @@ Created 3/26/1996 Heikki Tuuri #include "btr0sea.h" #include "os0proc.h" #include "trx0xa.h" +#include "trx0purge.h" #include "ha_prototypes.h" #include "srv0mon.h" @@ -595,11 +596,12 @@ trx_lists_init_at_db_start(void) /******************************************************************//** Assigns a rollback segment to a transaction in a round-robin fashion. -@return assigned rollback segment */ +@return assigned rollback segment instance */ UNIV_INLINE trx_rseg_t* -trx_assign_rseg(void) -/*=================*/ +trx_assign_rseg( +/*============*/ + ulint max_undo_logs) /*!< in: maximum number of UNDO logs to use */ { ulint i; trx_rseg_t* rseg; @@ -607,8 +609,16 @@ trx_assign_rseg(void) /* This breaks true round robin but that should be OK. */ + ut_a(max_undo_logs > 0 && max_undo_logs <= TRX_SYS_N_RSEGS); + i = latest_rseg++; - i %= TRX_SYS_N_RSEGS; + i %= max_undo_logs; + + /* Note: The assumtion here is that there can't be any gaps in + the array. Once we implement more flexible rollback segment + management this may not hold. The assertion checks for that case. */ + + ut_a(trx_sys->rseg_array[0] != NULL); do { rseg = trx_sys->rseg_array[i]; @@ -618,14 +628,6 @@ trx_assign_rseg(void) } while (rseg == NULL); -#ifdef UNIV_DEBUG - rw_lock_x_lock(&trx_sys->lock); - - ut_ad(trx_sys_validate_trx_list()); - - rw_lock_x_unlock(&trx_sys->lock); -#endif /* UNIV_DEBUG */ - return(rseg); } @@ -648,7 +650,7 @@ trx_start_low( /* The initial value for trx->no: IB_ULONGLONG_MAX is used in read_view_open_now: */ - rseg = trx_assign_rseg(); + rseg = trx_assign_rseg(srv_rollback_segments); trx->no = IB_ULONGLONG_MAX; @@ -663,6 +665,7 @@ trx_start_low( trx->in_mysql_trx_list would hold. In that case, the trx->state change must be protected by the trx_sys->lock, so that lock_print_info_all_transactions() will have a consistent view. */ + trx->state = TRX_STATE_ACTIVE; trx->id = trx_sys_get_new_trx_id(); @@ -679,100 +682,170 @@ trx_start_low( } /****************************************************************//** -Commits a transaction. */ -UNIV_INTERN +Set the transaction serialisation number. */ +static void -trx_commit( -/*=======*/ - trx_t* trx) /*!< in: transaction */ +trx_serialisation_number_get( +/*=========================*/ + trx_t* trx) /*!< in: transaction */ { - mtr_t mtr; - trx_named_savept_t* savep; - ib_uint64_t lsn = 0; - page_t* update_hdr_page; + trx_rseg_t* rseg; - ut_ad(trx->in_trx_list); - ut_ad(!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)); + rseg = trx->rseg; - if (trx->insert_undo != NULL || trx->update_undo != NULL) { - trx_rseg_t* rseg; + ut_ad(mutex_own(&rseg->mutex)); + + rw_lock_x_lock(&trx_sys->lock); + + trx->no = trx_sys_get_new_trx_id(); + + /* If the rollack segment is not empty then the + new trx_t::no can't be less than any trx_t::no + already in the rollback segment. User threads only + produce events when a rollback segment is empty. */ + + if (rseg->last_page_no == FIL_NULL) { + void* ptr; + rseg_queue_t rseg_queue; + + rseg_queue.rseg = rseg; + rseg_queue.trx_no = trx->no; + + mutex_enter(&purge_sys->bh_mutex); + + /* This is to reduce the pressure on the trx_sys_t::lock + though in reality it should make very little (read no) + difference because this code path is only taken when the + rbs is empty. */ + + rw_lock_x_unlock(&trx_sys->lock); + + ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue); + ut_a(ptr); + + mutex_exit(&purge_sys->bh_mutex); + } else { + rw_lock_x_unlock(&trx_sys->lock); + } +} - rseg = trx->rseg; +/****************************************************************//** +Assign the transaction its history serialisation number and write the +update UNDO log record to the assigned rollback segment. +@return the LSN of the UNDO log write. */ +static +ib_uint64_t +trx_write_serialisation_history( +/*============================*/ + trx_t* trx) /*!< in: transaction */ +{ + + mtr_t mtr; + trx_rseg_t* rseg; + + rseg = trx->rseg; - mtr_start(&mtr); + mtr_start(&mtr); - /* Change the undo log segment states from TRX_UNDO_ACTIVE - to some other state: these modifications to the file data - structure define the transaction as committed in the file - based world, at the serialization point of the log sequence - number lsn obtained below. */ + /* Change the undo log segment states from TRX_UNDO_ACTIVE + to some other state: these modifications to the file data + structure define the transaction as committed in the file + based domain, at the serialization point of the log sequence + number lsn obtained below. */ + + if (trx->update_undo != NULL) { + page_t* undo_hdr_page; + trx_undo_t* undo = trx->update_undo; + + /* We have to hold the rseg mutex because update + log headers have to be put to the history list in the + (serialisation) order of the UNDO trx number. This is + required for the purge in-memory data structures too. */ mutex_enter(&rseg->mutex); - if (trx->insert_undo != NULL) { - trx_undo_set_state_at_finish(trx->insert_undo, &mtr); - } + /* Assign the transaction serialisation number and also + update the purge min binary heap if this is the first + UNDO log being written to the assigned rollback segment. */ + + trx_serialisation_number_get(trx); + + /* It is not necessary to obtain trx->undo_mutex here + because only a single OS thread is allowed to do the + transaction commit for this transaction. */ - if (trx->update_undo != NULL) { + undo_hdr_page = trx_undo_set_state_at_finish(undo, &mtr); - rw_lock_x_lock(&trx_sys->lock); + trx_undo_update_cleanup(trx, undo_hdr_page, &mtr); + } else { + mutex_enter(&rseg->mutex); + } - trx->no = trx_sys_get_new_trx_id(); + if (trx->insert_undo != NULL) { + trx_undo_set_state_at_finish(trx->insert_undo, &mtr); + } - rw_lock_x_unlock(&trx_sys->lock); + mutex_exit(&rseg->mutex); - /* It is not necessary to obtain trx->undo_mutex here - because only a single OS thread is allowed to do the - transaction commit for this transaction. */ + MONITOR_INC(MONITOR_TRX_COMMIT_UNDO); - update_hdr_page = trx_undo_set_state_at_finish( - trx->update_undo, &mtr); + /* Update the latest MySQL binlog name and offset info + in trx sys header if MySQL binlogging is on or the database + server is a MySQL replication slave */ - /* We have to do the cleanup for the update log while - holding the rseg mutex because update log headers - have to be put to the history list in the order of - the trx number. */ + if (trx->mysql_log_file_name + && trx->mysql_log_file_name[0] != '\0') { - trx_undo_update_cleanup(trx, update_hdr_page, &mtr); - } + trx_sys_update_mysql_binlog_offset( + trx->mysql_log_file_name, + trx->mysql_log_offset, + TRX_SYS_MYSQL_LOG_INFO, &mtr); - MONITOR_INC(MONITOR_TRX_COMMIT_UNDO); + trx->mysql_log_file_name = NULL; + } - mutex_exit(&rseg->mutex); + /* The following call commits the mini-transaction, making the + whole transaction committed in the file-based world, at this + log sequence number. The transaction becomes 'durable' when + we write the log to disk, but in the logical sense the commit + in the file-based data structures (undo logs etc.) happens + here. - /* Update the latest MySQL binlog name and offset info - in trx sys header if MySQL binlogging is on or the database - server is a MySQL replication slave */ - - if (trx->mysql_log_file_name - && trx->mysql_log_file_name[0] != '\0') { - trx_sys_update_mysql_binlog_offset( - trx->mysql_log_file_name, - trx->mysql_log_offset, - TRX_SYS_MYSQL_LOG_INFO, &mtr); - trx->mysql_log_file_name = NULL; - } + NOTE that transaction numbers, which are assigned only to + transactions with an update undo log, do not necessarily come + in exactly the same order as commit lsn's, if the transactions + have different rollback segments. To get exactly the same + order we should hold the kernel mutex up to this point, + adding to the contention of the kernel mutex. However, if + a transaction T2 is able to see modifications made by + a transaction T1, T2 will always get a bigger transaction + number and a bigger commit lsn than T1. */ + + /*--------------*/ + mtr_commit(&mtr); + /*--------------*/ + + return(mtr.end_lsn); +} - /* The following call commits the mini-transaction, making the - whole transaction committed in the file-based world, at this - log sequence number. The transaction becomes 'durable' when - we write the log to disk, but in the logical sense the commit - in the file-based data structures (undo logs etc.) happens - here. - - NOTE that transaction numbers, which are assigned only to - transactions with an update undo log, do not necessarily come - in exactly the same order as commit lsn's, if the transactions - have different rollback segments. To get exactly the same - order we should hold the trx sys mutex up to this point. - However, if a transaction T2 is able to see modifications - made by a transaction T1, T2 will always get a bigger - transaction number and a bigger commit lsn than T1. */ - - /*--------------*/ - mtr_commit(&mtr); - /*--------------*/ - lsn = mtr.end_lsn; +/****************************************************************//** +Commits a transaction. */ +UNIV_INTERN +void +trx_commit( +/*=======*/ + trx_t* trx) /*!< in: transaction */ +{ + trx_named_savept_t* savep; + ib_uint64_t lsn = 0; + + ut_ad(trx->in_trx_list); + ut_ad(!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY)); + + if (trx->insert_undo != NULL || trx->update_undo != NULL) { + lsn = trx_write_serialisation_history(trx); + } else { + lsn = 0; } trx->must_flush_log_later = FALSE; @@ -781,14 +854,17 @@ trx_commit( /* Remove the transaction from the list of active transactions now that it no longer holds any user locks. */ + rw_lock_x_lock(&trx_sys->lock); ut_ad(trx->in_trx_list); ut_d(trx->in_trx_list = FALSE); UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx); + /* If this transaction came from trx_allocate_for_mysql(), trx->in_mysql_trx_list would hold. In that case, the trx->state change must be protected by trx_sys->lock, so that lock_print_info_all_transactions() will have a consistent view. */ + trx->state = TRX_STATE_NOT_STARTED; ut_ad(trx_sys_validate_trx_list()); MONITOR_INC(MONITOR_TRX_COMMIT); === added file 'storage/innobase/ut/ut0bh.c' --- a/storage/innobase/ut/ut0bh.c 1970-01-01 00:00:00 +0000 +++ b/storage/innobase/ut/ut0bh.c 2011-02-23 06:48:15 +0000 @@ -0,0 +1,164 @@ +/***************************************************************************//** +Copyright (c) 2010, 2011, Oracle Corpn. All Rights Reserved. + +Portions of this file contain modifications contributed and copyrighted by +Sun Microsystems, Inc. Those modifications are gratefully acknowledged and +are described briefly in the InnoDB documentation. The contributions by +Sun Microsystems are incorporated with their permission, and subject to the +conditions contained in the file COPYING.Sun_Microsystems. + +This program is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free Software +Foundation; version 2 of the License. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 Temple +Place, Suite 330, Boston, MA 02111-1307 USA + +*****************************************************************************/ + +/******************************************************************//** +@file ut/ut0bh.c +Binary min-heap implementation. + +Created 2010-05-28 by Sunny Bains +*******************************************************/ + +#include "ut0bh.h" +#include "ut0mem.h" + +#ifdef UNIV_NONINL +#include "ut0bh.ic" +#endif + +#include + +/**********************************************************************//** +Create a binary heap. +@return a new binary heap */ +UNIV_INTERN +ib_bh_t* +ib_bh_create( +/*=========*/ + ib_bh_cmp_t compare, /*!< in: comparator */ + ulint sizeof_elem, /*!< in: size of one element */ + ulint max_elems) /*!< in: max elements allowed */ +{ + ulint sz; + ib_bh_t* ib_bh; + + sz = sizeof(*ib_bh) + (sizeof_elem * max_elems); + + ib_bh = (ib_bh_t*) ut_malloc(sz); + memset(ib_bh, 0x0, sz); + + ib_bh->compare = compare; + ib_bh->max_elems = max_elems; + ib_bh->sizeof_elem = sizeof_elem; + + return(ib_bh); +} + +/**********************************************************************//** +Free a binary heap. +@return a new binary heap */ +UNIV_INTERN +void +ib_bh_free( +/*=======*/ + ib_bh_t* ib_bh) /*!< in/own: instance */ +{ + ut_free(ib_bh); +} + +/**********************************************************************//** +Add an element to the binary heap. Note: The element is copied. +@return pointer to added element or NULL if full. */ +UNIV_INTERN +void* +ib_bh_push( +/*=======*/ + ib_bh_t* ib_bh, /*!< in/out: instance */ + const void* elem) /*!< in: element to add */ +{ + void* ptr; + + if (ib_bh_is_full(ib_bh)) { + return(NULL); + } else if (ib_bh_is_empty(ib_bh)) { + ++ib_bh->n_elems; + return(ib_bh_set(ib_bh, 0, elem)); + } else { + ulint i; + + i = ib_bh->n_elems; + + ++ib_bh->n_elems; + + for (ptr = ib_bh_get(ib_bh, i >> 1); + i > 0 && ib_bh->compare(ptr, elem) > 0; + i >>= 1, ptr = ib_bh_get(ib_bh, i >> 1)) { + + ib_bh_set(ib_bh, i, ptr); + } + + ptr = ib_bh_set(ib_bh, i, elem); + } + + return(ptr); +} + +/**********************************************************************//** +Remove the first element from the binary heap. */ +UNIV_INTERN +void +ib_bh_pop( +/*======*/ + ib_bh_t* ib_bh) /*!< in/out: instance */ +{ + byte* ptr; + byte* last; + ulint parent = 0; + + if (ib_bh_is_empty(ib_bh)) { + return; + } else if (ib_bh_size(ib_bh) == 1) { + --ib_bh->n_elems; + return; + } + + last = (byte*) ib_bh_last(ib_bh); + + /* Start from the child node */ + ptr = (byte*) ib_bh_get(ib_bh, 1); + + while (ptr < last) { + /* If the "right" child node is < "left" child node */ + if (ib_bh->compare(ptr + ib_bh->sizeof_elem, ptr) < 0) { + ptr += ib_bh->sizeof_elem; + } + + if (ib_bh->compare(last, ptr) <= 0) { + break; + } + + ib_bh_set(ib_bh, parent, ptr); + + parent = (ptr - (byte*) ib_bh_first(ib_bh)) + / ib_bh->sizeof_elem; + + if ((parent << 1) >= ib_bh_size(ib_bh)) { + break; + } + + ptr = (byte*) ib_bh_get(ib_bh, parent << 1); + } + + --ib_bh->n_elems; + + ib_bh_set(ib_bh, parent, last); +} === removed file 'storage/innobase/ut/ut0bh.c' --- a/storage/innobase/ut/ut0bh.c 2010-05-28 10:12:31 +0000 +++ b/storage/innobase/ut/ut0bh.c 1970-01-01 00:00:00 +0000 @@ -1,265 +0,0 @@ -/***************************************************************************//** - -Copyright (c) 2010, Oracle Corpn. All Rights Reserved. - -Portions of this file contain modifications contributed and copyrighted by -Sun Microsystems, Inc. Those modifications are gratefully acknowledged and -are described briefly in the InnoDB documentation. The contributions by -Sun Microsystems are incorporated with their permission, and subject to the -conditions contained in the file COPYING.Sun_Microsystems. - -This program is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free Software -Foundation; version 2 of the License. - -This program is distributed in the hope that it will be useful, but WITHOUT -ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS -FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. - -You should have received a copy of the GNU General Public License along with -this program; if not, write to the Free Software Foundation, Inc., 59 Temple -Place, Suite 330, Boston, MA 02111-1307 USA - -*****************************************************************************/ - -/******************************************************************//** -@file ut/ut0bh.c -Binary min-heap implementation. - -Created 2010-05-28 by Sunny Bains -*******************************************************/ - -#include "ut0bh.h" -#include "ut0mem.h" - -#include - -/** Binary heap data structure */ -struct ib_bh_struct { - ulint max_elems; /*!< max elements allowed */ - ulint n_elems; /*!< current size */ - ulint sizeof_elem; /*!< sizeof element */ - ib_bh_cmp_t compare; /*!< comparator */ -}; - -/**********************************************************************//** -Get the number of elements in the binary heap. -@return number of elements */ -UNIV_INTERN -ulint -ib_bh_size( -/*=======*/ - const ib_bh_t* ib_bh) /*!< in: instance */ -{ - return(ib_bh->n_elems); -} - -/**********************************************************************//** -Test if binary heap is empty. -@return TRUE if empty. */ -UNIV_INTERN -ibool -ib_bh_is_empty( -/*===========*/ - const ib_bh_t* ib_bh) /*!< in: instance */ -{ - return(ib_bh_size(ib_bh) == 0); -} - -/**********************************************************************//** -Test if binary heap is full. -@return TRUE if full. */ -UNIV_INTERN -ibool -ib_bh_is_full( -/*===========*/ - const ib_bh_t* ib_bh) /*!< in: instance */ -{ - return(ib_bh_size(ib_bh) >= ib_bh->max_elems); -} - -/**********************************************************************//** -Get a pointer to the element. -@return pointer to element */ -UNIV_INTERN -void* -ib_bh_get( -/*=======*/ - ib_bh_t* ib_bh, /*!< in: instance */ - ulint i) /*!< in: index */ -{ - byte* ptr = (byte*) (ib_bh + 1); - - ut_a(i < ib_bh_size(ib_bh)); - - return(ptr + (ib_bh->sizeof_elem * i)); -} - -/**********************************************************************//** -Copy an element to the binary heap. -@return pointer to copied element */ -UNIV_INTERN -void* -ib_bh_set( -/*======*/ - ib_bh_t* ib_bh, /*!< in,out: instance */ - ulint i, /*!< in: index */ - const void* elem) /*!< in: element to add */ -{ - void* ptr = ib_bh_get(ib_bh, i); - - memcpy(ptr, elem, ib_bh->sizeof_elem); - - return(ptr); -} - -/**********************************************************************//** -Create a binary heap. -@return a new binary heap */ -UNIV_INTERN -ib_bh_t* -ib_bh_create( -/*=========*/ - ib_bh_cmp_t compare, /*!< in: comparator */ - ulint sizeof_elem, /*!< in: size of one element */ - ulint max_elems) /*!< in: max elements allowed */ -{ - ulint sz; - ib_bh_t* ib_bh; - - sz = sizeof(*ib_bh) + (sizeof_elem * max_elems); - - ib_bh = (ib_bh_t*) ut_malloc(sz); - memset(ib_bh, 0x0, sz); - - ib_bh->compare = compare; - ib_bh->max_elems = max_elems; - ib_bh->sizeof_elem = sizeof_elem; - - return(ib_bh); -} - -/**********************************************************************//** -Free a binary heap. -@return a new binary heap */ -UNIV_INTERN -void -ib_bh_free( -/*=======*/ - ib_bh_t* ib_bh) /*!< in,own: instance */ -{ - ut_free(ib_bh); -} - -/**********************************************************************//** -Add an element to the binary heap. Note: The element is copied. -@return pointer to added element or NULL if full. */ -UNIV_INTERN -void* -ib_bh_push( -/*=======*/ - ib_bh_t* ib_bh, /*!< in,out: instance */ - const void* elem) /*!< in: element to add */ -{ - void* ptr = NULL; - - if (!ib_bh_is_full(ib_bh)) { - ulint i; - - if (ib_bh_is_empty(ib_bh)) { - ++ib_bh->n_elems; - return(ib_bh_set(ib_bh, 0, elem)); - } - - i = ib_bh->n_elems; - - ++ib_bh->n_elems; - - for (ptr = ib_bh_get(ib_bh, i >> 1); - i > 0 && ib_bh->compare(ptr, elem) > 0; - i >>= 1, ptr = ib_bh_get(ib_bh, i >> 1)) { - - ib_bh_set(ib_bh, i, ptr); - } - - ptr = ib_bh_set(ib_bh, i, elem); - } - - return(ptr); -} - -/**********************************************************************//** -Return the first element from the binary heap. -@return pointer to first element or NULL if empty. */ -UNIV_INTERN -void* -ib_bh_first( -/*========*/ - ib_bh_t* ib_bh) /*!< in,out: instance */ -{ - return(ib_bh_is_empty(ib_bh) ? NULL : ib_bh_get(ib_bh, 0)); -} - -/**********************************************************************//** -Return the last element from the binary heap. -@return pointer to last element or NULL if empty. */ -UNIV_INTERN -void* -ib_bh_last( -/*========*/ - ib_bh_t* ib_bh) /*!< in,out: instance */ -{ - return(ib_bh_is_empty(ib_bh) - ? NULL - : ib_bh_get(ib_bh, ib_bh_size(ib_bh) - 1)); -} - -/**********************************************************************//** -Remove the first element from the binary heap. */ -UNIV_INTERN -void -ib_bh_pop( -/*======*/ - ib_bh_t* ib_bh) /*!< in,out: instance */ -{ - byte* ptr; - byte* last; - ulint parent = 0; - - if (ib_bh_is_empty(ib_bh)) { - return; - } else if (ib_bh_size(ib_bh) == 1) { - --ib_bh->n_elems; - return; - } - - last = (byte*) ib_bh_last(ib_bh); - - /* Start from the child node */ - ptr = (byte*) ib_bh_get(ib_bh, 1); - - while (ptr < last) { - /* If the "right" child node is < "left" child node */ - if (ib_bh->compare(ptr + ib_bh->sizeof_elem, ptr) < 0) { - ptr += ib_bh->sizeof_elem; - } - - if (ib_bh->compare(last, ptr) > 0) { - ib_bh_set(ib_bh, parent, ptr); - } else { - break; - } - - parent = (ptr - (byte*) ib_bh_first(ib_bh)) / ib_bh->sizeof_elem; - - if ((parent << 1) < ib_bh_size(ib_bh)) { - ptr = (byte*) ib_bh_get(ib_bh, parent << 1); - } else { - break; - } - } - - --ib_bh->n_elems; - - ib_bh_set(ib_bh, parent, last); -} === modified file 'storage/innobase/ut/ut0ut.c' --- a/storage/innobase/ut/ut0ut.c 2010-11-15 07:11:18 +0000 +++ b/storage/innobase/ut/ut0ut.c 2011-02-22 05:11:15 +0000 @@ -1,13 +1,6 @@ /***************************************************************************** -Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved. -Copyright (c) 2009, Sun Microsystems, Inc. - -Portions of this file contain modifications contributed and copyrighted by -Sun Microsystems, Inc. Those modifications are gratefully acknowledged and -are described briefly in the InnoDB documentation. The contributions by -Sun Microsystems are incorporated with their permission, and subject to the -conditions contained in the file COPYING.Sun_Microsystems. +Copyright (c) 2011, Oracle Corpn. All Rights Reserved. This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software No bundle (reason: revision is a merge).