List:Commits« Previous MessageNext Message »
From:Bjorn Munch Date:February 24 2011 3:55pm
Subject:bzr push into mysql-trunk-mtr branch (bjorn.munch:3031 to 3033)
View as plain text  
 3033 Bjorn Munch	2011-02-24 [merge]
      upmerge some follow-up fixes from 5.5-mtr

    modified:
      mysql-test/lib/My/Find.pm
      mysql-test/mysql-test-run.pl
      mysql-test/suite/rpl/r/rpl_slave_load_remove_tmpfile.result
      mysql-test/suite/rpl/t/rpl_slave_load_remove_tmpfile.test
 3032 Bjorn Munch	2011-02-24 [merge]
      merge from trunk

    removed:
      storage/innobase/include/ut0bh.h
      storage/innobase/ut/ut0bh.c
    added:
      storage/innobase/include/ut0bh.h
      storage/innobase/include/ut0bh.ic
      storage/innobase/ut/ut0bh.c
    modified:
      mysql-test/r/filesort_debug.result
      mysql-test/suite/sys_vars/r/all_vars.result
      mysql-test/t/filesort_debug.test
      sql/filesort.cc
      storage/innobase/CMakeLists.txt
      storage/innobase/handler/ha_innodb.cc
      storage/innobase/include/buf0flu.h
      storage/innobase/include/srv0srv.h
      storage/innobase/include/sync0sync.h
      storage/innobase/include/trx0purge.h
      storage/innobase/include/trx0rseg.h
      storage/innobase/include/trx0sys.h
      storage/innobase/include/trx0sys.ic
      storage/innobase/srv/srv0srv.c
      storage/innobase/srv/srv0start.c
      storage/innobase/sync/sync0sync.c
      storage/innobase/trx/trx0purge.c
      storage/innobase/trx/trx0rseg.c
      storage/innobase/trx/trx0sys.c
      storage/innobase/trx/trx0trx.c
      storage/innobase/ut/ut0ut.c
 3031 Bjorn Munch	2011-02-23 [merge]
      Upmerge 11762407 (54999) with additions

    modified:
      mysql-test/extra/binlog_tests/binlog.test
      mysql-test/extra/rpl_tests/rpl_binlog_max_cache_size.test
      mysql-test/extra/rpl_tests/rpl_conflicts.test
      mysql-test/extra/rpl_tests/rpl_extra_col_master.test
      mysql-test/extra/rpl_tests/rpl_extra_col_slave.test
      mysql-test/extra/rpl_tests/rpl_loaddata.test
      mysql-test/extra/rpl_tests/rpl_row_basic.test
      mysql-test/extra/rpl_tests/rpl_row_empty_imgs.test
      mysql-test/extra/rpl_tests/rpl_row_tabledefs.test
      mysql-test/extra/rpl_tests/rpl_stm_EE_err2.test
      mysql-test/extra/rpl_tests/rpl_stop_middle_group.test
      mysql-test/include/mtr_warnings.sql
      mysql-test/r/flush_read_lock.result
      mysql-test/suite/binlog/r/binlog_base64_flag.result
      mysql-test/suite/binlog/r/binlog_row_binlog.result
      mysql-test/suite/binlog/r/binlog_stm_binlog.result
      mysql-test/suite/binlog/t/binlog_base64_flag.test
      mysql-test/suite/rpl/r/rpl_binlog_corruption.result
      mysql-test/suite/rpl/r/rpl_bug33931.result
      mysql-test/suite/rpl/r/rpl_checksum.result
      mysql-test/suite/rpl/r/rpl_checksum_cache.result
      mysql-test/suite/rpl/r/rpl_circular_for_4_hosts.result
      mysql-test/suite/rpl/r/rpl_corruption.result
      mysql-test/suite/rpl/r/rpl_extra_col_master_innodb.result
      mysql-test/suite/rpl/r/rpl_extra_col_master_myisam.result
      mysql-test/suite/rpl/r/rpl_extra_col_slave_innodb.result
      mysql-test/suite/rpl/r/rpl_extra_col_slave_myisam.result
      mysql-test/suite/rpl/r/rpl_filter_tables_not_exist.result
      mysql-test/suite/rpl/r/rpl_heartbeat_basic.result
      mysql-test/suite/rpl/r/rpl_idempotency.result
      mysql-test/suite/rpl/r/rpl_ignore_table.result
      mysql-test/suite/rpl/r/rpl_incident.result
      mysql-test/suite/rpl/r/rpl_init_slave_errors.result
      mysql-test/suite/rpl/r/rpl_known_bugs_detection.result
      mysql-test/suite/rpl/r/rpl_loaddata.result
      mysql-test/suite/rpl/r/rpl_loaddata_fatal.result
      mysql-test/suite/rpl/r/rpl_mixed_binlog_max_cache_size.result
      mysql-test/suite/rpl/r/rpl_rotate_logs.result
      mysql-test/suite/rpl/r/rpl_row_basic_11bugs.result
      mysql-test/suite/rpl/r/rpl_row_basic_2myisam.result
      mysql-test/suite/rpl/r/rpl_row_basic_3innodb.result
      mysql-test/suite/rpl/r/rpl_row_binlog_max_cache_size.result
      mysql-test/suite/rpl/r/rpl_row_colSize.result
      mysql-test/suite/rpl/r/rpl_row_conflicts.result
      mysql-test/suite/rpl/r/rpl_row_idempotency.result
      mysql-test/suite/rpl/r/rpl_row_img_eng_full.result
      mysql-test/suite/rpl/r/rpl_row_img_sanity.result
      mysql-test/suite/rpl/r/rpl_row_inexist_tbl.result
      mysql-test/suite/rpl/r/rpl_row_tabledefs_2myisam.result
      mysql-test/suite/rpl/r/rpl_row_tabledefs_3innodb.result
      mysql-test/suite/rpl/r/rpl_show_errors.result
      mysql-test/suite/rpl/r/rpl_skip_error.result
      mysql-test/suite/rpl/r/rpl_slave_grp_exec.result
      mysql-test/suite/rpl/r/rpl_slave_load_tmpdir_not_exist.result
      mysql-test/suite/rpl/r/rpl_slave_start.result
      mysql-test/suite/rpl/r/rpl_slow_query_log.result
      mysql-test/suite/rpl/r/rpl_stm_EE_err2.result
      mysql-test/suite/rpl/r/rpl_stm_binlog_max_cache_size.result
      mysql-test/suite/rpl/r/rpl_stm_conflicts.result
      mysql-test/suite/rpl/r/rpl_stm_loaddata_concurrent.result
      mysql-test/suite/rpl/r/rpl_stm_start_stop_slave.result
      mysql-test/suite/rpl/r/rpl_stm_stop_middle_group.result
      mysql-test/suite/rpl/r/rpl_stop_slave.result
      mysql-test/suite/rpl/r/rpl_temporary_errors.result
      mysql-test/suite/rpl/r/rpl_typeconv.result
      mysql-test/suite/rpl/t/rpl_binlog_corruption.test
      mysql-test/suite/rpl/t/rpl_bug33931.test
      mysql-test/suite/rpl/t/rpl_checksum.test
      mysql-test/suite/rpl/t/rpl_checksum_cache.test
      mysql-test/suite/rpl/t/rpl_circular_for_4_hosts.test
      mysql-test/suite/rpl/t/rpl_corruption.test
      mysql-test/suite/rpl/t/rpl_filter_tables_not_exist.test
      mysql-test/suite/rpl/t/rpl_heartbeat_basic.test
      mysql-test/suite/rpl/t/rpl_idempotency.test
      mysql-test/suite/rpl/t/rpl_ignore_table.test
      mysql-test/suite/rpl/t/rpl_incident.test
      mysql-test/suite/rpl/t/rpl_init_slave_errors.test
      mysql-test/suite/rpl/t/rpl_known_bugs_detection.test
      mysql-test/suite/rpl/t/rpl_loaddata_fatal.test
      mysql-test/suite/rpl/t/rpl_rotate_logs.test
      mysql-test/suite/rpl/t/rpl_row_basic_11bugs.test
      mysql-test/suite/rpl/t/rpl_row_colSize.test
      mysql-test/suite/rpl/t/rpl_row_idempotency.test
      mysql-test/suite/rpl/t/rpl_row_img_sanity.test
      mysql-test/suite/rpl/t/rpl_row_inexist_tbl.test
      mysql-test/suite/rpl/t/rpl_show_errors.test
      mysql-test/suite/rpl/t/rpl_skip_error.test
      mysql-test/suite/rpl/t/rpl_slave_grp_exec.test
      mysql-test/suite/rpl/t/rpl_slave_load_tmpdir_not_exist.test
      mysql-test/suite/rpl/t/rpl_slave_start.test
      mysql-test/suite/rpl/t/rpl_slow_query_log.test
      mysql-test/suite/rpl/t/rpl_stm_start_stop_slave.test
      mysql-test/suite/rpl/t/rpl_stop_slave.test
      mysql-test/suite/rpl/t/rpl_temporary_errors.test
      mysql-test/suite/rpl/t/rpl_typeconv.test
      mysql-test/t/flush_read_lock.test
=== modified file 'mysql-test/lib/My/Find.pm'
--- a/mysql-test/lib/My/Find.pm	2011-01-18 10:21:37 +0000
+++ b/mysql-test/lib/My/Find.pm	2011-02-24 14:11:05 +0000
@@ -28,8 +28,6 @@ use My::Platform;
 use base qw(Exporter);
 our @EXPORT= qw(my_find_bin my_find_dir my_find_file NOT_REQUIRED);
 
-our $vs_config_dir;
-
 my $bin_extension= ".exe" if IS_WINDOWS;
 
 # Helper function to be used for fourth parameter to find functions
@@ -158,7 +156,8 @@ sub my_find_paths {
   # User can select to look in a special build dir
   # which is a subdirectory of any of the paths
   my @extra_dirs;
-  my $build_dir= $vs_config_dir || $ENV{MTR_VS_CONFIG} || $ENV{MTR_BUILD_DIR};
+  my $build_dir= $::opt_config_dir || $ENV{MTR_VS_CONFIG}
+                                   || $ENV{MTR_BUILD_DIR};
   push(@extra_dirs, $build_dir) if defined $build_dir;
 
   if (defined $extension){

=== modified file 'mysql-test/mysql-test-run.pl'
--- a/mysql-test/mysql-test-run.pl	2011-02-21 13:56:58 +0000
+++ b/mysql-test/mysql-test-run.pl	2011-02-24 15:48:27 +0000
@@ -5702,6 +5702,7 @@ sub run_ctest() {
   chdir ($bindir) or die ("Could not chdir to $bindir");
   my $tinfo;
   my $no_ctest= (IS_WINDOWS) ? 256 : -1;
+  my $ctest_vs= "";
 
   # Just ignore if not configured/built to run ctest
   if (! -f "CTestTestfile.cmake") {
@@ -5709,9 +5710,12 @@ sub run_ctest() {
     return;
   }
 
+  # Add vs-config option if needed
+  $ctest_vs= "-C $opt_vs_config" if $opt_vs_config;
+
   # Also silently ignore if we don't have ctest and didn't insist
   # Now, run ctest and collect output
-  my $ctest_out= `ctest 2>&1`;
+  my $ctest_out= `ctest $ctest_vs 2>&1`;
   if ($? == $no_ctest && $opt_ctest == -1) {
     chdir($olddir);
     return;

=== modified file 'mysql-test/r/filesort_debug.result'
--- a/mysql-test/r/filesort_debug.result	2011-02-02 13:41:10 +0000
+++ b/mysql-test/r/filesort_debug.result	2011-02-24 07:18:03 +0000
@@ -26,3 +26,20 @@ DELETE FROM t1 ORDER BY (f1(10)) LIMIT 1
 ERROR 42000: Incorrect number of arguments for FUNCTION test.f1; expected 0, got 1
 DROP TABLE t1;
 DROP FUNCTION f1;
+#
+# Bug #11747102
+# 30771: LOG MORE INFO ABOUT THREADS KILL'D AND SORT ABORTED MESSAGES
+#
+# connection 1
+CREATE TABLE t1(f0 int auto_increment primary key, f1 int);
+INSERT INTO t1(f1) VALUES (0),(1),(2),(3),(4),(5);
+SET DEBUG_SYNC='filesort_start SIGNAL filesort_started WAIT_FOR filesort_killed';
+# Sending: (not reaped since connection is killed later)
+SELECT * FROM t1 ORDER BY f1 ASC, f0;
+# connection 2
+SET DEBUG_SYNC='now WAIT_FOR filesort_started';
+KILL @id;
+SET DEBUG_SYNC='now SIGNAL filesort_killed';
+# connection default
+SET DEBUG_SYNC= "RESET";
+DROP TABLE t1;

=== modified file 'mysql-test/suite/rpl/r/rpl_slave_load_remove_tmpfile.result'
--- a/mysql-test/suite/rpl/r/rpl_slave_load_remove_tmpfile.result	2011-02-23 20:01:27 +0000
+++ b/mysql-test/suite/rpl/r/rpl_slave_load_remove_tmpfile.result	2011-02-24 15:48:27 +0000
@@ -13,5 +13,6 @@ include/stop_slave_io.inc
 RESET SLAVE;
 drop table t1;
 call mtr.add_suppression("Slave: Can't get stat of .*");
+call mtr.add_suppression("Slave SQL: Error .Can.t get stat of.* Error_code: 13");
 call mtr.add_suppression("Slave: File.* not found.*");
 include/rpl_end.inc

=== modified file 'mysql-test/suite/rpl/t/rpl_slave_load_remove_tmpfile.test'
--- a/mysql-test/suite/rpl/t/rpl_slave_load_remove_tmpfile.test	2011-02-23 20:01:27 +0000
+++ b/mysql-test/suite/rpl/t/rpl_slave_load_remove_tmpfile.test	2011-02-24 15:48:27 +0000
@@ -54,6 +54,7 @@ RESET SLAVE;
 drop table t1;
 
 call mtr.add_suppression("Slave: Can't get stat of .*");
+call mtr.add_suppression("Slave SQL: Error .Can.t get stat of.* Error_code: 13");
 call mtr.add_suppression("Slave: File.* not found.*");
 --let $rpl_only_running_threads= 1
 --source include/rpl_end.inc

=== modified file 'mysql-test/suite/sys_vars/r/all_vars.result'
--- a/mysql-test/suite/sys_vars/r/all_vars.result	2011-02-17 14:06:09 +0000
+++ b/mysql-test/suite/sys_vars/r/all_vars.result	2011-02-23 14:57:29 +0000
@@ -13,6 +13,7 @@ select variable_name as `There should be
 left join t1 on variable_name=test_name where test_name is null;
 There should be *no* variables listed below:
 INNODB_STATS_TRANSIENT_SAMPLE_PAGES
+INNODB_ROLLBACK_SEGMENTS
 INNODB_STATS_PERSISTENT_SAMPLE_PAGES
 RELAY_LOG_BASENAME
 LOG_BIN_BASENAME
@@ -26,6 +27,7 @@ INNODB_MONITOR_DISABLE
 INNODB_FILE_FORMAT_MAX
 INNODB_MONITOR_ENABLE
 INNODB_STATS_TRANSIENT_SAMPLE_PAGES
+INNODB_ROLLBACK_SEGMENTS
 INNODB_STATS_PERSISTENT_SAMPLE_PAGES
 RELAY_LOG_BASENAME
 LOG_BIN_BASENAME

=== modified file 'mysql-test/t/filesort_debug.test'
--- a/mysql-test/t/filesort_debug.test	2011-02-02 13:41:10 +0000
+++ b/mysql-test/t/filesort_debug.test	2011-02-24 07:18:03 +0000
@@ -1,4 +1,6 @@
 --source include/have_debug.inc
+--source include/have_debug_sync.inc
+--source include/count_sessions.inc
 
 SET @old_debug= @@session.debug;
 
@@ -38,3 +40,37 @@ DELETE FROM t1 ORDER BY (f1(10)) LIMIT 1
 
 DROP TABLE t1;
 DROP FUNCTION f1;
+
+--echo #
+--echo # Bug #11747102
+--echo # 30771: LOG MORE INFO ABOUT THREADS KILL'D AND SORT ABORTED MESSAGES
+--echo #
+
+connect (con1, localhost, root);
+connect (con2, localhost, root);
+
+--echo # connection 1
+connection con1;
+CREATE TABLE t1(f0 int auto_increment primary key, f1 int);
+INSERT INTO t1(f1) VALUES (0),(1),(2),(3),(4),(5);
+
+let $ID= `SELECT @id := CONNECTION_ID()`;
+
+SET DEBUG_SYNC='filesort_start SIGNAL filesort_started WAIT_FOR filesort_killed';
+--echo # Sending: (not reaped since connection is killed later)
+--send SELECT * FROM t1 ORDER BY f1 ASC, f0
+
+--echo # connection 2
+connection con2;
+let $ignore= `SELECT @id := $ID`;
+SET DEBUG_SYNC='now WAIT_FOR filesort_started';
+KILL @id;
+SET DEBUG_SYNC='now SIGNAL filesort_killed';
+
+--echo # connection default
+connection default;
+disconnect con1;
+disconnect con2;
+--source include/wait_until_count_sessions.inc
+SET DEBUG_SYNC= "RESET";
+DROP TABLE t1;

=== modified file 'sql/filesort.cc'
--- a/sql/filesort.cc	2011-02-02 13:41:10 +0000
+++ b/sql/filesort.cc	2011-02-24 07:18:03 +0000
@@ -35,6 +35,7 @@
 #include "bounded_queue.h"
 #include "filesort_utils.h"
 #include "sql_select.h"
+#include "debug_sync.h"
 
 #ifdef HAVE_EXPLICIT_TEMPLATE_INSTANTIATION
 template class Bounded_queue<uchar, uchar>;
@@ -164,6 +165,7 @@ ha_rows filesort(THD *thd, TABLE *table,
   Item_subselect *subselect= tab ? tab->containing_subselect() : 0;
 
   MYSQL_FILESORT_START(table->s->db.str, table->s->table_name.str);
+  DEBUG_SYNC(thd, "filesort_start");
 
   /*
    Release InnoDB's adaptive hash index latch (if holding) before
@@ -357,12 +359,13 @@ ha_rows filesort(THD *thd, TABLE *table,
   }
   if (error)
   {
-    DBUG_ASSERT(thd->is_error());
+    int kill_errno= thd->killed_errno();
+    DBUG_ASSERT(thd->is_error() || kill_errno);
     my_printf_error(ER_FILSORT_ABORT,
                     "%s: %s",
                     MYF(ME_ERROR + ME_WAITTANG),
                     ER_THD(thd, ER_FILSORT_ABORT),
-                    thd->stmt_da->message());
+                    kill_errno ? ER(kill_errno) : thd->stmt_da->message());
                     
     if (global_system_variables.log_warnings > 1)
     {

=== modified file 'storage/innobase/CMakeLists.txt'
--- a/storage/innobase/CMakeLists.txt	2011-01-21 16:14:47 +0000
+++ b/storage/innobase/CMakeLists.txt	2011-02-22 05:11:15 +0000
@@ -1,4 +1,4 @@
-# Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
+# Copyright (c) 2006, 2011, Oracle and/or its affiliates. All rights reserved.
 # 
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License as published by

=== modified file 'storage/innobase/handler/ha_innodb.cc'
--- a/storage/innobase/handler/ha_innodb.cc	2011-02-18 13:00:57 +0000
+++ b/storage/innobase/handler/ha_innodb.cc	2011-02-24 15:48:27 +0000
@@ -272,7 +272,7 @@ static PSI_mutex_info all_innodb_mutexes
 #  endif /* UNIV_MEM_DEBUG */
 	{&mem_pool_mutex_key, "mem_pool_mutex", 0},
 	{&mutex_list_mutex_key, "mutex_list_mutex", 0},
-	{&purge_sys_mutex_key, "purge_sys_mutex", 0},
+	{&purge_sys_bh_mutex_key, "purge_sys_bh_mutex", 0},
 	{&recv_sys_mutex_key, "recv_sys_mutex", 0},
 	{&rseg_mutex_key, "rseg_mutex", 0},
 #  ifdef UNIV_SYNC_DEBUG
@@ -12064,13 +12064,20 @@ static MYSQL_SYSVAR_ULONG(io_capacity, s
 
 static MYSQL_SYSVAR_ULONG(purge_batch_size, srv_purge_batch_size,
   PLUGIN_VAR_OPCMDARG,
-  "Number of UNDO logs to purge in one batch from the history list. "
-  "Default is 20",
+  "Number of UNDO log pages to purge in one batch from the history list.",
   NULL, NULL,
   20,			/* Default setting */
   1,			/* Minimum value */
   5000, 0);		/* Maximum value */
 
+static MYSQL_SYSVAR_ULONG(rollback_segments, srv_rollback_segments,
+  PLUGIN_VAR_OPCMDARG,
+  "Number of UNDO logs to use.",
+  NULL, NULL,
+  128,			/* Default setting */
+  1,			/* Minimum value */
+  TRX_SYS_N_RSEGS, 0);	/* Maximum value */
+
 static MYSQL_SYSVAR_ULONG(purge_threads, srv_n_purge_threads,
   PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
   "Purge threads can be from 0 to 32. Default is 0.",
@@ -12494,6 +12501,7 @@ static struct st_mysql_sys_var* innobase
   MYSQL_SYSVAR(page_hash_locks),
 #endif /* defined UNIV_DEBUG || defined UNIV_PERF_DEBUG */
   MYSQL_SYSVAR(print_all_deadlocks),
+  MYSQL_SYSVAR(rollback_segments),
   NULL
 };
 

=== modified file 'storage/innobase/include/buf0flu.h'
--- a/storage/innobase/include/buf0flu.h	2010-12-14 04:45:32 +0000
+++ b/storage/innobase/include/buf0flu.h	2011-02-22 05:11:15 +0000
@@ -141,7 +141,18 @@ UNIV_INTERN
 void
 buf_flush_wait_batch_end(
 /*=====================*/
-	buf_pool_t*	buf_pool,	/*!< buffer pool instance */
+	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
+	enum buf_flush	type);		/*!< in: BUF_FLUSH_LRU
+					or BUF_FLUSH_LIST */
+/******************************************************************//**
+Waits until a flush batch of the given type ends. This is called by
+a thread that only wants to wait for a flush to end but doesn't do
+any flushing itself. */
+UNIV_INTERN
+void
+buf_flush_wait_batch_end_wait_only(
+/*===============================*/
+	buf_pool_t*	buf_pool,	/*!< in: buffer pool instance */
 	enum buf_flush	type);		/*!< in: BUF_FLUSH_LRU
 					or BUF_FLUSH_LIST */
 /********************************************************************//**

=== modified file 'storage/innobase/include/srv0srv.h'
--- a/storage/innobase/include/srv0srv.h	2011-02-10 15:01:01 +0000
+++ b/storage/innobase/include/srv0srv.h	2011-02-22 05:11:15 +0000
@@ -297,9 +297,12 @@ extern ulint srv_log_waits;
 /* the number of purge threads to use from the worker pool (currently 0 or 1) */
 extern ulong srv_n_purge_threads;
 
-/* the number of records to purge in one batch */
+/* the number of pages to purge in one batch */
 extern ulong srv_purge_batch_size;
 
+/* the number of rollback segments to use */
+extern ulong srv_rollback_segments;
+
 /* variable that counts amount of data read in total (in bytes) */
 extern ulint srv_data_read;
 

=== modified file 'storage/innobase/include/sync0sync.h'
--- a/storage/innobase/include/sync0sync.h	2011-02-15 09:40:34 +0000
+++ b/storage/innobase/include/sync0sync.h	2011-02-22 05:11:15 +0000
@@ -90,7 +90,7 @@ extern mysql_pfs_key_t	mem_hash_mutex_ke
 # endif /* UNIV_MEM_DEBUG */
 extern mysql_pfs_key_t	mem_pool_mutex_key;
 extern mysql_pfs_key_t	mutex_list_mutex_key;
-extern mysql_pfs_key_t	purge_sys_mutex_key;
+extern mysql_pfs_key_t	purge_sys_bh_mutex_key;
 extern mysql_pfs_key_t	recv_sys_mutex_key;
 extern mysql_pfs_key_t	rseg_mutex_key;
 # ifdef UNIV_SYNC_DEBUG
@@ -660,7 +660,6 @@ or row lock! */
 #define SYNC_TREE_NODE_NEW	892
 #define SYNC_TREE_NODE_FROM_HASH 891
 #define SYNC_TREE_NODE		890
-#define	SYNC_PURGE_SYS		810
 #define	SYNC_PURGE_LATCH	800
 #define	SYNC_TRX_UNDO		700
 #define SYNC_RSEG		600
@@ -686,6 +685,7 @@ or row lock! */
 #define SYNC_THREADS		295
 #define SYNC_REC_LOCK		294
 #define SYNC_TRX_SYS_HEADER	290
+#define	SYNC_PURGE_QUEUE	200
 #define SYNC_LOG		170
 #define SYNC_LOG_FLUSH_ORDER	147
 #define SYNC_RECV		168

=== modified file 'storage/innobase/include/trx0purge.h'
--- a/storage/innobase/include/trx0purge.h	2011-01-05 22:49:20 +0000
+++ b/storage/innobase/include/trx0purge.h	2011-02-22 05:11:15 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2011, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -70,7 +70,8 @@ UNIV_INTERN
 void
 trx_purge_sys_create(
 /*=================*/
-	ulint	n_purge_threads);	/*!< in: number of purge threads */
+	ulint		n_purge_threads,/*!< in: number of purge threads */
+	ib_bh_t*	ib_bh);		/*!< in/own: UNDO log min binary heap*/
 /********************************************************************//**
 Frees the global purge system control structure. */
 UNIV_INTERN
@@ -140,9 +141,10 @@ struct trx_purge_struct{
 					worker threads */
 	ulint		n_completed;	/*!< Count of total tasks completed */
 
-	mutex_t		mutex;		/*!< Mutex protecting the fields
-					below */
-	/*-----------------------------*/
+	/*------------------------------*/
+	/* The following two fields form the 'purge pointer' which advances
+	during a purge, and which is used in history list truncation */
+
 	purge_iter_t	iter;		/* Limit up to which we have read and
 					parsed the UNDO log records.  Not
 					necessarily purged from the indexes.
@@ -173,6 +175,11 @@ struct trx_purge_struct{
 	mem_heap_t*	heap;		/*!< Temporary storage used during a
 					purge: can be emptied after purge
 					completes */
+	/*-----------------------------*/
+	ib_bh_t*	ib_bh;		/*!< Binary min-heap, ordered on
+					rseg_queue_t::trx_no. It is protected
+					by the bh_mutex */
+	mutex_t		bh_mutex;	/*!< Mutex protecting ib_bh */
 };
 
 /** Info required to purge a record */
@@ -183,19 +190,6 @@ struct trx_purge_rec_struct {
 
 typedef struct trx_purge_rec_struct trx_purge_rec_t;
 
-/** Test if purge mutex is owned. */
-#define purge_mutex_own() mutex_own(&purge_sys->mutex)
-
-/** Acquire the flush list mutex. */
-#define purge_mutex_enter() do {		\
-	mutex_enter(&purge_sys->mutex);		\
-} while (0)
-
-/** Release the purge mutex. */
-# define purge_mutex_exit() do {	\
-	mutex_exit(&purge_sys->mutex);	\
-} while (0)
-
 #ifndef UNIV_NONINL
 #include "trx0purge.ic"
 #endif

=== modified file 'storage/innobase/include/trx0rseg.h'
--- a/storage/innobase/include/trx0rseg.h	2011-01-18 21:14:22 +0000
+++ b/storage/innobase/include/trx0rseg.h	2011-02-22 05:11:15 +0000
@@ -113,6 +113,7 @@ void
 trx_rseg_array_init(
 /*================*/
 	trx_sysf_t*	sys_header,	/*!< in/out: trx system header */
+	ib_bh_t*	ib_bh,		/*!< in: rseg queue */
 	mtr_t*		mtr);		/*!< in/out: mtr */
 /***************************************************************************
 Free's an instance of the rollback segment in memory. */

=== modified file 'storage/innobase/include/trx0sys.h'
--- a/storage/innobase/include/trx0sys.h	2011-02-10 01:56:07 +0000
+++ b/storage/innobase/include/trx0sys.h	2011-02-22 05:11:15 +0000
@@ -38,6 +38,7 @@ Created 3/26/1996 Heikki Tuuri
 #include "mem0mem.h"
 #include "sync0sync.h"
 #include "ut0lst.h"
+#include "ut0bh.h"
 #include "read0types.h"
 #include "page0types.h"
 #include "ut0bh.h"
@@ -124,9 +125,10 @@ trx_sys_hdr_page(
 	ulint	page_no);/*!< in: page number */
 /*****************************************************************//**
 Creates and initializes the central memory structures for the transaction
-system. This is called when the database is started. */
+system. This is called when the database is started. 
+@return min binary heap of rsegs to purge */
 UNIV_INTERN
-void
+ib_bh_t*
 trx_sys_init_at_db_start(void);
 /*==========================*/
 /*****************************************************************//**
@@ -696,9 +698,6 @@ struct trx_sys_struct{
 					list (update undo logs for committed
 					transactions), protected by
 					rseg->mutex */
-	ib_bh_t*	ib_bh;		/*!< Binary min-heap, ordered on
-					rseg_queue_t::trx_no. It is protected
-					by the purge mutex */
 	mutex_t		read_view_mutex;/*!< Protects the view_list */
 	UT_LIST_BASE_NODE_T(read_view_t) view_list;
 					/*!< List of read views sorted

=== modified file 'storage/innobase/include/trx0sys.ic'
--- a/storage/innobase/include/trx0sys.ic	2011-01-24 11:39:09 +0000
+++ b/storage/innobase/include/trx0sys.ic	2011-02-22 05:11:15 +0000
@@ -504,5 +504,4 @@ trx_sys_get_n_trx(void)
 
 	return(n_trx);
 }
-
 #endif /* !UNIV_HOTBACKUP */

=== added file 'storage/innobase/include/ut0bh.h'
--- a/storage/innobase/include/ut0bh.h	1970-01-01 00:00:00 +0000
+++ b/storage/innobase/include/ut0bh.h	2011-02-23 06:48:15 +0000
@@ -0,0 +1,152 @@
+/***************************************************************************//**
+
+Copyright (c) 2011, Oracle Corpn. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ut0bh.h
+Binary min-heap interface.
+
+Created 2010-05-28 by Sunny Bains
+*******************************************************/
+
+#ifndef INNOBASE_UT0BH_H
+#define INNOBASE_UT0BH_H
+
+#include "univ.i"
+
+/** Comparison function for objects in the binary heap. */
+typedef int (*ib_bh_cmp_t)(const void* p1, const void* p2);
+
+typedef struct ib_bh_struct ib_bh_t;
+
+/**********************************************************************//**
+Get the number of elements in the binary heap.
+@return number of elements */
+UNIV_INLINE
+ulint
+ib_bh_size(
+/*=======*/
+	const ib_bh_t*	ib_bh);			/*!< in: instance */
+
+/**********************************************************************//**
+Test if binary heap is empty.
+@return TRUE if empty. */
+UNIV_INLINE
+ibool
+ib_bh_is_empty(
+/*===========*/
+	const ib_bh_t*	ib_bh);			/*!< in: instance */
+
+/**********************************************************************//**
+Test if binary heap is full.
+@return TRUE if full. */
+UNIV_INLINE
+ibool
+ib_bh_is_full(
+/*===========*/
+	const ib_bh_t*	ib_bh);			/*!< in: instance */
+
+/**********************************************************************//**
+Get a pointer to the element.
+@return pointer to element */
+UNIV_INLINE
+void*
+ib_bh_get(
+/*=======*/
+	ib_bh_t*	ib_bh,			/*!< in: instance */
+	ulint		i);			/*!< in: index */
+
+/**********************************************************************//**
+Copy an element to the binary heap.
+@return pointer to copied element */
+UNIV_INLINE
+void*
+ib_bh_set(
+/*======*/
+	ib_bh_t*	ib_bh,			/*!< in/out: instance */
+	ulint		i,			/*!< in: index */
+	const void*	elem);			/*!< in: element to add */
+
+/**********************************************************************//**
+Return the first element from the binary heap.
+@return pointer to first element or NULL if empty. */
+UNIV_INLINE
+void*
+ib_bh_first(
+/*========*/
+	ib_bh_t*	ib_bh);			/*!< in: instance */
+
+/**********************************************************************//**
+Return the last element from the binary heap.
+@return pointer to last element or NULL if empty. */
+UNIV_INLINE
+void*
+ib_bh_last(
+/*========*/
+	ib_bh_t*	ib_bh);			/*!< in/out: instance */
+
+/**********************************************************************//**
+Create a binary heap.
+@return a new binary heap */
+UNIV_INTERN
+ib_bh_t*
+ib_bh_create(
+/*=========*/
+	ib_bh_cmp_t	compare,		/*!< in: comparator */
+	ulint		sizeof_elem,		/*!< in: size of one element */
+	ulint		max_elems);		/*!< in: max elements allowed */
+
+/**********************************************************************//**
+Free a binary heap.
+@return a new binary heap */
+UNIV_INTERN
+void
+ib_bh_free(
+/*=======*/
+	ib_bh_t*	ib_bh);			/*!< in,own: instance */
+
+/**********************************************************************//**
+Add an element to the binary heap. Note: The element is copied.
+@return pointer to added element or NULL if full. */
+UNIV_INTERN
+void*
+ib_bh_push(
+/*=======*/
+	ib_bh_t*	ib_bh,			/*!< in/out: instance */
+	const void*	elem);			/*!< in: element to add */
+
+/**********************************************************************//**
+Remove the first element from the binary heap. */
+UNIV_INTERN
+void
+ib_bh_pop(
+/*======*/
+	ib_bh_t*	ib_bh);			/*!< in/out: instance */
+
+/** Binary heap data structure */
+struct ib_bh_struct {
+	ulint		max_elems;		/*!< max elements allowed */
+	ulint		n_elems;		/*!< current size */
+	ulint		sizeof_elem;		/*!< sizeof element */
+	ib_bh_cmp_t	compare;		/*!< comparator */
+};
+
+#ifndef UNIV_NONINL
+#include "ut0bh.ic"
+#endif
+
+#endif /* INNOBASE_UT0BH_H */

=== removed file 'storage/innobase/include/ut0bh.h'
--- a/storage/innobase/include/ut0bh.h	2010-06-15 03:48:34 +0000
+++ b/storage/innobase/include/ut0bh.h	1970-01-01 00:00:00 +0000
@@ -1,145 +0,0 @@
-/***************************************************************************//**
-
-Copyright (c) 2010, Oracle Corpn. All Rights Reserved.
-
-Portions of this file contain modifications contributed and copyrighted by
-Sun Microsystems, Inc. Those modifications are gratefully acknowledged and
-are described briefly in the InnoDB documentation. The contributions by
-Sun Microsystems are incorporated with their permission, and subject to the
-conditions contained in the file COPYING.Sun_Microsystems.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file include/ut0bh.h
-Binary min-heap interface.
-
-Created 2010-05-28 by Sunny Bains
-*******************************************************/
-
-#ifndef INNOBASE_UT0BH_H
-#define INNOBASE_UT0BH_H
-
-#include "univ.i"
-
-typedef int (*ib_bh_cmp_t)(const void* p1, const void* p2);
-
-typedef struct ib_bh_struct ib_bh_t;
-
-/**********************************************************************//**
-Get the number of elements in the binary heap.
-@return number of elements */
-UNIV_INTERN
-ulint
-ib_bh_size(
-/*=======*/
-	const ib_bh_t*	ib_bh);			/*!< in: instance */
-
-/**********************************************************************//**
-Test if binary heap is empty.
-@return TRUE if empty. */
-UNIV_INTERN
-ibool
-ib_bh_is_empty(
-/*===========*/
-	const ib_bh_t*	ib_bh);			/*!< in: instance */
-
-/**********************************************************************//**
-Test if binary heap is full.
-@return TRUE if full. */
-UNIV_INTERN
-ibool
-ib_bh_is_full(
-/*===========*/
-	const ib_bh_t*	ib_bh);			/*!< in: instance */
-
-/**********************************************************************//**
-Get a pointer to the element.
-@return pointer to element */
-UNIV_INTERN
-void*
-ib_bh_get(
-/*=======*/
-	ib_bh_t*	ib_bh,			/*!< in: instance */
-	ulint		i);			/*!< in: index */
-
-/**********************************************************************//**
-Copy an element to the binary heap.
-@return pointer to copied element */
-UNIV_INTERN
-void*
-ib_bh_set(
-/*======*/
-	ib_bh_t*	ib_bh,			/*!< in,out: instance */
-	ulint		i,			/*!< in: index */
-	const void*	elem);			/*!< in: element to add */
-
-/**********************************************************************//**
-Create a binary heap.
-@return a new binary heap */
-UNIV_INTERN
-ib_bh_t*
-ib_bh_create(
-/*=========*/
-	ib_bh_cmp_t	compare,		/*!< in: comparator */
-	ulint		sizeof_elem,		/*!< in: size of one element */
-	ulint		max_elems);		/*!< in: max elements allowed */
-
-/**********************************************************************//**
-Free a binary heap.
-@return a new binary heap */
-UNIV_INTERN
-void
-ib_bh_free(
-/*=======*/
-	ib_bh_t*	ib_bh);			/*!< in,own: instance */
-
-/**********************************************************************//**
-Add an element to the binary heap. Note: The element is copied.
-@return pointer to added element or NULL if full. */
-UNIV_INTERN
-void*
-ib_bh_push(
-/*=======*/
-	ib_bh_t*	ib_bh,			/*!< in,out: instance */
-	const void*	elem);			/*!< in: element to add */
-
-/**********************************************************************//**
-Return the first element from the binary heap. 
-@return pointer to first element or NULL if empty. */
-UNIV_INTERN
-void*
-ib_bh_first(
-/*========*/
-	ib_bh_t*	ib_bh);			/*!< in,out: instance */
-
-/**********************************************************************//**
-Return the last element from the binary heap. 
-@return pointer to last element or NULL if empty. */
-UNIV_INTERN
-void*
-ib_bh_last(
-/*========*/
-	ib_bh_t*	ib_bh);			/*!< in,out: instance */
-
-/**********************************************************************//**
-Remove the first element from the binary heap. */
-UNIV_INTERN
-void
-ib_bh_pop(
-/*======*/
-	ib_bh_t*	ib_bh);			/*!< in,out: instance */
-
-#endif /* INNOBASE_UT0BH_H */

=== added file 'storage/innobase/include/ut0bh.ic'
--- a/storage/innobase/include/ut0bh.ic	1970-01-01 00:00:00 +0000
+++ b/storage/innobase/include/ut0bh.ic	2011-02-22 20:24:34 +0000
@@ -0,0 +1,125 @@
+/***************************************************************************//**
+Copyright (c) 2011, Oracle Corpn. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file include/ut0bh.ic
+Binary min-heap implementation.
+
+Created 2011-01-15 by Sunny Bains
+*******************************************************/
+
+#include "ut0bh.h"
+#include "ut0mem.h"	/* For ut_memcpy() */
+
+/**********************************************************************//**
+Get the number of elements in the binary heap.
+@return number of elements */
+UNIV_INLINE
+ulint
+ib_bh_size(
+/*=======*/
+	const ib_bh_t*	ib_bh)			/*!< in: instance */
+{
+	return(ib_bh->n_elems);
+}
+
+/**********************************************************************//**
+Test if binary heap is empty.
+@return TRUE if empty. */
+UNIV_INLINE
+ibool
+ib_bh_is_empty(
+/*===========*/
+	const ib_bh_t*	ib_bh)			/*!< in: instance */
+{
+	return(ib_bh_size(ib_bh) == 0);
+}
+
+/**********************************************************************//**
+Test if binary heap is full.
+@return TRUE if full. */
+UNIV_INLINE
+ibool
+ib_bh_is_full(
+/*===========*/
+	const ib_bh_t*	ib_bh)			/*!< in: instance */
+{
+	return(ib_bh_size(ib_bh) >= ib_bh->max_elems);
+}
+
+/**********************************************************************//**
+Get a pointer to the element.
+@return pointer to element */
+UNIV_INLINE
+void*
+ib_bh_get(
+/*=======*/
+	ib_bh_t*	ib_bh,			/*!< in: instance */
+	ulint		i)			/*!< in: index */
+{
+	byte*		ptr = (byte*) (ib_bh + 1);
+
+	ut_a(i < ib_bh_size(ib_bh));
+
+	return(ptr + (ib_bh->sizeof_elem * i));
+}
+
+/**********************************************************************//**
+Copy an element to the binary heap.
+@return pointer to copied element */
+UNIV_INLINE
+void*
+ib_bh_set(
+/*======*/
+	ib_bh_t*	ib_bh,			/*!< in/out: instance */
+	ulint		i,			/*!< in: index */
+	const void*	elem)			/*!< in: element to add */
+{
+	void*		ptr = ib_bh_get(ib_bh, i);
+
+	ut_memcpy(ptr, elem, ib_bh->sizeof_elem);
+
+	return(ptr);
+}
+
+/**********************************************************************//**
+Return the first element from the binary heap.
+@return pointer to first element or NULL if empty. */
+UNIV_INLINE
+void*
+ib_bh_first(
+/*========*/
+	ib_bh_t*	ib_bh)			/*!< in: instance */
+{
+	return(ib_bh_is_empty(ib_bh) ? NULL : ib_bh_get(ib_bh, 0));
+}
+
+/**********************************************************************//**
+Return the last element from the binary heap.
+@return pointer to last element or NULL if empty. */
+UNIV_INLINE
+void*
+ib_bh_last(
+/*========*/
+	ib_bh_t*	ib_bh)			/*!< in/out: instance */
+{
+	return(ib_bh_is_empty(ib_bh)
+		? NULL
+		: ib_bh_get(ib_bh, ib_bh_size(ib_bh) - 1));
+}
+
+

=== modified file 'storage/innobase/srv/srv0srv.c'
--- a/storage/innobase/srv/srv0srv.c	2011-02-17 14:06:09 +0000
+++ b/storage/innobase/srv/srv0srv.c	2011-02-22 10:00:07 +0000
@@ -248,9 +248,12 @@ UNIV_INTERN ulong	srv_max_buf_pool_modif
 /* the number of purge threads to use from the worker pool (currently 0 or 1).*/
 UNIV_INTERN ulong srv_n_purge_threads = 0;
 
-/* the number of UNDO log pages to purge in one batch */
+/* the number of pages to purge in one batch */
 UNIV_INTERN ulong srv_purge_batch_size = 20;
 
+/* the number of rollback segments to use */
+UNIV_INTERN ulong srv_rollback_segments = TRX_SYS_N_RSEGS;
+
 /* variable counts amount of data read in total (in bytes) */
 UNIV_INTERN ulint srv_data_read = 0;
 
@@ -2639,7 +2642,7 @@ srv_task_execute(void)
 
 	ut_a(srv_force_recovery < SRV_FORCE_NO_BACKGROUND);
 
-	os_atomic_inc_ulint(&purge_sys->mutex, &purge_sys->n_executing, 1);
+	os_atomic_inc_ulint(&purge_sys->bh_mutex, &purge_sys->n_executing, 1);
 
 	mutex_enter(&srv_sys->tasks_mutex);
 
@@ -2659,10 +2662,10 @@ srv_task_execute(void)
 		que_run_threads(thr);
 
 		os_atomic_inc_ulint(
-			&purge_sys->mutex, &purge_sys->n_completed, 1);
+			&purge_sys->bh_mutex, &purge_sys->n_completed, 1);
 	}
 
-	os_atomic_dec_ulint(&purge_sys->mutex, &purge_sys->n_executing, 1);
+	os_atomic_dec_ulint(&purge_sys->bh_mutex, &purge_sys->n_executing, 1);
 
 	return(thr != NULL);
 }

=== modified file 'storage/innobase/srv/srv0start.c'
--- a/storage/innobase/srv/srv0start.c	2011-02-15 09:40:34 +0000
+++ b/storage/innobase/srv/srv0start.c	2011-02-22 05:11:15 +0000
@@ -1015,6 +1015,8 @@ innobase_start_or_create_for_mysql(void)
 	my_bool		srv_file_per_table_original_value
 		= srv_file_per_table;
 	mtr_t		mtr;
+	ib_bh_t*	ib_bh;
+
 #ifdef HAVE_DARWIN_THREADS
 # ifdef F_FULLFSYNC
 	/* This executable has been compiled on Mac OS X 10.3 or later.
@@ -1628,12 +1630,12 @@ innobase_start_or_create_for_mysql(void)
 		after the double write buffer has been created. */
 		trx_sys_create_sys_pages();
 
-		trx_sys_init_at_db_start();
+		ib_bh = trx_sys_init_at_db_start();
 
 		/* The purge system needs to create the purge view and
 		therefore requires that the trx_sys is inited. */
 
-		trx_purge_sys_create(srv_n_purge_threads);
+		trx_purge_sys_create(srv_n_purge_threads, ib_bh);
 
 		dict_create();
 
@@ -1657,12 +1659,12 @@ innobase_start_or_create_for_mysql(void)
 
 		dict_boot();
 
-		trx_sys_init_at_db_start();
+		ib_bh = trx_sys_init_at_db_start();
 
 		/* The purge system needs to create the purge view and
 		therefore requires that the trx_sys is inited. */
 
-		trx_purge_sys_create(srv_n_purge_threads);
+		trx_purge_sys_create(srv_n_purge_threads, ib_bh);
 
 		srv_startup_is_before_trx_rollback_phase = FALSE;
 
@@ -1720,12 +1722,12 @@ innobase_start_or_create_for_mysql(void)
 
 		dict_boot();
 
-		trx_sys_init_at_db_start();
+		ib_bh = trx_sys_init_at_db_start();
 
 		/* The purge system needs to create the purge view and
 		therefore requires that the trx_sys is inited. */
 
-		trx_purge_sys_create(srv_n_purge_threads);
+		trx_purge_sys_create(srv_n_purge_threads, ib_bh);
 
 		/* Initialize the fsp free limit global variable in the log
 		system */

=== modified file 'storage/innobase/sync/sync0sync.c'
--- a/storage/innobase/sync/sync0sync.c	2011-02-15 09:40:34 +0000
+++ b/storage/innobase/sync/sync0sync.c	2011-02-22 05:11:15 +0000
@@ -1176,7 +1176,7 @@ sync_thread_add_level(
 	case SYNC_RSEG:
 	case SYNC_TRX_UNDO:
 	case SYNC_PURGE_LATCH:
-	case SYNC_PURGE_SYS:
+	case SYNC_PURGE_QUEUE:
 	case SYNC_DICT_AUTOINC_MUTEX:
 	case SYNC_DICT_OPERATION:
 	case SYNC_DICT_HEADER:
@@ -1258,10 +1258,16 @@ sync_thread_add_level(
 		     || sync_thread_levels_g(array, SYNC_FSP, TRUE));
 		break;
 	case SYNC_TRX_UNDO_PAGE:
+		/* Purge is allowed to read in as many UNDO pages as it likes,
+		there was a bogus rule here earlier that forced the caller to
+		acquire the purge_sys_t::mutex. The purge mutex did not really
+		protect anything because it was only ever acquired by the
+		single purge thread. The purge thread can read the UNDO pages
+		without any covering mutex. */
+
 		ut_a(sync_thread_levels_contain(array, SYNC_TRX_UNDO)
 		     || sync_thread_levels_contain(array, SYNC_RSEG)
-		     || sync_thread_levels_contain(array, SYNC_PURGE_SYS)
-		     || sync_thread_levels_g(array, SYNC_TRX_UNDO_PAGE, TRUE));
+		     || sync_thread_levels_g(array, level - 1, TRUE));
 		break;
 	case SYNC_RSEG_HEADER:
 		ut_a(sync_thread_levels_contain(array, SYNC_RSEG));

=== modified file 'storage/innobase/trx/trx0purge.c'
--- a/storage/innobase/trx/trx0purge.c	2011-02-17 14:06:09 +0000
+++ b/storage/innobase/trx/trx0purge.c	2011-02-22 05:11:15 +0000
@@ -62,8 +62,8 @@ UNIV_INTERN mysql_pfs_key_t	trx_purge_la
 #endif /* UNIV_PFS_RWLOCK */
 
 #ifdef UNIV_PFS_MUTEX
-/* Key to register purge_sys_mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t	purge_sys_mutex_key;
+/* Key to register purge_sys_bh_mutex with performance schema */
+UNIV_INTERN mysql_pfs_key_t	purge_sys_bh_mutex_key;
 #endif /* UNIV_PFS_MUTEX */
 
 /********************************************************************//**
@@ -137,15 +137,22 @@ UNIV_INTERN
 void
 trx_purge_sys_create(
 /*=================*/
-	ulint	n_purge_threads)	/*!< in: number of purge threads */
+	ulint		n_purge_threads,	/*!< in: number of purge
+						threads */
+	ib_bh_t*	ib_bh)			/*!< in, own: UNDO log min
+						binary heap */
 {
 	purge_sys = mem_zalloc(sizeof(*purge_sys));
 
+	/* Take ownership of ib_bh, we are responsible for freeing it. */
+	purge_sys->ib_bh = ib_bh;
+
 	rw_lock_create(trx_purge_latch_key,
 		       &purge_sys->latch, SYNC_PURGE_LATCH);
 
-	mutex_create(purge_sys_mutex_key,
-		     &purge_sys->mutex, SYNC_PURGE_SYS);
+	mutex_create(
+		purge_sys_bh_mutex_key, &purge_sys->bh_mutex,
+		SYNC_PURGE_QUEUE);
 
 	purge_sys->heap = mem_heap_create(256);
 
@@ -199,9 +206,12 @@ trx_purge_sys_close(void)
 	}
 
 	rw_lock_free(&purge_sys->latch);
-	mutex_free(&purge_sys->mutex);
+	mutex_free(&purge_sys->bh_mutex);
 
 	mem_heap_free(purge_sys->heap);
+
+	ib_bh_free(purge_sys->ib_bh);
+
 	mem_free(purge_sys);
 
 	purge_sys = NULL;
@@ -224,30 +234,28 @@ trx_purge_add_update_undo_to_history(
 	trx_undo_t*	undo;
 	trx_rseg_t*	rseg;
 	trx_rsegf_t*	rseg_header;
-	trx_usegf_t*	seg_header;
 	trx_ulogf_t*	undo_header;
 	trx_upagef_t*	page_header;
-	ulint		hist_size;
 
 	undo = trx->update_undo;
-
-	ut_ad(undo);
-
 	rseg = undo->rseg;
 
-	ut_ad(mutex_own(&(rseg->mutex)));
-
-	rseg_header = trx_rsegf_get(rseg->space, rseg->zip_size,
-				    rseg->page_no, mtr);
+	rseg_header = trx_rsegf_get(
+		undo->rseg->space, undo->rseg->zip_size, undo->rseg->page_no,
+		mtr);
 
 	undo_header = undo_page + undo->hdr_offset;
-	seg_header  = undo_page + TRX_UNDO_SEG_HDR;
 	page_header = undo_page + TRX_UNDO_PAGE_HDR;
 
 	if (undo->state != TRX_UNDO_CACHED) {
+		ulint		hist_size;
+#ifdef UNIV_DEBUG
+		trx_usegf_t*	seg_header = undo_page + TRX_UNDO_SEG_HDR;
+#endif /* UNIV_DEBUG */
+
 		/* The undo log segment will not be reused */
 
-		if (undo->id >= TRX_RSEG_N_SLOTS) {
+		if (UNIV_UNLIKELY(undo->id >= TRX_RSEG_N_SLOTS)) {
 			fprintf(stderr,
 				"InnoDB: Error: undo->id is %lu\n",
 				(ulong) undo->id);
@@ -258,13 +266,15 @@ trx_purge_add_update_undo_to_history(
 
 		MONITOR_DEC(MONITOR_NUM_UNDO_SLOT_USED);
 
-		hist_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
-					   MLOG_4BYTES, mtr);
+		hist_size = mtr_read_ulint(
+			rseg_header + TRX_RSEG_HISTORY_SIZE, MLOG_4BYTES, mtr);
+
 		ut_ad(undo->size == flst_get_len(
 			      seg_header + TRX_UNDO_PAGE_LIST, mtr));
 
-		mlog_write_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
-				 hist_size + undo->size, MLOG_4BYTES, mtr);
+		mlog_write_ulint(
+			rseg_header + TRX_RSEG_HISTORY_SIZE,
+			hist_size + undo->size, MLOG_4BYTES, mtr);
 	}
 
 	/* Add the log as the first in the history list */
@@ -283,6 +293,7 @@ trx_purge_add_update_undo_to_history(
 
 	/* Write the trx number to the undo log header */
 	mlog_write_ull(undo_header + TRX_UNDO_TRX_NO, trx->no, mtr);
+
 	/* Write information about delete markings to the undo log header */
 
 	if (!undo->del_marks) {
@@ -291,23 +302,10 @@ trx_purge_add_update_undo_to_history(
 	}
 
 	if (rseg->last_page_no == FIL_NULL) {
-		void*		ptr;
-		rseg_queue_t	rseg_queue;
-
 		rseg->last_page_no = undo->hdr_page_no;
 		rseg->last_offset = undo->hdr_offset;
 		rseg->last_trx_no = trx->no;
 		rseg->last_del_marks = undo->del_marks;
-
-		rseg_queue.rseg = rseg;
-		rseg_queue.trx_no = rseg->last_trx_no;
-
-		rw_lock_x_lock(&trx_sys->lock);
-
-		ptr = ib_bh_push(trx_sys->ib_bh, &rseg_queue);
-		ut_a(ptr);
-
-		rw_lock_x_unlock(&trx_sys->lock);
 	}
 }
 
@@ -334,8 +332,6 @@ trx_purge_free_segment(
 
 	/*	fputs("Freeing an update undo log segment\n", stderr); */
 
-	ut_ad(mutex_own(&(purge_sys->mutex)));
-
 	for (;;) {
 		page_t*	undo_page;
 
@@ -373,6 +369,7 @@ trx_purge_free_segment(
 		}
 
 		mutex_exit(&rseg->mutex);
+
 		mtr_commit(&mtr);
 	}
 
@@ -444,8 +441,6 @@ trx_purge_truncate_rseg_history(
 	mtr_t		mtr;
 	trx_id_t	undo_trx_no;
 
-	ut_ad(mutex_own(&(purge_sys->mutex)));
-
 	mtr_start(&mtr);
 	mutex_enter(&(rseg->mutex));
 
@@ -546,8 +541,6 @@ trx_purge_truncate_history(
 {
 	ulint		i;
 
-	ut_ad(purge_mutex_own());
-
 	/* We play safe and set the truncate limit at most to the purge view
 	low_limit number, though this is not necessary */
 
@@ -568,6 +561,7 @@ trx_purge_truncate_history(
 	}
 }
 
+
 /***********************************************************************//**
 Updates the last not yet purged history log info in rseg when we have purged
 a whole undo log. Advances also purge_sys->purge_trx_no past the purged log. */
@@ -579,7 +573,7 @@ trx_purge_rseg_get_next_history_log(
 	ulint*		n_pages_handled)/*!< in/out: number of UNDO pages
 					handled */
 {
-	void*		ptr;
+	const void*	ptr;
 	page_t*		undo_page;
 	trx_ulogf_t*	log_hdr;
 	trx_usegf_t*	seg_hdr;
@@ -589,8 +583,6 @@ trx_purge_rseg_get_next_history_log(
 	mtr_t		mtr;
 	rseg_queue_t	rseg_queue;
 
-	ut_ad(mutex_own(&(purge_sys->mutex)));
-
 	mutex_enter(&(rseg->mutex));
 
 	ut_a(rseg->last_page_no != FIL_NULL);
@@ -601,8 +593,9 @@ trx_purge_rseg_get_next_history_log(
 
 	mtr_start(&mtr);
 
-	undo_page = trx_undo_page_get_s_latched(rseg->space, rseg->zip_size,
-						rseg->last_page_no, &mtr);
+	undo_page = trx_undo_page_get_s_latched(
+		rseg->space, rseg->zip_size, rseg->last_page_no, &mtr);
+
 	log_hdr = undo_page + rseg->last_offset;
 	seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
 
@@ -627,11 +620,11 @@ trx_purge_rseg_get_next_history_log(
 		on the MySQL mailing list on Nov 9, 2004. The fut0lst.c
 		file-based list was corrupt. The prev node pointer was
 		FIL_NULL, even though the list length was over 8 million nodes!
-		We assume that purge truncates the history list in moderate
+		We assume that purge truncates the history list in large
 		size pieces, and if we here reach the head of the list, the
-		list cannot be longer than 20 000 undo logs now. */
+		list cannot be longer than 2000 000 undo logs now. */
 
-		if (trx_sys->rseg_history_len > 20000) {
+		if (trx_sys->rseg_history_len > 2000000) {
 			ut_print_timestamp(stderr);
 			fprintf(stderr,
 				"  InnoDB: Warning: purge reached the"
@@ -648,7 +641,8 @@ trx_purge_rseg_get_next_history_log(
 		return;
 	}
 
-	mutex_exit(&(rseg->mutex));
+	mutex_exit(&rseg->mutex);
+
 	mtr_commit(&mtr);
 
 	/* Read the trx number and del marks from the previous log header */
@@ -674,19 +668,25 @@ trx_purge_rseg_get_next_history_log(
 	rseg_queue.rseg = rseg;
 	rseg_queue.trx_no = rseg->last_trx_no;
 
-	mutex_exit(&(rseg->mutex));
+	/* Purge can also produce events, however these are already ordered
+	in the rollback segment and any user generated event will be greater
+	than the events that Purge produces. ie. Purge can never produce
+	events from an empty rollback segment. */
 
-	rw_lock_x_lock(&trx_sys->lock);
+	mutex_enter(&purge_sys->bh_mutex);
 
-	ptr = ib_bh_push(trx_sys->ib_bh, &rseg_queue);
+	ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue);
 	ut_a(ptr != NULL);
 
-	rw_lock_x_unlock(&trx_sys->lock);
+	mutex_exit(&purge_sys->bh_mutex);
+
+	mutex_exit(&rseg->mutex);
 }
 
 /***********************************************************************//**
 Chooses the rollback segment with the smallest trx_id.
-@return zip_size if log is for a compressed table */
+@return zip_size if log is for a compressed table, ULINT_UNDEFINED if
+	no rollback segments to purge, 0 for non compressed tables. */
 static
 ulint
 trx_purge_get_rseg_with_min_trx_id(
@@ -695,51 +695,46 @@ trx_purge_get_rseg_with_min_trx_id(
 
 {
 	ulint		zip_size = 0;
-	trx_id_t	last_trx_no = IB_ULONGLONG_MAX;
-
-	ut_ad(mutex_own(&(purge_sys->mutex)));
 
-	rw_lock_s_lock(&trx_sys->lock);
+	mutex_enter(&purge_sys->bh_mutex);
 
-	if (!ib_bh_is_empty(trx_sys->ib_bh)) {
-		rseg_queue_t*	rseg_queue;
+	/* Only purge consumes events from the binary heap, user
+	threads only produce the events. */
 
-		rseg_queue = ib_bh_first(trx_sys->ib_bh);
+	if (!ib_bh_is_empty(purge_sys->ib_bh)) {
+		trx_rseg_t*	rseg;
 
-		purge_sys->rseg = rseg_queue->rseg;
-		last_trx_no = rseg_queue->trx_no;
+		rseg = ((rseg_queue_t*) ib_bh_first(purge_sys->ib_bh))->rseg;
+		ib_bh_pop(purge_sys->ib_bh);
 
-		ib_bh_pop(trx_sys->ib_bh);
+		mutex_exit(&purge_sys->bh_mutex);
 
+		purge_sys->rseg = rseg;
 	} else {
-		purge_sys->rseg = NULL;
-	}
+		mutex_exit(&purge_sys->bh_mutex);
 
-	rw_lock_s_unlock(&trx_sys->lock);
-
-	if (purge_sys->rseg != NULL) {
-		trx_rseg_t*	rseg = purge_sys->rseg;
-
-		mutex_enter(&rseg->mutex);
+		purge_sys->rseg = NULL;
 
-		ut_a(rseg->last_page_no != FIL_NULL);
+		return(ULINT_UNDEFINED);
+	}
 
-		/* We assume in purge of externally stored fields that
-		space id == 0 */
-		ut_a(rseg->space == 0);
+	ut_a(purge_sys->rseg != NULL);
 
-		zip_size = rseg->zip_size;
+	mutex_enter(&purge_sys->rseg->mutex);
 
-		ut_a(last_trx_no == rseg->last_trx_no);
+	ut_a(purge_sys->rseg->last_page_no != FIL_NULL);
 
-		purge_sys->iter.trx_no = rseg->last_trx_no;
+	/* We assume in purge of externally stored fields that space id == 0 */
+	ut_a(purge_sys->rseg->space == 0);
+	zip_size = purge_sys->rseg->zip_size;
 
-		purge_sys->hdr_offset = rseg->last_offset;
+	ut_a(purge_sys->iter.trx_no <= purge_sys->rseg->last_trx_no);
 
-		purge_sys->hdr_page_no = rseg->last_page_no;
+	purge_sys->iter.trx_no = purge_sys->rseg->last_trx_no;
+	purge_sys->hdr_offset = purge_sys->rseg->last_offset;
+	purge_sys->hdr_page_no = purge_sys->rseg->last_page_no;
 
-		mutex_exit(&rseg->mutex);
-	}
+	mutex_exit(&purge_sys->rseg->mutex);
 
 	return(zip_size);
 }
@@ -805,13 +800,15 @@ trx_purge_choose_next_log(void)
 {
 	ulint		zip_size;
 
-	ut_ad(mutex_own(&(purge_sys->mutex)));
 	ut_ad(purge_sys->next_stored == FALSE);
 
 	zip_size = trx_purge_get_rseg_with_min_trx_id(purge_sys);
 
 	if (purge_sys->rseg != NULL) {
 		trx_purge_read_undo_rec(purge_sys, zip_size);
+	} else {
+		/* There is nothing to do yet. */
+		os_thread_yield();
 	}
 }
 
@@ -837,7 +834,6 @@ trx_purge_get_next_rec(
 	ulint		zip_size;
 	mtr_t		mtr;
 
-	ut_ad(mutex_own(&(purge_sys->mutex)));
 	ut_ad(purge_sys->next_stored);
 
 	space = purge_sys->rseg->space;
@@ -875,9 +871,9 @@ trx_purge_get_next_rec(
 		/* Try first to find the next record which requires a purge
 		operation from the same page of the same undo log */
 
-		next_rec = trx_undo_page_get_next_rec(rec2,
-						      purge_sys->hdr_page_no,
-						      purge_sys->hdr_offset);
+		next_rec = trx_undo_page_get_next_rec(
+			rec2, purge_sys->hdr_page_no, purge_sys->hdr_offset);
+
 		if (next_rec == NULL) {
 			rec2 = trx_undo_get_next_rec(
 				rec2, purge_sys->hdr_page_no,
@@ -959,8 +955,6 @@ trx_purge_fetch_next_rec(
 	const read_view_t*	view = purge_sys->view;
 	const purge_iter_t	iter = purge_sys->iter;
 
-	ut_ad(mutex_own(&purge_sys->mutex));
-
 	if (!purge_sys->next_stored) {
 		trx_purge_choose_next_log();
 
@@ -996,10 +990,10 @@ trx_purge_fetch_next_rec(
 	return(trx_purge_get_next_rec(n_pages_handled, heap));
 }
 
-/***********************************************************//**
-Fetches an undo log record into the purge nodes in the query graph.
-@return the number of pages processed. */
-static
+/*******************************************************************//**
+This function runs a purge batch.
+@return	number of undo log pages handled in the batch */
+UNIV_INTERN
 ulint
 trx_purge_attach_undo_recs(
 /*=======================*/
@@ -1013,8 +1007,6 @@ trx_purge_attach_undo_recs(
 	ulint		n_pages_handled = 0;
 	ulint		n_thrs = UT_LIST_GET_LEN(purge_sys->query->thrs);
 
-	ut_ad(mutex_own(&purge_sys->mutex));
-
 	*limit = purge_sys->iter;
 
 	/* Debug code to validate some pre-requisites and reset done flag. */
@@ -1203,10 +1195,14 @@ void
 trx_purge_truncate(void)
 /*====================*/
 {
-	ut_ad(mutex_own(&purge_sys->mutex));
+	static ulint	count;
 
 	ut_ad(trx_purge_check_limit());
 
+	if (++count % TRX_SYS_N_RSEGS) {
+		return;
+	}
+
 	if (purge_sys->limit.trx_no == 0) {
 		trx_purge_truncate_history(&purge_sys->iter, purge_sys->view);
 	} else {
@@ -1231,8 +1227,6 @@ trx_purge(
 
 	srv_dml_needed_delay = trx_purge_dml_delay();
 
-	mutex_enter(&purge_sys->mutex);
-
 	/* The number of tasks submitted should be completed. */
 	ut_a(purge_sys->n_submitted == purge_sys->n_completed);
 
@@ -1252,8 +1246,6 @@ trx_purge(
 	n_pages_handled = trx_purge_attach_undo_recs(
 		n_purge_threads, purge_sys, &purge_sys->limit, batch_size);
 
-	mutex_exit(&purge_sys->mutex);
-
 	/* Do we do an asynchronous purge or not ? */
 	if (n_purge_threads > 0) {
 		ulint	i = 0;
@@ -1290,7 +1282,7 @@ run_synchronously:
 		que_run_threads(thr);
 
 		os_atomic_inc_ulint(
-			&purge_sys->mutex, &purge_sys->n_completed, 1);
+			&purge_sys->bh_mutex, &purge_sys->n_completed, 1);
 
 		if (n_purge_threads > 1) {
 			trx_purge_wait_for_workers_to_complete(purge_sys);
@@ -1301,16 +1293,12 @@ run_synchronously:
         to have already exited via os_event_wait(). We only truncate the UNDO
 	log when we are 100% sure that all submitted tasks have completed. */
 
-	mutex_enter(&purge_sys->mutex);
-
 	if (purge_sys->n_submitted == purge_sys->n_completed) {
 		trx_purge_truncate();
 	}
 
 	MONITOR_INC_VALUE(MONITOR_PURGE_N_PAGE_HANDLED, n_pages_handled);
 
-	mutex_exit(&purge_sys->mutex);
-
 	if (srv_print_thread_releases) {
 		fprintf(stderr, "Purge ends; pages handled %lu\n",
 			n_pages_handled);

=== modified file 'storage/innobase/trx/trx0rseg.c'
--- a/storage/innobase/trx/trx0rseg.c	2011-01-18 21:14:22 +0000
+++ b/storage/innobase/trx/trx0rseg.c	2011-02-22 05:11:15 +0000
@@ -167,12 +167,15 @@ static
 trx_rseg_t*
 trx_rseg_mem_create(
 /*================*/
-	ulint	id,		/*!< in: rollback segment id */
-	ulint	space,		/*!< in: space where the segment placed */
-	ulint	zip_size,	/*!< in: compressed page size in bytes
-				or 0 for uncompressed pages */
-	ulint	page_no,	/*!< in: page number of the segment header */
-	mtr_t*	mtr)		/*!< in: mtr */
+	ulint		id,		/*!< in: rollback segment id */
+	ulint		space,		/*!< in: space where the segment
+					placed */
+	ulint		zip_size,	/*!< in: compressed page size in bytes
+					or 0 for uncompressed pages */
+	ulint		page_no,	/*!< in: page number of the segment
+					header */
+	ib_bh_t*	ib_bh,		/*!< in/out: rseg queue */
+	mtr_t*		mtr)		/*!< in: mtr */
 {
 	ulint		len;
 	trx_rseg_t*	rseg;
@@ -194,51 +197,56 @@ trx_rseg_mem_create(
 
 	rseg_header = trx_rsegf_get_new(space, zip_size, page_no, mtr);
 
-	rseg->max_size = mtr_read_ulint(rseg_header + TRX_RSEG_MAX_SIZE,
-					MLOG_4BYTES, mtr);
+	rseg->max_size = mtr_read_ulint(
+		rseg_header + TRX_RSEG_MAX_SIZE, MLOG_4BYTES, mtr);
 
 	/* Initialize the undo log lists according to the rseg header */
 
 	sum_of_undo_sizes = trx_undo_lists_init(rseg);
 
-	rseg->curr_size = mtr_read_ulint(rseg_header + TRX_RSEG_HISTORY_SIZE,
-					 MLOG_4BYTES, mtr)
+	rseg->curr_size = mtr_read_ulint(
+		rseg_header + TRX_RSEG_HISTORY_SIZE, MLOG_4BYTES, mtr)
 		+ 1 + sum_of_undo_sizes;
 
 	len = flst_get_len(rseg_header + TRX_RSEG_HISTORY, mtr);
+
 	if (len > 0) {
-		void*		ptr;
 		rseg_queue_t	rseg_queue;
 
 		trx_sys->rseg_history_len += len;
 
 		node_addr = trx_purge_get_log_from_hist(
 			flst_get_last(rseg_header + TRX_RSEG_HISTORY, mtr));
+
 		rseg->last_page_no = node_addr.page;
 		rseg->last_offset = node_addr.boffset;
 
-		undo_log_hdr = trx_undo_page_get(rseg->space, rseg->zip_size,
-						 node_addr.page,
-						 mtr) + node_addr.boffset;
+		undo_log_hdr = trx_undo_page_get(
+			rseg->space, rseg->zip_size, node_addr.page,
+			mtr) + node_addr.boffset;
 
 		rseg->last_trx_no = mach_read_from_8(
 			undo_log_hdr + TRX_UNDO_TRX_NO);
+
 		rseg->last_del_marks = mtr_read_ulint(
 			undo_log_hdr + TRX_UNDO_DEL_MARKS, MLOG_2BYTES, mtr);
 
 		rseg_queue.rseg = rseg;
 		rseg_queue.trx_no = rseg->last_trx_no;
 
-		/* There is no need to cover this operation by the purge
-		mutex because we are still bootstrapping. */
+		if (rseg->last_page_no != FIL_NULL) {
+			const void*	ptr;
 
-		ptr = ib_bh_push(trx_sys->ib_bh, &rseg_queue);
-		ut_a(ptr != NULL);
+			/* There is no need to cover this operation by the purge
+			mutex because we are still bootstrapping. */
+
+			ptr = ib_bh_push(ib_bh, &rseg_queue);
+			ut_a(ptr != NULL);
+		}
 	} else {
 		rseg->last_page_no = FIL_NULL;
 	}
 
-
 	return(rseg);
 }
 
@@ -249,8 +257,9 @@ static
 void
 trx_rseg_create_instance(
 /*=====================*/
-	trx_sysf_t*	sys_header,	/*!< in/out: trx system header */
-	mtr_t*		mtr)		/*!< in/out: mtr */
+	trx_sysf_t*	sys_header,	/*!< in: trx system header */
+	ib_bh_t*	ib_bh,		/*!< in/out: rseg queue */
+	mtr_t*		mtr)		/*!< in: mtr */
 {
 	ulint		i;
 
@@ -273,7 +282,7 @@ trx_rseg_create_instance(
 			zip_size = space ? fil_space_get_zip_size(space) : 0;
 
 			rseg = trx_rseg_mem_create(
-				i, space, zip_size, page_no, mtr);
+				i, space, zip_size, page_no, ib_bh, mtr);
 
 			ut_a(rseg->id == i);
 		}
@@ -318,7 +327,8 @@ trx_rseg_create(void)
 		zip_size = space ? fil_space_get_zip_size(space) : 0;
 
 		rseg = trx_rseg_mem_create(
-			slot_no, space, zip_size, page_no, &mtr);
+			slot_no, space, zip_size, page_no,
+			purge_sys->ib_bh, &mtr);
 	}
 
 	mtr_commit(&mtr);
@@ -334,9 +344,10 @@ void
 trx_rseg_array_init(
 /*================*/
 	trx_sysf_t*	sys_header,	/* in/out: trx system header */
-	mtr_t*		mtr)		/* in/out: mtr */
+	ib_bh_t*	ib_bh,		/*!< in: rseg queue */
+	mtr_t*		mtr)		/*!< in: mtr */
 {
 	trx_sys->rseg_history_len = 0;
 
-	trx_rseg_create_instance(sys_header, mtr);
+	trx_rseg_create_instance(sys_header, ib_bh, mtr);
 }

=== modified file 'storage/innobase/trx/trx0sys.c'
--- a/storage/innobase/trx/trx0sys.c	2011-01-24 10:28:28 +0000
+++ b/storage/innobase/trx/trx0sys.c	2011-02-22 05:11:15 +0000
@@ -950,23 +950,58 @@ trx_sysf_create(
 }
 
 /*****************************************************************//**
+Compare two trx_rseg_t instances on last_trx_no. */
+static
+int
+trx_rseg_compare_last_trx_no(
+/*=========================*/
+	const void*	p1,		/*!< in: elem to compare */
+	const void*	p2)		/*!< in: elem to compare */
+{
+	ib_int64_t	cmp;
+
+	const rseg_queue_t*	rseg_q1 = (const rseg_queue_t*) p1;
+	const rseg_queue_t*	rseg_q2 = (const rseg_queue_t*) p2;
+
+	cmp = rseg_q1->trx_no - rseg_q2->trx_no;
+
+	if (cmp < 0) {
+		return(-1);
+	} else if (cmp > 0) {
+		return(1);
+	}
+
+	return(0);
+}
+
+/*****************************************************************//**
 Creates and initializes the central memory structures for the transaction
-system. This is called when the database is started. */
+system. This is called when the database is started.
+@return min binary heap of rsegs to purge */
 UNIV_INTERN
-void
+ib_bh_t*
 trx_sys_init_at_db_start(void)
 /*==========================*/
 {
+	mtr_t		mtr;
+	ib_bh_t*	ib_bh;
 	trx_sysf_t*	sys_header;
 	ib_uint64_t	rows_to_undo	= 0;
 	const char*	unit		= "";
-	mtr_t		mtr;
+
+	/* We create the min binary heap here and pass ownership to
+	purge when we init the purge sub-system. Purge is responsible
+	for freeing the binary heap. */
+
+	ib_bh = ib_bh_create(
+		trx_rseg_compare_last_trx_no,
+		sizeof(rseg_queue_t), TRX_SYS_N_RSEGS);
 
 	mtr_start(&mtr);
 
 	sys_header = trx_sysf_get(&mtr);
 
-	trx_rseg_array_init(sys_header, &mtr);
+	trx_rseg_array_init(sys_header, ib_bh, &mtr);
 
 	/* VERY important: after the database is started, max_trx_id value is
 	divisible by TRX_SYS_TRX_ID_WRITE_MARGIN, and the 'if' in
@@ -986,6 +1021,10 @@ trx_sys_init_at_db_start(void)
 
 	trx_lists_init_at_db_start();
 
+	/* This S lock is not strictly required, it is here only to satisfy
+	the debug code (assertions). We are still running in single threaded
+	bootstrap mode. */
+
 	rw_lock_s_lock(&trx_sys->lock);
 
 	if (UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
@@ -1022,31 +1061,8 @@ trx_sys_init_at_db_start(void)
 	UT_LIST_INIT(trx_sys->view_list);
 
 	mtr_commit(&mtr);
-}
 
-/*****************************************************************//**
-Compare two trx_rseg_t instances on last_trx_no. */
-static
-int
-trx_rseg_compare_last_trx_no(
-/*=========================*/
-	const void*	p1,		/*!< in: elem to compare */
-	const void*	p2)		/*!< in: elem to compare */
-{
-	ib_int64_t	cmp;
-
-	const rseg_queue_t*	rseg_q1 = (const rseg_queue_t*) p1;
-	const rseg_queue_t*	rseg_q2 = (const rseg_queue_t*) p2;
-
-	cmp = rseg_q1->trx_no - rseg_q2->trx_no;
-
-	if (cmp < 0) {
-		return(-1);
-	} else if (cmp > 0) {
-		return(1);
-	}
-
-	return(0);
+	return(ib_bh);
 }
 
 /*****************************************************************//**
@@ -1061,10 +1077,6 @@ trx_sys_create(void)
 
 	trx_sys = mem_zalloc(sizeof(*trx_sys));
 
-	trx_sys->ib_bh = ib_bh_create(
-		trx_rseg_compare_last_trx_no,
-	       	sizeof(rseg_queue_t), TRX_SYS_N_RSEGS * 128);
-
 	rw_lock_create(trx_sys_rw_lock_key, &trx_sys->lock, SYNC_TRX_SYS);
 
 	mutex_create(
@@ -1694,8 +1706,6 @@ trx_sys_close(void)
 
 	rw_lock_free(&trx_sys->lock);
 
-	ib_bh_free(trx_sys->ib_bh);
-
 	mem_free(trx_sys);
 
 	trx_sys = NULL;

=== modified file 'storage/innobase/trx/trx0trx.c'
--- a/storage/innobase/trx/trx0trx.c	2011-02-17 14:06:09 +0000
+++ b/storage/innobase/trx/trx0trx.c	2011-02-22 05:11:15 +0000
@@ -43,6 +43,7 @@ Created 3/26/1996 Heikki Tuuri
 #include "btr0sea.h"
 #include "os0proc.h"
 #include "trx0xa.h"
+#include "trx0purge.h"
 #include "ha_prototypes.h"
 #include "srv0mon.h"
 
@@ -595,11 +596,12 @@ trx_lists_init_at_db_start(void)
 
 /******************************************************************//**
 Assigns a rollback segment to a transaction in a round-robin fashion.
-@return	assigned rollback segment */
+@return	assigned rollback segment instance */
 UNIV_INLINE
 trx_rseg_t*
-trx_assign_rseg(void)
-/*=================*/
+trx_assign_rseg(
+/*============*/
+	ulint	max_undo_logs)	/*!< in: maximum number of UNDO logs to use */
 {
 	ulint		i;
 	trx_rseg_t*	rseg;
@@ -607,8 +609,16 @@ trx_assign_rseg(void)
 
 	/* This breaks true round robin but that should be OK. */
 
+	ut_a(max_undo_logs > 0 && max_undo_logs <= TRX_SYS_N_RSEGS);
+
 	i = latest_rseg++;
-        i %= TRX_SYS_N_RSEGS;
+        i %= max_undo_logs;
+
+	/* Note: The assumtion here is that there can't be any gaps in
+	the array. Once we implement more flexible rollback segment
+	management this may not hold. The assertion checks for that case. */
+
+	ut_a(trx_sys->rseg_array[0] != NULL);
 
 	do {
 		rseg = trx_sys->rseg_array[i];
@@ -618,14 +628,6 @@ trx_assign_rseg(void)
 
 	} while (rseg == NULL);
 
-#ifdef UNIV_DEBUG
-	rw_lock_x_lock(&trx_sys->lock);
-
-	ut_ad(trx_sys_validate_trx_list());
-
-	rw_lock_x_unlock(&trx_sys->lock);
-#endif /* UNIV_DEBUG */
-
 	return(rseg);
 }
 
@@ -648,7 +650,7 @@ trx_start_low(
 	/* The initial value for trx->no: IB_ULONGLONG_MAX is used in
 	read_view_open_now: */
 
-	rseg = trx_assign_rseg();
+	rseg = trx_assign_rseg(srv_rollback_segments);
 
 	trx->no = IB_ULONGLONG_MAX;
 
@@ -663,6 +665,7 @@ trx_start_low(
 	trx->in_mysql_trx_list would hold. In that case, the trx->state
 	change must be protected by the trx_sys->lock, so that
 	lock_print_info_all_transactions() will have a consistent view. */
+
 	trx->state = TRX_STATE_ACTIVE;
 
 	trx->id = trx_sys_get_new_trx_id();
@@ -679,100 +682,170 @@ trx_start_low(
 }
 
 /****************************************************************//**
-Commits a transaction. */
-UNIV_INTERN
+Set the transaction serialisation number. */
+static
 void
-trx_commit(
-/*=======*/
-	trx_t*	trx)	/*!< in: transaction */
+trx_serialisation_number_get(
+/*=========================*/
+	trx_t*		trx)	/*!< in: transaction */
 {
-	mtr_t			mtr;
-	trx_named_savept_t*	savep;
-	ib_uint64_t		lsn = 0;
-	page_t*			update_hdr_page;
+	trx_rseg_t*	rseg;
 
-	ut_ad(trx->in_trx_list);
-	ut_ad(!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
+	rseg = trx->rseg;
 
-	if (trx->insert_undo != NULL || trx->update_undo != NULL) {
-		trx_rseg_t*	rseg;
+	ut_ad(mutex_own(&rseg->mutex));
+
+	rw_lock_x_lock(&trx_sys->lock);
+
+	trx->no = trx_sys_get_new_trx_id();
+
+	/* If the rollack segment is not empty then the
+	new trx_t::no can't be less than any trx_t::no
+	already in the rollback segment. User threads only
+	produce events when a rollback segment is empty. */
+
+	if (rseg->last_page_no == FIL_NULL) {
+		void*		ptr;
+		rseg_queue_t	rseg_queue;
+
+		rseg_queue.rseg = rseg;
+		rseg_queue.trx_no = trx->no;
+
+		mutex_enter(&purge_sys->bh_mutex);
+
+		/* This is to reduce the pressure on the trx_sys_t::lock
+		though in reality it should make very little (read no)
+		difference because this code path is only taken when the
+		rbs is empty. */
+
+		rw_lock_x_unlock(&trx_sys->lock);
+
+		ptr = ib_bh_push(purge_sys->ib_bh, &rseg_queue);
+		ut_a(ptr);
+
+		mutex_exit(&purge_sys->bh_mutex);
+	} else {
+		rw_lock_x_unlock(&trx_sys->lock);
+	}
+}
 
-		rseg = trx->rseg;
+/****************************************************************//**
+Assign the transaction its history serialisation number and write the
+update UNDO log record to the assigned rollback segment.
+@return the LSN of the UNDO log write. */
+static
+ib_uint64_t
+trx_write_serialisation_history(
+/*============================*/
+	trx_t*		trx)	/*!< in: transaction */
+{
+
+	mtr_t		mtr;
+	trx_rseg_t*	rseg;
+
+	rseg = trx->rseg;
 
-		mtr_start(&mtr);
+	mtr_start(&mtr);
 
-		/* Change the undo log segment states from TRX_UNDO_ACTIVE
-		to some other state: these modifications to the file data
-		structure define the transaction as committed in the file
-		based world, at the serialization point of the log sequence
-		number lsn obtained below. */
+	/* Change the undo log segment states from TRX_UNDO_ACTIVE
+	to some other state: these modifications to the file data
+	structure define the transaction as committed in the file
+	based domain, at the serialization point of the log sequence
+	number lsn obtained below. */
+
+	if (trx->update_undo != NULL) {
+		page_t*		undo_hdr_page;
+		trx_undo_t*	undo = trx->update_undo;
+
+		/* We have to hold the rseg mutex because update
+		log headers have to be put to the history list in the
+		(serialisation) order of the UNDO trx number. This is
+		required for the purge in-memory data structures too. */
 
 		mutex_enter(&rseg->mutex);
 
-		if (trx->insert_undo != NULL) {
-			trx_undo_set_state_at_finish(trx->insert_undo, &mtr);
-		}
+		/* Assign the transaction serialisation number and also
+		update the purge min binary heap if this is the first
+		UNDO log being written to the assigned rollback segment. */
+
+		trx_serialisation_number_get(trx);
+
+		/* It is not necessary to obtain trx->undo_mutex here
+		because only a single OS thread is allowed to do the
+		transaction commit for this transaction. */
 
-		if (trx->update_undo != NULL) {
+		undo_hdr_page = trx_undo_set_state_at_finish(undo, &mtr);
 
-			rw_lock_x_lock(&trx_sys->lock);
+		trx_undo_update_cleanup(trx, undo_hdr_page, &mtr);
+	} else {
+		mutex_enter(&rseg->mutex);
+	}
 
-			trx->no = trx_sys_get_new_trx_id();
+	if (trx->insert_undo != NULL) {
+		trx_undo_set_state_at_finish(trx->insert_undo, &mtr);
+	}
 
-			rw_lock_x_unlock(&trx_sys->lock);
+	mutex_exit(&rseg->mutex);
 
-			/* It is not necessary to obtain trx->undo_mutex here
-			because only a single OS thread is allowed to do the
-			transaction commit for this transaction. */
+	MONITOR_INC(MONITOR_TRX_COMMIT_UNDO);
 
-			update_hdr_page = trx_undo_set_state_at_finish(
-				trx->update_undo, &mtr);
+	/* Update the latest MySQL binlog name and offset info
+	in trx sys header if MySQL binlogging is on or the database
+	server is a MySQL replication slave */
 
-			/* We have to do the cleanup for the update log while
-			holding the rseg mutex because update log headers
-			have to be put to the history list in the order of
-			the trx number. */
+	if (trx->mysql_log_file_name
+	    && trx->mysql_log_file_name[0] != '\0') {
 
-			trx_undo_update_cleanup(trx, update_hdr_page, &mtr);
-		}
+		trx_sys_update_mysql_binlog_offset(
+			trx->mysql_log_file_name,
+			trx->mysql_log_offset,
+			TRX_SYS_MYSQL_LOG_INFO, &mtr);
 
-		MONITOR_INC(MONITOR_TRX_COMMIT_UNDO);
+		trx->mysql_log_file_name = NULL;
+	}
 
-		mutex_exit(&rseg->mutex);
+	/* The following call commits the mini-transaction, making the
+	whole transaction committed in the file-based world, at this
+	log sequence number. The transaction becomes 'durable' when
+	we write the log to disk, but in the logical sense the commit
+	in the file-based data structures (undo logs etc.) happens
+	here.
 
-		/* Update the latest MySQL binlog name and offset info
-		in trx sys header if MySQL binlogging is on or the database
-		server is a MySQL replication slave */
-
-		if (trx->mysql_log_file_name
-		    && trx->mysql_log_file_name[0] != '\0') {
-			trx_sys_update_mysql_binlog_offset(
-				trx->mysql_log_file_name,
-				trx->mysql_log_offset,
-				TRX_SYS_MYSQL_LOG_INFO, &mtr);
-			trx->mysql_log_file_name = NULL;
-		}
+	NOTE that transaction numbers, which are assigned only to
+	transactions with an update undo log, do not necessarily come
+	in exactly the same order as commit lsn's, if the transactions
+	have different rollback segments. To get exactly the same
+	order we should hold the kernel mutex up to this point,
+	adding to the contention of the kernel mutex. However, if
+	a transaction T2 is able to see modifications made by
+	a transaction T1, T2 will always get a bigger transaction
+	number and a bigger commit lsn than T1. */
+
+	/*--------------*/
+	mtr_commit(&mtr);
+	/*--------------*/
+
+	return(mtr.end_lsn);
+}
 
-		/* The following call commits the mini-transaction, making the
-		whole transaction committed in the file-based world, at this
-		log sequence number. The transaction becomes 'durable' when
-		we write the log to disk, but in the logical sense the commit
-		in the file-based data structures (undo logs etc.) happens
-		here.
-
-		NOTE that transaction numbers, which are assigned only to
-		transactions with an update undo log, do not necessarily come
-		in exactly the same order as commit lsn's, if the transactions
-		have different rollback segments. To get exactly the same
-		order we should hold the trx sys mutex up to this point.
-		However, if a transaction T2 is able to see modifications
-		made by a transaction T1, T2 will always get a bigger
-		transaction number and a bigger commit lsn than T1. */
-
-		/*--------------*/
-		mtr_commit(&mtr);
-		/*--------------*/
-		lsn = mtr.end_lsn;
+/****************************************************************//**
+Commits a transaction. */
+UNIV_INTERN
+void
+trx_commit(
+/*=======*/
+	trx_t*	trx)	/*!< in: transaction */
+{
+	trx_named_savept_t*	savep;
+	ib_uint64_t		lsn = 0;
+
+	ut_ad(trx->in_trx_list);
+	ut_ad(!trx_state_eq(trx, TRX_STATE_COMMITTED_IN_MEMORY));
+
+	if (trx->insert_undo != NULL || trx->update_undo != NULL) {
+		lsn = trx_write_serialisation_history(trx);
+	} else {
+		lsn = 0;
 	}
 
 	trx->must_flush_log_later = FALSE;
@@ -781,14 +854,17 @@ trx_commit(
 
 	/* Remove the transaction from the list of active transactions
 	now that it no longer holds any user locks. */
+
 	rw_lock_x_lock(&trx_sys->lock);
 	ut_ad(trx->in_trx_list);
 	ut_d(trx->in_trx_list = FALSE);
 	UT_LIST_REMOVE(trx_list, trx_sys->trx_list, trx);
+
 	/* If this transaction came from trx_allocate_for_mysql(),
 	trx->in_mysql_trx_list would hold. In that case, the
 	trx->state change must be protected by trx_sys->lock, so that
 	lock_print_info_all_transactions() will have a consistent view. */
+
 	trx->state = TRX_STATE_NOT_STARTED;
 	ut_ad(trx_sys_validate_trx_list());
 	MONITOR_INC(MONITOR_TRX_COMMIT);

=== added file 'storage/innobase/ut/ut0bh.c'
--- a/storage/innobase/ut/ut0bh.c	1970-01-01 00:00:00 +0000
+++ b/storage/innobase/ut/ut0bh.c	2011-02-23 06:48:15 +0000
@@ -0,0 +1,164 @@
+/***************************************************************************//**
+Copyright (c) 2010, 2011, Oracle Corpn. All Rights Reserved.
+
+Portions of this file contain modifications contributed and copyrighted by
+Sun Microsystems, Inc. Those modifications are gratefully acknowledged and
+are described briefly in the InnoDB documentation. The contributions by
+Sun Microsystems are incorporated with their permission, and subject to the
+conditions contained in the file COPYING.Sun_Microsystems.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/******************************************************************//**
+@file ut/ut0bh.c
+Binary min-heap implementation.
+
+Created 2010-05-28 by Sunny Bains
+*******************************************************/
+
+#include "ut0bh.h"
+#include "ut0mem.h"
+
+#ifdef UNIV_NONINL
+#include "ut0bh.ic"
+#endif
+
+#include <string.h>
+
+/**********************************************************************//**
+Create a binary heap.
+@return a new binary heap */
+UNIV_INTERN
+ib_bh_t*
+ib_bh_create(
+/*=========*/
+	ib_bh_cmp_t	compare,		/*!< in: comparator */
+	ulint		sizeof_elem,		/*!< in: size of one element */
+	ulint		max_elems)		/*!< in: max elements allowed */
+{
+	ulint		sz;
+	ib_bh_t*	ib_bh;
+
+	sz = sizeof(*ib_bh) + (sizeof_elem * max_elems);
+
+	ib_bh = (ib_bh_t*) ut_malloc(sz);
+	memset(ib_bh, 0x0, sz);
+
+	ib_bh->compare = compare;
+	ib_bh->max_elems = max_elems;
+	ib_bh->sizeof_elem = sizeof_elem;
+
+	return(ib_bh);
+}
+
+/**********************************************************************//**
+Free a binary heap.
+@return a new binary heap */
+UNIV_INTERN
+void
+ib_bh_free(
+/*=======*/
+	ib_bh_t*	ib_bh)			/*!< in/own: instance */
+{
+	ut_free(ib_bh);
+}
+
+/**********************************************************************//**
+Add an element to the binary heap. Note: The element is copied.
+@return pointer to added element or NULL if full. */
+UNIV_INTERN
+void*
+ib_bh_push(
+/*=======*/
+	ib_bh_t*	ib_bh,			/*!< in/out: instance */
+	const void*	elem)			/*!< in: element to add */
+{
+	void*		ptr;
+
+	if (ib_bh_is_full(ib_bh)) {
+		return(NULL);
+	} else if (ib_bh_is_empty(ib_bh)) {
+		++ib_bh->n_elems;
+		return(ib_bh_set(ib_bh, 0, elem));
+	} else {
+		ulint	i;
+
+		i = ib_bh->n_elems;
+
+		++ib_bh->n_elems;
+
+		for (ptr = ib_bh_get(ib_bh, i >> 1);
+		     i > 0 && ib_bh->compare(ptr, elem) > 0;
+		     i >>= 1, ptr = ib_bh_get(ib_bh, i >> 1)) {
+
+			ib_bh_set(ib_bh, i, ptr);
+		}
+
+		ptr = ib_bh_set(ib_bh, i, elem);
+	}
+
+	return(ptr);
+}
+
+/**********************************************************************//**
+Remove the first element from the binary heap. */
+UNIV_INTERN
+void
+ib_bh_pop(
+/*======*/
+	ib_bh_t*	ib_bh)			/*!< in/out: instance */
+{
+	byte*		ptr;
+	byte*		last;
+	ulint		parent = 0;
+
+	if (ib_bh_is_empty(ib_bh)) {
+		return;
+	} else if (ib_bh_size(ib_bh) == 1) {
+		--ib_bh->n_elems;
+		return;
+	}
+
+	last = (byte*) ib_bh_last(ib_bh);
+
+	/* Start from the child node */
+	ptr = (byte*) ib_bh_get(ib_bh, 1);
+
+	while (ptr < last) {
+		/* If the "right" child node is < "left" child node */
+		if (ib_bh->compare(ptr + ib_bh->sizeof_elem, ptr) < 0) {
+			ptr += ib_bh->sizeof_elem;
+		}
+
+		if (ib_bh->compare(last, ptr) <= 0) {
+			break;
+		}
+
+		ib_bh_set(ib_bh, parent, ptr);
+
+		parent = (ptr - (byte*) ib_bh_first(ib_bh))
+		       / ib_bh->sizeof_elem;
+
+		if ((parent << 1) >= ib_bh_size(ib_bh)) {
+			break;
+		}
+
+		ptr = (byte*) ib_bh_get(ib_bh, parent << 1);
+	}
+
+	--ib_bh->n_elems;
+
+	ib_bh_set(ib_bh, parent, last);
+}

=== removed file 'storage/innobase/ut/ut0bh.c'
--- a/storage/innobase/ut/ut0bh.c	2010-05-28 10:12:31 +0000
+++ b/storage/innobase/ut/ut0bh.c	1970-01-01 00:00:00 +0000
@@ -1,265 +0,0 @@
-/***************************************************************************//**
-
-Copyright (c) 2010, Oracle Corpn. All Rights Reserved.
-
-Portions of this file contain modifications contributed and copyrighted by
-Sun Microsystems, Inc. Those modifications are gratefully acknowledged and
-are described briefly in the InnoDB documentation. The contributions by
-Sun Microsystems are incorporated with their permission, and subject to the
-conditions contained in the file COPYING.Sun_Microsystems.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-
-/******************************************************************//**
-@file ut/ut0bh.c
-Binary min-heap implementation.
-
-Created 2010-05-28 by Sunny Bains
-*******************************************************/
-
-#include "ut0bh.h"
-#include "ut0mem.h"
-
-#include <string.h>
-
-/** Binary heap data structure */
-struct ib_bh_struct {
-	ulint		max_elems;		/*!< max elements allowed */
-	ulint		n_elems;		/*!< current size */
-	ulint		sizeof_elem;		/*!< sizeof element */
-	ib_bh_cmp_t	compare;		/*!< comparator */
-};
-
-/**********************************************************************//**
-Get the number of elements in the binary heap.
-@return number of elements */
-UNIV_INTERN
-ulint
-ib_bh_size(
-/*=======*/
-	const ib_bh_t*	ib_bh)			/*!< in: instance */
-{
-	return(ib_bh->n_elems);
-}
-
-/**********************************************************************//**
-Test if binary heap is empty.
-@return TRUE if empty. */
-UNIV_INTERN
-ibool
-ib_bh_is_empty(
-/*===========*/
-	const ib_bh_t*	ib_bh)			/*!< in: instance */
-{
-	return(ib_bh_size(ib_bh) == 0);
-}
-
-/**********************************************************************//**
-Test if binary heap is full.
-@return TRUE if full. */
-UNIV_INTERN
-ibool
-ib_bh_is_full(
-/*===========*/
-	const ib_bh_t*	ib_bh)			/*!< in: instance */
-{
-	return(ib_bh_size(ib_bh) >= ib_bh->max_elems);
-}
-
-/**********************************************************************//**
-Get a pointer to the element.
-@return pointer to element */
-UNIV_INTERN
-void*
-ib_bh_get(
-/*=======*/
-	ib_bh_t*	ib_bh,			/*!< in: instance */
-	ulint		i)			/*!< in: index */
-{
-	byte*		ptr = (byte*) (ib_bh + 1);
-
-	ut_a(i < ib_bh_size(ib_bh));
-
-	return(ptr + (ib_bh->sizeof_elem * i));
-}
-
-/**********************************************************************//**
-Copy an element to the binary heap.
-@return pointer to copied element */
-UNIV_INTERN
-void*
-ib_bh_set(
-/*======*/
-	ib_bh_t*	ib_bh,			/*!< in,out: instance */
-	ulint		i,			/*!< in: index */
-	const void*	elem)			/*!< in: element to add */
-{
-	void*		ptr = ib_bh_get(ib_bh, i);
-
-	memcpy(ptr, elem, ib_bh->sizeof_elem);
-
-	return(ptr);
-}
-
-/**********************************************************************//**
-Create a binary heap.
-@return a new binary heap */
-UNIV_INTERN
-ib_bh_t*
-ib_bh_create(
-/*=========*/
-	ib_bh_cmp_t	compare,		/*!< in: comparator */
-	ulint		sizeof_elem,		/*!< in: size of one element */
-	ulint		max_elems)		/*!< in: max elements allowed */
-{
-	ulint		sz;
-	ib_bh_t*	ib_bh;
-
-	sz = sizeof(*ib_bh) + (sizeof_elem * max_elems);
-
-	ib_bh = (ib_bh_t*) ut_malloc(sz);
-	memset(ib_bh, 0x0, sz);
-	
-	ib_bh->compare = compare;
-	ib_bh->max_elems = max_elems;
-	ib_bh->sizeof_elem = sizeof_elem;
-
-	return(ib_bh);
-}
-
-/**********************************************************************//**
-Free a binary heap.
-@return a new binary heap */
-UNIV_INTERN
-void
-ib_bh_free(
-/*=======*/
-	ib_bh_t*	ib_bh)			/*!< in,own: instance */
-{
-	ut_free(ib_bh);
-}
-
-/**********************************************************************//**
-Add an element to the binary heap. Note: The element is copied.
-@return pointer to added element or NULL if full. */
-UNIV_INTERN
-void*
-ib_bh_push(
-/*=======*/
-	ib_bh_t*	ib_bh,			/*!< in,out: instance */
-	const void*	elem)			/*!< in: element to add */
-{
-	void*		ptr = NULL;
-
-	if (!ib_bh_is_full(ib_bh)) {
-		ulint	i;
-
-		if (ib_bh_is_empty(ib_bh)) {
-			++ib_bh->n_elems;
-			return(ib_bh_set(ib_bh, 0, elem));
-		}
-
-		i = ib_bh->n_elems;
-
-		++ib_bh->n_elems;
-
-		for (ptr = ib_bh_get(ib_bh, i >> 1);
-		     i > 0 && ib_bh->compare(ptr, elem) > 0;
-		     i >>= 1, ptr = ib_bh_get(ib_bh, i >> 1)) {
-
-			ib_bh_set(ib_bh, i, ptr);
-		}
-
-		ptr = ib_bh_set(ib_bh, i, elem);
-	}
-
-	return(ptr);
-}
-
-/**********************************************************************//**
-Return the first element from the binary heap. 
-@return pointer to first element or NULL if empty. */
-UNIV_INTERN
-void*
-ib_bh_first(
-/*========*/
-	ib_bh_t*	ib_bh)			/*!< in,out: instance */
-{
-	return(ib_bh_is_empty(ib_bh) ? NULL : ib_bh_get(ib_bh, 0));
-}
-
-/**********************************************************************//**
-Return the last element from the binary heap. 
-@return pointer to last element or NULL if empty. */
-UNIV_INTERN
-void*
-ib_bh_last(
-/*========*/
-	ib_bh_t*	ib_bh)			/*!< in,out: instance */
-{
-	return(ib_bh_is_empty(ib_bh)
-	       ? NULL 
-	       : ib_bh_get(ib_bh, ib_bh_size(ib_bh) - 1));
-}
-
-/**********************************************************************//**
-Remove the first element from the binary heap. */
-UNIV_INTERN
-void
-ib_bh_pop(
-/*======*/
-	ib_bh_t*	ib_bh)			/*!< in,out: instance */
-{
-	byte*		ptr;
-	byte*		last;
-	ulint		parent = 0;
-
-	if (ib_bh_is_empty(ib_bh)) {
-		return;
-	} else if (ib_bh_size(ib_bh) == 1) {
-		--ib_bh->n_elems;
-		return;
-	}
-
-	last = (byte*) ib_bh_last(ib_bh);
-
-	/* Start from the child node */
-	ptr = (byte*) ib_bh_get(ib_bh, 1);
-
-	while (ptr < last) {
-		/* If the "right" child node is < "left" child node */
-		if (ib_bh->compare(ptr + ib_bh->sizeof_elem, ptr) < 0) {
-			ptr += ib_bh->sizeof_elem;
-		}
-
-		if (ib_bh->compare(last, ptr) > 0) {
-			ib_bh_set(ib_bh, parent, ptr);
-		} else {
-			break;
-		}
-
-		parent = (ptr - (byte*) ib_bh_first(ib_bh)) / ib_bh->sizeof_elem;
-
-		if ((parent << 1) < ib_bh_size(ib_bh)) {
-			ptr = (byte*) ib_bh_get(ib_bh, parent << 1);
-		} else {
-			break;
-		}
-	}
-
-	--ib_bh->n_elems;
-
-	ib_bh_set(ib_bh, parent, last);
-}

=== modified file 'storage/innobase/ut/ut0ut.c'
--- a/storage/innobase/ut/ut0ut.c	2010-11-15 07:11:18 +0000
+++ b/storage/innobase/ut/ut0ut.c	2011-02-22 05:11:15 +0000
@@ -1,13 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
-Copyright (c) 2009, Sun Microsystems, Inc.
-
-Portions of this file contain modifications contributed and copyrighted by
-Sun Microsystems, Inc. Those modifications are gratefully acknowledged and
-are described briefly in the InnoDB documentation. The contributions by
-Sun Microsystems are incorporated with their permission, and subject to the
-conditions contained in the file COPYING.Sun_Microsystems.
+Copyright (c) 2011, Oracle Corpn. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software

No bundle (reason: useless for push emails).
Thread
bzr push into mysql-trunk-mtr branch (bjorn.munch:3031 to 3033) Bjorn Munch24 Feb