List: Commits    « Previous Message | Next Message »
From: Sergey Vojtovich    Date: April 1 2010 3:10pm
Subject: bzr commit into mysql-5.1-bugteam branch (svoj:3439)
View as plain text  
#At file:///home/svoj/devel/bzr-mysql/mysql-5.1-bugteam/ based on revid:joro@stripped

 3439 Sergey Vojtovich	2010-04-01 [merge]
      Merge innodb-zip-ss6900 to mysql-5.1-bugteam.

    added:
      mysql-test/suite/innodb/r/innodb_bug47622.result
      mysql-test/suite/innodb/r/innodb_bug51378.result
      mysql-test/suite/innodb/t/innodb_bug47622.test
      mysql-test/suite/innodb/t/innodb_bug51378.test
      storage/innodb_plugin/include/ut0rbt.h
      storage/innodb_plugin/ut/ut0rbt.c
    modified:
      mysql-test/suite/innodb/r/innodb-index.result
      mysql-test/suite/innodb/r/innodb_bug44571.result
      mysql-test/suite/innodb/t/innodb-consistent.test
      mysql-test/suite/innodb/t/innodb-index.test
      mysql-test/suite/innodb/t/innodb_bug44571.test
      storage/innodb_plugin/CMakeLists.txt
      storage/innodb_plugin/ChangeLog
      storage/innodb_plugin/Makefile.am
      storage/innodb_plugin/btr/btr0btr.c
      storage/innodb_plugin/btr/btr0cur.c
      storage/innodb_plugin/btr/btr0pcur.c
      storage/innodb_plugin/buf/buf0buddy.c
      storage/innodb_plugin/buf/buf0buf.c
      storage/innodb_plugin/buf/buf0flu.c
      storage/innodb_plugin/buf/buf0lru.c
      storage/innodb_plugin/buf/buf0rea.c
      storage/innodb_plugin/dict/dict0boot.c
      storage/innodb_plugin/dict/dict0crea.c
      storage/innodb_plugin/dict/dict0dict.c
      storage/innodb_plugin/dict/dict0load.c
      storage/innodb_plugin/dict/dict0mem.c
      storage/innodb_plugin/fil/fil0fil.c
      storage/innodb_plugin/fsp/fsp0fsp.c
      storage/innodb_plugin/ha/ha0ha.c
      storage/innodb_plugin/ha/hash0hash.c
      storage/innodb_plugin/handler/ha_innodb.cc
      storage/innodb_plugin/handler/ha_innodb.h
      storage/innodb_plugin/handler/handler0alter.cc
      storage/innodb_plugin/ibuf/ibuf0ibuf.c
      storage/innodb_plugin/include/btr0btr.h
      storage/innodb_plugin/include/btr0btr.ic
      storage/innodb_plugin/include/btr0cur.h
      storage/innodb_plugin/include/btr0pcur.h
      storage/innodb_plugin/include/btr0pcur.ic
      storage/innodb_plugin/include/buf0buf.h
      storage/innodb_plugin/include/buf0buf.ic
      storage/innodb_plugin/include/buf0flu.h
      storage/innodb_plugin/include/data0type.ic
      storage/innodb_plugin/include/dict0boot.h
      storage/innodb_plugin/include/dict0mem.h
      storage/innodb_plugin/include/fil0fil.h
      storage/innodb_plugin/include/hash0hash.h
      storage/innodb_plugin/include/hash0hash.ic
      storage/innodb_plugin/include/lock0lock.h
      storage/innodb_plugin/include/log0log.h
      storage/innodb_plugin/include/log0log.ic
      storage/innodb_plugin/include/log0recv.h
      storage/innodb_plugin/include/mem0dbg.h
      storage/innodb_plugin/include/mem0dbg.ic
      storage/innodb_plugin/include/mem0mem.h
      storage/innodb_plugin/include/mem0mem.ic
      storage/innodb_plugin/include/mtr0mtr.ic
      storage/innodb_plugin/include/os0file.h
      storage/innodb_plugin/include/que0que.h
      storage/innodb_plugin/include/que0que.ic
      storage/innodb_plugin/include/row0mysql.h
      storage/innodb_plugin/include/row0sel.h
      storage/innodb_plugin/include/srv0srv.h
      storage/innodb_plugin/include/sync0rw.h
      storage/innodb_plugin/include/sync0sync.h
      storage/innodb_plugin/include/trx0rseg.h
      storage/innodb_plugin/include/trx0sys.h
      storage/innodb_plugin/include/trx0trx.h
      storage/innodb_plugin/include/trx0types.h
      storage/innodb_plugin/include/univ.i
      storage/innodb_plugin/include/ut0rnd.ic
      storage/innodb_plugin/lock/lock0lock.c
      storage/innodb_plugin/log/log0log.c
      storage/innodb_plugin/log/log0recv.c
      storage/innodb_plugin/mem/mem0dbg.c
      storage/innodb_plugin/mem/mem0mem.c
      storage/innodb_plugin/os/os0file.c
      storage/innodb_plugin/page/page0page.c
      storage/innodb_plugin/plug.in
      storage/innodb_plugin/rem/rem0rec.c
      storage/innodb_plugin/row/row0ins.c
      storage/innodb_plugin/row/row0merge.c
      storage/innodb_plugin/row/row0mysql.c
      storage/innodb_plugin/row/row0row.c
      storage/innodb_plugin/row/row0sel.c
      storage/innodb_plugin/row/row0umod.c
      storage/innodb_plugin/row/row0upd.c
      storage/innodb_plugin/srv/srv0srv.c
      storage/innodb_plugin/srv/srv0start.c
      storage/innodb_plugin/sync/sync0sync.c
      storage/innodb_plugin/trx/trx0i_s.c
      storage/innodb_plugin/trx/trx0rec.c
      storage/innodb_plugin/trx/trx0rseg.c
      storage/innodb_plugin/trx/trx0sys.c
      storage/innodb_plugin/trx/trx0trx.c
=== modified file 'mysql-test/suite/innodb/r/innodb-index.result'
--- a/mysql-test/suite/innodb/r/innodb-index.result	2009-11-30 12:49:13 +0000
+++ b/mysql-test/suite/innodb/r/innodb-index.result	2010-04-01 12:04:43 +0000
@@ -441,6 +441,7 @@ t3	CREATE TABLE `t3` (
   KEY `c` (`c`)
 ) ENGINE=InnoDB DEFAULT CHARSET=latin1
 alter table t2 drop index b, add index (b);
+ERROR 42000: Incorrect index name 'b'
 show create table t2;
 Table	Create Table
 t2	CREATE TABLE `t2` (
@@ -451,8 +452,8 @@ t2	CREATE TABLE `t2` (
   `e` int(11) DEFAULT NULL,
   PRIMARY KEY (`a`),
   UNIQUE KEY `dc` (`d`,`c`),
-  KEY `c` (`c`),
   KEY `b` (`b`),
+  KEY `c` (`c`),
   CONSTRAINT `t2_ibfk_1` FOREIGN KEY (`b`) REFERENCES `t1` (`b`) ON DELETE CASCADE,
   CONSTRAINT `t2_ibfk_2` FOREIGN KEY (`c`) REFERENCES `t3` (`c`),
   CONSTRAINT `t2_ibfk_3` FOREIGN KEY (`d`) REFERENCES `t4` (`d`)

=== modified file 'mysql-test/suite/innodb/r/innodb_bug44571.result'
--- a/mysql-test/suite/innodb/r/innodb_bug44571.result	2009-10-08 11:28:37 +0000
+++ b/mysql-test/suite/innodb/r/innodb_bug44571.result	2010-04-01 12:22:36 +0000
@@ -2,8 +2,7 @@ CREATE TABLE bug44571 (foo INT) ENGINE=I
 ALTER TABLE bug44571 CHANGE foo bar INT;
 ALTER TABLE bug44571 ADD INDEX bug44571b (foo);
 ERROR 42000: Key column 'foo' doesn't exist in table
-ALTER TABLE bug44571 ADD INDEX bug44571b (bar);
-ERROR HY000: Incorrect key file for table 'bug44571'; try to repair it
-CREATE INDEX bug44571b ON bug44571 (bar);
-ERROR HY000: Incorrect key file for table 'bug44571'; try to repair it
+ALTER TABLE bug44571 ADD INDEX bug44571c (bar);
+DROP INDEX bug44571c ON bug44571;
+CREATE INDEX bug44571c ON bug44571 (bar);
 DROP TABLE bug44571;

=== added file 'mysql-test/suite/innodb/r/innodb_bug47622.result'
--- a/mysql-test/suite/innodb/r/innodb_bug47622.result	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/innodb/r/innodb_bug47622.result	2010-04-01 11:30:11 +0000
@@ -0,0 +1,23 @@
+CREATE TABLE bug47622(
+`rule_key` int(11) NOT NULL DEFAULT '0',
+`seq` smallint(6) NOT NULL DEFAULT '0',
+`action` smallint(6) NOT NULL DEFAULT '0',
+`arg_id` smallint(6) DEFAULT NULL,
+`else_ind` TINYINT NOT NULL,
+KEY IDX_A (`arg_id`)
+) ENGINE=InnoDB;
+ALTER TABLE bug47622 ADD UNIQUE IDX_B (rule_key,else_ind,seq,action,arg_id);
+drop index IDX_B on bug47622;
+create index idx on bug47622(seq, arg_id);
+ALTER TABLE bug47622 ADD UNIQUE IDX_X (rule_key,else_ind,seq,action);
+drop table bug47622;
+CREATE TABLE bug47622 (
+`a` int(11) NOT NULL,
+`b` int(11) DEFAULT NULL,
+`c` char(10) DEFAULT NULL,
+`d` varchar(20) DEFAULT NULL,
+PRIMARY KEY (`a`),
+KEY `b` (`b`)
+) ENGINE=InnoDB;
+alter table bug47622 add unique index (c), add index (d);
+drop table bug47622;

=== added file 'mysql-test/suite/innodb/r/innodb_bug51378.result'
--- a/mysql-test/suite/innodb/r/innodb_bug51378.result	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/innodb/r/innodb_bug51378.result	2010-04-01 12:14:51 +0000
@@ -0,0 +1,66 @@
+create table bug51378 (
+col1 int not null,
+col2 blob not null,
+col3 time not null) engine = innodb;
+create unique index idx on bug51378(col1, col2(31));
+alter table bug51378 add unique index idx2(col1, col2(31));
+create unique index idx3 on bug51378(col1, col3);
+SHOW CREATE TABLE bug51378;
+Table	Create Table
+bug51378	CREATE TABLE `bug51378` (
+  `col1` int(11) NOT NULL,
+  `col2` blob NOT NULL,
+  `col3` time NOT NULL,
+  UNIQUE KEY `idx3` (`col1`,`col3`),
+  UNIQUE KEY `idx` (`col1`,`col2`(31)),
+  UNIQUE KEY `idx2` (`col1`,`col2`(31))
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop index idx3 on bug51378;
+SHOW CREATE TABLE bug51378;
+Table	Create Table
+bug51378	CREATE TABLE `bug51378` (
+  `col1` int(11) NOT NULL,
+  `col2` blob NOT NULL,
+  `col3` time NOT NULL,
+  UNIQUE KEY `idx` (`col1`,`col2`(31)),
+  UNIQUE KEY `idx2` (`col1`,`col2`(31))
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+alter table bug51378 add primary key idx3(col1, col2(31));
+SHOW CREATE TABLE bug51378;
+Table	Create Table
+bug51378	CREATE TABLE `bug51378` (
+  `col1` int(11) NOT NULL,
+  `col2` blob NOT NULL,
+  `col3` time NOT NULL,
+  PRIMARY KEY (`col1`,`col2`(31)),
+  UNIQUE KEY `idx` (`col1`,`col2`(31)),
+  UNIQUE KEY `idx2` (`col1`,`col2`(31))
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table bug51378;
+create table bug51378 (
+col1 int not null,
+col2 blob not null,
+col3 time not null, primary key(col1, col2(31))) engine = innodb;
+create unique index idx on bug51378(col1, col2(31));
+SHOW CREATE TABLE bug51378;
+Table	Create Table
+bug51378	CREATE TABLE `bug51378` (
+  `col1` int(11) NOT NULL,
+  `col2` blob NOT NULL,
+  `col3` time NOT NULL,
+  PRIMARY KEY (`col1`,`col2`(31)),
+  UNIQUE KEY `idx` (`col1`,`col2`(31))
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table bug51378;
+create table bug51378 (
+col1 int not null,
+col2 int ) engine = innodb;
+create unique index idx on bug51378(col1, col2);
+SHOW CREATE TABLE bug51378;
+Table	Create Table
+bug51378	CREATE TABLE `bug51378` (
+  `col1` int(11) NOT NULL,
+  `col2` int(11) DEFAULT NULL,
+  UNIQUE KEY `idx` (`col1`,`col2`)
+) ENGINE=InnoDB DEFAULT CHARSET=latin1
+drop table bug51378;

=== modified file 'mysql-test/suite/innodb/t/innodb-consistent.test'
--- a/mysql-test/suite/innodb/t/innodb-consistent.test	2009-10-09 13:37:47 +0000
+++ b/mysql-test/suite/innodb/t/innodb-consistent.test	2010-04-01 12:07:40 +0000
@@ -26,7 +26,7 @@ replace into t1 select * from t2;
 connection b;
 set session transaction isolation level read committed;
 set autocommit=0;
-# should not cuase a lock wait.
+# should not cause a lock wait.
 delete from t2 where a=5;
 commit;
 delete from t2;
@@ -42,7 +42,7 @@ insert into t1 select * from t2;
 connection b;
 set session transaction isolation level read committed;
 set autocommit=0;
-# should not cuase a lock wait.
+# should not cause a lock wait.
 delete from t2 where a=5;
 commit;
 delete from t2;

=== modified file 'mysql-test/suite/innodb/t/innodb-index.test'
--- a/mysql-test/suite/innodb/t/innodb-index.test	2009-11-30 12:49:13 +0000
+++ b/mysql-test/suite/innodb/t/innodb-index.test	2010-04-01 12:04:43 +0000
@@ -1,5 +1,7 @@
 -- source include/have_innodb.inc
 
+let $MYSQLD_DATADIR= `select @@datadir`;
+
 let $innodb_file_format_check_orig=`select @@innodb_file_format_check`;
 
 create table t1(a int not null, b int, c char(10) not null, d varchar(20)) engine = innodb;
@@ -139,6 +141,8 @@ show create table t4;
 --error ER_CANT_CREATE_TABLE
 alter table t3 add constraint dc foreign key (a) references t1(a);
 show create table t3;
+# this should be fixed by MySQL (see Bug #51451)
+--error ER_WRONG_NAME_FOR_INDEX
 alter table t2 drop index b, add index (b);
 show create table t2;
 --error ER_ROW_IS_REFERENCED_2
@@ -146,7 +150,9 @@ delete from t1;
 --error ER_CANT_DROP_FIELD_OR_KEY
 drop index dc on t4;
 # there is no foreign key dc on t3
---replace_regex /'\.\/test\/#sql2-[0-9a-f-]*'/'#sql2-temporary'/
+--replace_regex /'[^']*test\/#sql2-[0-9a-f-]*'/'#sql2-temporary'/
+# Embedded server doesn't chdir to data directory
+--replace_result $MYSQLD_DATADIR ./ master-data/ ''
 --error ER_ERROR_ON_RENAME
 alter table t3 drop foreign key dc;
 alter table t4 drop foreign key dc;

=== modified file 'mysql-test/suite/innodb/t/innodb_bug44571.test'
--- a/mysql-test/suite/innodb/t/innodb_bug44571.test	2009-10-08 11:28:37 +0000
+++ b/mysql-test/suite/innodb/t/innodb_bug44571.test	2010-04-01 12:27:13 +0000
@@ -1,18 +1,23 @@
 #
 # Bug#44571 InnoDB Plugin crashes on ADD INDEX
 # http://bugs.mysql.com/44571
+# Please also refer to related fix in
+# http://bugs.mysql.com/47621
 #
 -- source include/have_innodb.inc
 -- source suite/innodb/include/have_innodb_plugin.inc
 
 CREATE TABLE bug44571 (foo INT) ENGINE=InnoDB;
 ALTER TABLE bug44571 CHANGE foo bar INT;
+# Creating an index on the old column name will fail,
+# because the CHANGE foo bar was successful, and
+# the column name change is now communicated to
+# InnoDB with the fix for Bug #47621.
 -- error ER_KEY_COLUMN_DOES_NOT_EXITS
 ALTER TABLE bug44571 ADD INDEX bug44571b (foo);
-# The following will fail, because the CHANGE foo bar was
-# not communicated to InnoDB.
---error ER_NOT_KEYFILE
-ALTER TABLE bug44571 ADD INDEX bug44571b (bar);
---error ER_NOT_KEYFILE
-CREATE INDEX bug44571b ON bug44571 (bar);
+# The following index creations should succeed,
+# indirectly confirming that the CHANGE foo bar was successful.
+ALTER TABLE bug44571 ADD INDEX bug44571c (bar);
+DROP INDEX bug44571c ON bug44571;
+CREATE INDEX bug44571c ON bug44571 (bar);
 DROP TABLE bug44571;

=== added file 'mysql-test/suite/innodb/t/innodb_bug47622.test'
--- a/mysql-test/suite/innodb/t/innodb_bug47622.test	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/innodb/t/innodb_bug47622.test	2010-04-01 11:30:11 +0000
@@ -0,0 +1,55 @@
+# This is the test for bug 47622. There could be an index
+# metadata sequence mismatch between MySQL and InnoDB
+# after creating an index through the FIC interfaces.
+# We resolve the problem by syncing the index sequence
+# up when opening the table.
+
+--source include/have_innodb.inc
+
+connect (a,localhost,root,,);
+connect (b,localhost,root,,);
+
+# Create a table with a non-unique index
+CREATE TABLE bug47622(
+	`rule_key` int(11) NOT NULL DEFAULT '0',
+	`seq` smallint(6) NOT NULL DEFAULT '0',
+	`action` smallint(6) NOT NULL DEFAULT '0',
+	`arg_id` smallint(6) DEFAULT NULL,
+	`else_ind` TINYINT NOT NULL,
+	KEY IDX_A (`arg_id`)
+) ENGINE=InnoDB;
+
+connection a;
+
+# Subsequently creating a unique index should not trigger
+# any error message. The unique index would be ranked ahead
+# of the regular index.
+ALTER TABLE bug47622 ADD UNIQUE IDX_B (rule_key,else_ind,seq,action,arg_id);
+
+drop index IDX_B on bug47622;
+
+# In another connection, create an additional set of a normal
+# index and a unique index. Again, the unique index would be ranked
+# ahead of the regular index.
+connection b;
+create index idx on bug47622(seq, arg_id);
+
+ALTER TABLE bug47622 ADD UNIQUE IDX_X (rule_key,else_ind,seq,action);
+
+drop table bug47622;
+
+# Create a table with one Primary key and a non-unique key
+CREATE TABLE bug47622 (
+  `a` int(11) NOT NULL,
+  `b` int(11) DEFAULT NULL,
+  `c` char(10) DEFAULT NULL,
+  `d` varchar(20) DEFAULT NULL,
+  PRIMARY KEY (`a`),
+  KEY `b` (`b`)
+) ENGINE=InnoDB;
+
+# Add two indexes, one unique and one non-unique.
+# The index sequence is "PRIMARY", "c", "b" and "d".
+alter table bug47622 add unique index (c), add index (d);
+
+drop table bug47622;

=== added file 'mysql-test/suite/innodb/t/innodb_bug51378.test'
--- a/mysql-test/suite/innodb/t/innodb_bug51378.test	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/innodb/t/innodb_bug51378.test	2010-04-01 12:14:51 +0000
@@ -0,0 +1,77 @@
+# This is the test for bug 51378. A unique index created
+# through the "create index" and "alter table add unique index"
+# interfaces should not be treated as the primary index if the indexed
+# columns contain one or more column prefixes (only a prefix/part of
+# the column is indexed).
+# On the other hand, if there is a unique index that covers all
+# columns of a table, and they are non-null columns, and
+# the full length of each column is indexed, then this index
+# will be created as the primary index.
+# The following queries test various scenarios; no mismatch
+# error message should be printed.
+--source include/have_innodb.inc
+
+# Create a table containing a BLOB column
+create table bug51378 (
+	col1 int not null,
+	col2 blob not null,
+	col3 time not null) engine = innodb;
+
+# Create the following unique indexes on 'col1' and 'col2(31)'
+# of the table. The index should not be treated as a primary
+# key because it indexes only the first 31 bytes of col2.
+# Thus it contains a "column prefix", and will not be
+# upgraded to a primary index.
+# There should not be any mismatch message printed in the
+# error log.
+create unique index idx on bug51378(col1, col2(31));
+
+alter table bug51378 add unique index idx2(col1, col2(31));
+
+# Unique index on 'col1' and 'col3' will be created as primary index,
+# since the index does not contain column prefix
+create unique index idx3 on bug51378(col1, col3);
+
+# Show create table would show idx3 created as a unique index; internally,
+# idx3 is treated as the primary index by both MySQL and InnoDB
+SHOW CREATE TABLE bug51378;
+
+# "GEN_CLUST_INDEX" will be re-created as default primary index
+# after idx3 is dropped
+drop index idx3 on bug51378;
+
+SHOW CREATE TABLE bug51378;
+
+# Or we can add the primary key through alter table interfaces
+alter table bug51378 add primary key idx3(col1, col2(31));
+
+SHOW CREATE TABLE bug51378;
+
+drop table bug51378;
+
+# Or we can create such primary key through create table interfaces
+create table bug51378 (
+        col1 int not null,
+        col2 blob not null,
+        col3 time not null, primary key(col1, col2(31))) engine = innodb;
+
+# A unique index on one or more column prefixes will be created
+# as a non-clustered index
+create unique index idx on bug51378(col1, col2(31));
+
+SHOW CREATE TABLE bug51378;
+
+drop table bug51378;
+
+# If a table has a NULLABLE column, a unique index on it will not
+# be treated as the primary index.
+create table bug51378 (
+	col1 int not null,
+        col2 int ) engine = innodb;
+
+# This will be created as a non-clustered index since col2 is nullable
+create unique index idx on bug51378(col1, col2);
+
+SHOW CREATE TABLE bug51378;
+
+drop table bug51378;

=== modified file 'storage/innodb_plugin/CMakeLists.txt'
--- a/storage/innodb_plugin/CMakeLists.txt	2009-12-14 08:12:26 +0000
+++ b/storage/innodb_plugin/CMakeLists.txt	2010-04-01 12:56:22 +0000
@@ -78,7 +78,11 @@ SET(INNODB_PLUGIN_SOURCES	btr/btr0btr.c 
 			trx/trx0i_s.c trx/trx0purge.c trx/trx0rec.c trx/trx0roll.c trx/trx0rseg.c
 			trx/trx0sys.c trx/trx0trx.c trx/trx0undo.c
 			usr/usr0sess.c
-			ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c
+			ut/ut0byte.c ut/ut0dbg.c ut/ut0mem.c ut/ut0rbt.c ut/ut0rnd.c ut/ut0ut.c ut/ut0vec.c
 			ut/ut0list.c ut/ut0wqueue.c)
-ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS -DIB_HAVE_PAUSE_INSTRUCTION)
+# Windows atomics do not perform well. Disable Windows atomics by default.
+# See bug#52102 for details.
+#ADD_DEFINITIONS(-DHAVE_WINDOWS_ATOMICS -DINNODB_RW_LOCKS_USE_ATOMICS -DHAVE_IB_PAUSE_INSTRUCTION)
+ADD_DEFINITIONS(-DHAVE_IB_PAUSE_INSTRUCTION)
+
 MYSQL_STORAGE_ENGINE(INNODB_PLUGIN)

=== modified file 'storage/innodb_plugin/ChangeLog'
--- a/storage/innodb_plugin/ChangeLog	2009-11-30 13:42:26 +0000
+++ b/storage/innodb_plugin/ChangeLog	2010-04-01 13:00:43 +0000
@@ -1,3 +1,188 @@
+2010-03-18	The InnoDB Team
+
+	* CMakeLists.txt:
+	Fix Bug#52102 InnoDB Plugin shows performance drop compared to
+	InnoDB (Windows)
+
+2010-03-18	The InnoDB Team
+
+	* buf0buf.ic:
+	When comparing the time of the first access to a block against
+	innodb_old_blocks_time, use 32-bit arithmetic. The comparison was
+	incorrect on 64-bit systems.
+
+2010-03-11	The InnoDB Team
+
+	* buf0buf.h, buf0buf.ic:
+	Fix and clarify the latching of some buf_block_t members.
+	Note that check_index_page_at_flush is not protected by any mutex.
+	Note and assert that lock_hash_val is protected by the rw-latch.
+
+2010-03-10	The InnoDB Team
+
+	* trx/trx0sys.c:
+	Fix Bug#51653 outdated reference to set-variable
+
+2010-03-10	The InnoDB Team
+
+	* handler/ha_innodb.cc, mysql-test/innodb_bug21704.result,
+	mysql-test/innodb_bug47621.result, mysql-test/innodb_bug47621.test:
+	Fix Bug#47621 MySQL and InnoDB data dictionaries will become out of
+	sync when renaming columns
+
+2010-03-10	The InnoDB Team
+
+	* handler/ha_innodb.cc:
+	Fix Bug#51356 Many Valgrind errors in error messages
+	with concurrent DDL
+
+2010-03-10	The InnoDB Team
+
+	* handler/ha_innodb.cc, handler/handler0alter.cc,
+	mysql-test/innodb_bug51378.result, mysql-test/innodb_bug51378.test:
+	Fix Bug#51378 Init 'ref_length' to correct value, in case an out
+	of bound MySQL primary_key
+
+2010-03-10	The InnoDB Team
+
+	* log/log0recv.c:
+	Remove a bogus assertion about page numbers exceeding 0x90000000
+	in the redo log. Abort when encountering a corrupted redo log
+	record, unless innodb_force_recovery is set.
+
+2010-03-09	The InnoDB Team
+
+	* handler/ha_innodb.cc:
+	Make SHOW ENGINE INNODB MUTEX STATUS display SUM(os_waits)
+	for the buffer pool block mutexes and locks.
+
+2010-03-08	The InnoDB Team
+
+	* fil/fil0fil.c:
+	Fix ALTER TABLE ... IMPORT TABLESPACE of compressed tables.
+
+2010-03-03	The InnoDB Team
+
+	* handler/handler0alter.cc, innodb-index.result, innodb-index.test,
+	innodb.result, innodb.test:
+	Disallow a duplicate index name when creating an index.
+
+2010-02-11	The InnoDB Team
+
+	* include/mem0mem.h, include/mem0mem.ic, mem/mem0mem.c:
+	Fix Bug#49535 Available memory check slows down crash
+	recovery tens of times
+
+2010-02-09	The InnoDB Team
+
+	* buf/buf0buf.c:
+	Fix Bug#38901 InnoDB logs error repeatedly when trying to load
+	page into buffer pool
+
+2010-02-09	The InnoDB Team
+
+	* srv/srv0srv.c:
+	Let the master thread sleep if the amount of work to be done is
+	calibrated as taking less than a second.
+
+2010-02-04	The InnoDB Team
+
+	* btr/btr0btr.c, btr/btr0cur.c, btr/btr0pcur.c, buf/buf0buf.c,
+	include/btr0btr.h, include/btr0cur.h, include/btr0pcur.h,
+	include/btr0pcur.ic, include/buf0buf.h, row/row0ins.c, row/row0sel.c:
+	Pass the file name and line number of the caller of the
+	b-tree cursor functions to the buffer pool requests, in order
+	to make the latch diagnostics more accurate.
+
+2010-02-03	The InnoDB Team
+
+	* lock/lock0lock.c:
+	Fix Bug#49001 SHOW INNODB STATUS deadlock info incorrect
+	when deadlock detection aborts
+
+2010-02-03	The InnoDB Team
+
+	* buf/buf0lru.c:
+	Fix Bug#35077 Very slow DROP TABLE (ALTER TABLE, OPTIMIZE TABLE)
+	on compressed tables
+
+2010-02-03	The InnoDB Team
+
+	* handler/ha_innodb.cc, include/row0mysql.h, row/row0mysql.c:
+	Clean up CHECK TABLE error handling.
+
+2010-02-01	The InnoDB Team
+
+	* handler/ha_innodb.cc, mysql-test/innodb-autoinc.test,
+	mysql-test/innodb-autoinc.result,
+	mysql-test/innodb-autoinc-44030.test,
+	mysql-test/innodb-autoinc-44030.result:
+	Fix Bug#49497 Error 1467 (ER_AUTOINC_READ_FAILED) on inserting
+	a negative value
+
+2010-01-27	The InnoDB Team
+
+	* include/row0mysql.h, log/log0recv.c, row/row0mysql.c:
+	Drop temporary tables at startup.
+	This addresses the third aspect of
+	Bug#41609 Crash recovery does not work for InnoDB temporary tables.
+
+2010-01-21	The InnoDB Team
+
+	* buf/buf0buf.c:
+	Do not merge buffered inserts to compressed pages before
+	the redo log has been applied in crash recovery.
+
+2010-01-13	The InnoDB Team
+
+	* row/row0sel.c:
+	On the READ UNCOMMITTED isolation level, do not attempt to access
+	a clustered index record that has been marked for deletion. The
+	built-in InnoDB in MySQL 5.1 and earlier would attempt to retrieve
+	a previous version of the record in this case.
+
+2010-01-13	The InnoDB Team
+
+	* buf/buf0buf.c:
+	When disabling the adaptive hash index, check the block state
+	before checking block->is_hashed, because the latter may be
+	uninitialized right after server startup.
+
+2010-01-12	The InnoDB Team
+
+	* handler/ha_innodb.cc, handler/ha_innodb.h:
+	Fix Bug#46193 crash when accessing tables after enabling
+	innodb_force_recovery option
+
+2010-01-12	The InnoDB Team
+
+	* row/row0mysql.c:
+	Fix Bug#49238 Creating/Dropping a temporary table while at 1023
+	transactions will cause assert.
+
+2009-12-02	The InnoDB Team
+
+	* srv/srv0start.c:
+	Display the zlib version number at startup.
+	InnoDB compressed tables use zlib, and the implementation depends
+	on the zlib function compressBound(), whose definition was slightly
+	changed in zlib version 1.2.3.1 in 2006. MySQL bundles zlib 1.2.3
+	from 2005, but some installations use a more recent zlib.
+
+2009-11-30	The InnoDB Team
+
+	* dict/dict0crea.c, dict/dict0mem.c, dict/dict0load.c,
+	dict/dict0boot.c, fil/fil0fil.c, handler/ha_innodb.cc,
+	include/dict0mem.h, row/row0mysql.c:
+	Fix the bogus warning messages for non-existing temporary
+	tables that were reported in
+	Bug#41609 Crash recovery does not work for InnoDB temporary tables.
+	The actual crash recovery bug was corrected on 2009-04-29.
+
+2009-11-27	The InnoDB Team
+
+	InnoDB Plugin 1.0.6 released
+
 2009-11-20	The InnoDB Team
 
 	* handler/ha_innodb.cc:
@@ -79,8 +264,8 @@
 	sync/sync0arr.c, sync/sync0sync.c, thr/thr0loc.c, trx/trx0i_s.c,
 	trx/trx0purge.c, trx/trx0rseg.c, trx/trx0sys.c, trx/trx0undo.c,
 	usr/usr0sess.c, ut/ut0mem.c:
-	Fix Bug #45992 innodb memory not freed after shutdown
-	Fix Bug #46656 InnoDB plugin: memory leaks (Valgrind)
+	Fix Bug#45992 innodb memory not freed after shutdown
+	Fix Bug#46656 InnoDB plugin: memory leaks (Valgrind)
 
 2009-10-29	The InnoDB Team
 
@@ -422,7 +607,7 @@
 	* dict/dict0dict.c:
 	When an index column cannot be found in the table during index
 	creation, display additional diagnostic before an assertion failure.
-	This does NOT fix Bug #44571 InnoDB Plugin crashes on ADD INDEX,
+	This does NOT fix Bug#44571 InnoDB Plugin crashes on ADD INDEX,
 	but it helps understand the reason of the crash.
 
 2009-06-17	The InnoDB Team
@@ -535,6 +720,12 @@
 	Fix Bug#44320 InnoDB: missing DB_ROLL_PTR in Table Monitor COLUMNS
 	output
 
+2009-04-29	The InnoDB Team
+
+	* fil/fil0fil.c, include/fil0fil.h, include/mtr0mtr.h,
+	log/log0recv.c:
+	Fix Bug#41609 Crash recovery does not work for InnoDB temporary tables
+
 2009-04-23	The InnoDB Team
 
 	* row/row0mysql.c:

=== modified file 'storage/innodb_plugin/Makefile.am'
--- a/storage/innodb_plugin/Makefile.am	2009-10-09 14:02:18 +0000
+++ b/storage/innodb_plugin/Makefile.am	2010-04-01 12:56:22 +0000
@@ -217,6 +217,7 @@ noinst_HEADERS=		\
 			include/ut0lst.h	\
 			include/ut0mem.h	\
 			include/ut0mem.ic	\
+			include/ut0rbt.h	\
 			include/ut0rnd.h	\
 			include/ut0rnd.ic	\
 			include/ut0sort.h	\
@@ -318,6 +319,7 @@ libinnobase_a_SOURCES=	\
 			ut/ut0dbg.c			\
 			ut/ut0list.c			\
 			ut/ut0mem.c			\
+			ut/ut0rbt.c			\
 			ut/ut0rnd.c			\
 			ut/ut0ut.c			\
 			ut/ut0vec.c			\

=== modified file 'storage/innodb_plugin/btr/btr0btr.c'
--- a/storage/innodb_plugin/btr/btr0btr.c	2009-11-30 13:42:26 +0000
+++ b/storage/innodb_plugin/btr/btr0btr.c	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -592,13 +592,15 @@ an x-latch on the tree.
 @return	rec_get_offsets() of the node pointer record */
 static
 ulint*
-btr_page_get_father_node_ptr(
-/*=========================*/
+btr_page_get_father_node_ptr_func(
+/*==============================*/
 	ulint*		offsets,/*!< in: work area for the return value */
 	mem_heap_t*	heap,	/*!< in: memory heap to use */
 	btr_cur_t*	cursor,	/*!< in: cursor pointing to user record,
 				out: cursor on node pointer record,
 				its page x-latched */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
 	mtr_t*		mtr)	/*!< in: mtr */
 {
 	dtuple_t*	tuple;
@@ -622,7 +624,8 @@ btr_page_get_father_node_ptr(
 	tuple = dict_index_build_node_ptr(index, user_rec, 0, heap, level);
 
 	btr_cur_search_to_nth_level(index, level + 1, tuple, PAGE_CUR_LE,
-				    BTR_CONT_MODIFY_TREE, cursor, 0, mtr);
+				    BTR_CONT_MODIFY_TREE, cursor, 0,
+				    file, line, mtr);
 
 	node_ptr = btr_cur_get_rec(cursor);
 	ut_ad(!page_rec_is_comp(node_ptr)
@@ -670,6 +673,9 @@ btr_page_get_father_node_ptr(
 	return(offsets);
 }
 
+#define btr_page_get_father_node_ptr(of,heap,cur,mtr)			\
+	btr_page_get_father_node_ptr_func(of,heap,cur,__FILE__,__LINE__,mtr)
+
 /************************************************************//**
 Returns the upper level node pointer to a page. It is assumed that mtr holds
 an x-latch on the tree.
@@ -1662,11 +1668,13 @@ Inserts a data tuple to a tree on a non-
 that mtr holds an x-latch on the tree. */
 UNIV_INTERN
 void
-btr_insert_on_non_leaf_level(
-/*=========================*/
+btr_insert_on_non_leaf_level_func(
+/*==============================*/
 	dict_index_t*	index,	/*!< in: index */
 	ulint		level,	/*!< in: level, must be > 0 */
 	dtuple_t*	tuple,	/*!< in: the record to be inserted */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
 	mtr_t*		mtr)	/*!< in: mtr */
 {
 	big_rec_t*	dummy_big_rec;
@@ -1678,7 +1686,7 @@ btr_insert_on_non_leaf_level(
 
 	btr_cur_search_to_nth_level(index, level, tuple, PAGE_CUR_LE,
 				    BTR_CONT_MODIFY_TREE,
-				    &cursor, 0, mtr);
+				    &cursor, 0, file, line, mtr);
 
 	err = btr_cur_pessimistic_insert(BTR_NO_LOCKING_FLAG
 					 | BTR_KEEP_SYS_FLAG

=== modified file 'storage/innodb_plugin/btr/btr0cur.c'
--- a/storage/innodb_plugin/btr/btr0cur.c	2009-07-30 12:42:56 +0000
+++ b/storage/innodb_plugin/btr/btr0cur.c	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
@@ -342,6 +342,8 @@ btr_cur_search_to_nth_level(
 	ulint		has_search_latch,/*!< in: info on the latch mode the
 				caller currently has on btr_search_latch:
 				RW_S_LATCH, or 0 */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
 	mtr_t*		mtr)	/*!< in: mtr */
 {
 	page_cur_t*	page_cursor;
@@ -520,7 +522,7 @@ btr_cur_search_to_nth_level(
 retry_page_get:
 		block = buf_page_get_gen(space, zip_size, page_no,
 					 rw_latch, guess, buf_mode,
-					 __FILE__, __LINE__, mtr);
+					 file, line, mtr);
 		if (block == NULL) {
 			/* This must be a search to perform an insert;
 			try insert to the insert buffer */
@@ -677,13 +679,15 @@ func_exit:
 Opens a cursor at either end of an index. */
 UNIV_INTERN
 void
-btr_cur_open_at_index_side(
-/*=======================*/
+btr_cur_open_at_index_side_func(
+/*============================*/
 	ibool		from_left,	/*!< in: TRUE if open to the low end,
 					FALSE if to the high end */
 	dict_index_t*	index,		/*!< in: index */
 	ulint		latch_mode,	/*!< in: latch mode */
 	btr_cur_t*	cursor,		/*!< in: cursor */
+	const char*	file,		/*!< in: file name */
+	ulint		line,		/*!< in: line where called */
 	mtr_t*		mtr)		/*!< in: mtr */
 {
 	page_cur_t*	page_cursor;
@@ -728,7 +732,7 @@ btr_cur_open_at_index_side(
 		page_t*		page;
 		block = buf_page_get_gen(space, zip_size, page_no,
 					 RW_NO_LATCH, NULL, BUF_GET,
-					 __FILE__, __LINE__, mtr);
+					 file, line, mtr);
 		page = buf_block_get_frame(block);
 		ut_ad(0 == ut_dulint_cmp(index->id,
 					 btr_page_get_index_id(page)));
@@ -808,11 +812,13 @@ btr_cur_open_at_index_side(
 Positions a cursor at a randomly chosen position within a B-tree. */
 UNIV_INTERN
 void
-btr_cur_open_at_rnd_pos(
-/*====================*/
+btr_cur_open_at_rnd_pos_func(
+/*=========================*/
 	dict_index_t*	index,		/*!< in: index */
 	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
 	btr_cur_t*	cursor,		/*!< in/out: B-tree cursor */
+	const char*	file,		/*!< in: file name */
+	ulint		line,		/*!< in: line where called */
 	mtr_t*		mtr)		/*!< in: mtr */
 {
 	page_cur_t*	page_cursor;
@@ -847,7 +853,7 @@ btr_cur_open_at_rnd_pos(
 
 		block = buf_page_get_gen(space, zip_size, page_no,
 					 RW_NO_LATCH, NULL, BUF_GET,
-					 __FILE__, __LINE__, mtr);
+					 file, line, mtr);
 		page = buf_block_get_frame(block);
 		ut_ad(0 == ut_dulint_cmp(index->id,
 					 btr_page_get_index_id(page)));
@@ -3100,7 +3106,8 @@ btr_estimate_n_rows_in_range(
 
 		btr_cur_search_to_nth_level(index, 0, tuple1, mode1,
 					    BTR_SEARCH_LEAF | BTR_ESTIMATE,
-					    &cursor, 0, &mtr);
+					    &cursor, 0,
+					    __FILE__, __LINE__, &mtr);
 	} else {
 		btr_cur_open_at_index_side(TRUE, index,
 					   BTR_SEARCH_LEAF | BTR_ESTIMATE,
@@ -3117,7 +3124,8 @@ btr_estimate_n_rows_in_range(
 
 		btr_cur_search_to_nth_level(index, 0, tuple2, mode2,
 					    BTR_SEARCH_LEAF | BTR_ESTIMATE,
-					    &cursor, 0, &mtr);
+					    &cursor, 0,
+					    __FILE__, __LINE__, &mtr);
 	} else {
 		btr_cur_open_at_index_side(FALSE, index,
 					   BTR_SEARCH_LEAF | BTR_ESTIMATE,
@@ -4252,7 +4260,7 @@ btr_free_externally_stored_field(
 		/* In the rollback of uncommitted transactions, we may
 		encounter a clustered index record whose BLOBs have
 		not been written.  There is nothing to free then. */
-		ut_a(rb_ctx == RB_RECOVERY);
+		ut_a(rb_ctx == RB_RECOVERY || rb_ctx == RB_RECOVERY_PURGE_REC);
 		return;
 	}
 
@@ -4298,7 +4306,7 @@ btr_free_externally_stored_field(
 		    || (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
 			& BTR_EXTERN_OWNER_FLAG)
 		    /* Rollback and inherited field */
-		    || (rb_ctx != RB_NONE
+		    || ((rb_ctx == RB_NORMAL || rb_ctx == RB_RECOVERY)
 			&& (mach_read_from_1(field_ref + BTR_EXTERN_LEN)
 			    & BTR_EXTERN_INHERITED_FLAG))) {
 

=== modified file 'storage/innodb_plugin/btr/btr0pcur.c'
--- a/storage/innodb_plugin/btr/btr0pcur.c	2009-05-27 09:45:59 +0000
+++ b/storage/innodb_plugin/btr/btr0pcur.c	2010-04-01 12:27:13 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -205,10 +205,12 @@ record and it can be restored on a user 
 are identical to the ones of the original user record */
 UNIV_INTERN
 ibool
-btr_pcur_restore_position(
-/*======================*/
+btr_pcur_restore_position_func(
+/*===========================*/
 	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
 	btr_pcur_t*	cursor,		/*!< in: detached persistent cursor */
+	const char*	file,		/*!< in: file name */
+	ulint		line,		/*!< in: line where called */
 	mtr_t*		mtr)		/*!< in: mtr */
 {
 	dict_index_t*	index;
@@ -217,6 +219,9 @@ btr_pcur_restore_position(
 	ulint		old_mode;
 	mem_heap_t*	heap;
 
+	ut_ad(mtr);
+	ut_ad(mtr->state == MTR_ACTIVE);
+
 	index = btr_cur_get_index(btr_pcur_get_btr_cur(cursor));
 
 	if (UNIV_UNLIKELY(cursor->old_stored != BTR_PCUR_OLD_STORED)
@@ -257,7 +262,8 @@ btr_pcur_restore_position(
 		if (UNIV_LIKELY(buf_page_optimistic_get(
 					latch_mode,
 					cursor->block_when_stored,
-					cursor->modify_clock, mtr))) {
+					cursor->modify_clock,
+					file, line, mtr))) {
 			cursor->pos_state = BTR_PCUR_IS_POSITIONED;
 
 			buf_block_dbg_add_level(btr_pcur_get_block(cursor),
@@ -312,8 +318,8 @@ btr_pcur_restore_position(
 		mode = PAGE_CUR_L;
 	}
 
-	btr_pcur_open_with_no_init(index, tuple, mode, latch_mode,
-				   cursor, 0, mtr);
+	btr_pcur_open_with_no_init_func(index, tuple, mode, latch_mode,
+					cursor, 0, file, line, mtr);
 
 	/* Restore the old search mode */
 	cursor->search_mode = old_mode;
@@ -553,8 +559,8 @@ before first in tree. The latching mode 
 BTR_MODIFY_LEAF. */
 UNIV_INTERN
 void
-btr_pcur_open_on_user_rec(
-/*======================*/
+btr_pcur_open_on_user_rec_func(
+/*===========================*/
 	dict_index_t*	index,		/*!< in: index */
 	const dtuple_t*	tuple,		/*!< in: tuple on which search done */
 	ulint		mode,		/*!< in: PAGE_CUR_L, ... */
@@ -562,9 +568,12 @@ btr_pcur_open_on_user_rec(
 					BTR_MODIFY_LEAF */
 	btr_pcur_t*	cursor,		/*!< in: memory buffer for persistent
 					cursor */
+	const char*	file,		/*!< in: file name */
+	ulint		line,		/*!< in: line where called */
 	mtr_t*		mtr)		/*!< in: mtr */
 {
-	btr_pcur_open(index, tuple, mode, latch_mode, cursor, mtr);
+	btr_pcur_open_func(index, tuple, mode, latch_mode, cursor,
+			   file, line, mtr);
 
 	if ((mode == PAGE_CUR_GE) || (mode == PAGE_CUR_G)) {
 

=== modified file 'storage/innodb_plugin/buf/buf0buddy.c'
--- a/storage/innodb_plugin/buf/buf0buddy.c	2009-05-27 09:45:59 +0000
+++ b/storage/innodb_plugin/buf/buf0buddy.c	2010-04-01 13:01:22 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2006, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -391,6 +391,8 @@ buf_buddy_relocate_block(
 		UT_LIST_ADD_FIRST(list, buf_pool->zip_clean, dpage);
 	}
 
+	UNIV_MEM_INVALID(bpage, sizeof *bpage);
+
 	mutex_exit(&buf_pool_zip_mutex);
 	return(TRUE);
 }

=== modified file 'storage/innodb_plugin/buf/buf0buf.c'
--- a/storage/innodb_plugin/buf/buf0buf.c	2009-11-30 11:32:05 +0000
+++ b/storage/innodb_plugin/buf/buf0buf.c	2010-04-01 12:56:22 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
@@ -242,6 +242,8 @@ the read requests for the whole area.
 #ifndef UNIV_HOTBACKUP
 /** Value in microseconds */
 static const int WAIT_FOR_READ	= 5000;
+/** Number of attempts made to read in a page in the buffer pool */
+static const ulint BUF_PAGE_READ_MAX_RETRIES = 100;
 
 /** The buffer buf_pool of the database */
 UNIV_INTERN buf_pool_t*	buf_pool = NULL;
@@ -1058,7 +1060,9 @@ buf_pool_drop_hash_index(void)
 				when we have an x-latch on btr_search_latch;
 				see the comment in buf0buf.h */
 
-				if (!block->is_hashed) {
+				if (buf_block_get_state(block)
+				    != BUF_BLOCK_FILE_PAGE
+				    || !block->is_hashed) {
 					continue;
 				}
 
@@ -1187,8 +1191,6 @@ buf_relocate(
 
 	HASH_DELETE(buf_page_t, hash, buf_pool->page_hash, fold, bpage);
 	HASH_INSERT(buf_page_t, hash, buf_pool->page_hash, fold, dpage);
-
-	UNIV_MEM_INVALID(bpage, sizeof *bpage);
 }
 
 /********************************************************************//**
@@ -2034,8 +2036,10 @@ buf_page_get_gen(
 	unsigned	access_time;
 	ulint		fix_type;
 	ibool		must_read;
+	ulint		retries = 0;
 
 	ut_ad(mtr);
+	ut_ad(mtr->state == MTR_ACTIVE);
 	ut_ad((rw_latch == RW_S_LATCH)
 	      || (rw_latch == RW_X_LATCH)
 	      || (rw_latch == RW_NO_LATCH));
@@ -2088,7 +2092,29 @@ loop2:
 			return(NULL);
 		}
 
-		buf_read_page(space, zip_size, offset);
+		if (buf_read_page(space, zip_size, offset)) {
+			retries = 0;
+		} else if (retries < BUF_PAGE_READ_MAX_RETRIES) {
+			++retries;
+		} else {
+			fprintf(stderr, "InnoDB: Error: Unable"
+				" to read tablespace %lu page no"
+				" %lu into the buffer pool after"
+				" %lu attempts\n"
+				"InnoDB: The most probable cause"
+				" of this error may be that the"
+				" table has been corrupted.\n"
+				"InnoDB: You can try to fix this"
+				" problem by using"
+				" innodb_force_recovery.\n"
+				"InnoDB: Please see reference manual"
+				" for more details.\n"
+				"InnoDB: Aborting...\n",
+				space, offset,
+				BUF_PAGE_READ_MAX_RETRIES);
+
+			ut_error;
+		}
 
 #if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
 		ut_a(++buf_dbg_counter % 37 || buf_validate());
@@ -2196,22 +2222,8 @@ wait_until_unfixed:
 			ut_ad(!block->page.in_flush_list);
 		} else {
 			/* Relocate buf_pool->flush_list. */
-			buf_page_t*	b;
-
-			b = UT_LIST_GET_PREV(list, &block->page);
-			ut_ad(block->page.in_flush_list);
-			UT_LIST_REMOVE(list, buf_pool->flush_list,
-				       &block->page);
-
-			if (b) {
-				UT_LIST_INSERT_AFTER(
-					list, buf_pool->flush_list, b,
-					&block->page);
-			} else {
-				UT_LIST_ADD_FIRST(
-					list, buf_pool->flush_list,
-					&block->page);
-			}
+			buf_flush_relocate_on_flush_list(bpage,
+							 &block->page);
 		}
 
 		/* Buffer-fix, I/O-fix, and X-latch the block
@@ -2225,6 +2237,9 @@ wait_until_unfixed:
 		block->page.buf_fix_count = 1;
 		buf_block_set_io_fix(block, BUF_IO_READ);
 		rw_lock_x_lock(&block->lock);
+
+		UNIV_MEM_INVALID(bpage, sizeof *bpage);
+
 		mutex_exit(&block->mutex);
 		mutex_exit(&buf_pool_zip_mutex);
 		buf_pool->n_pend_unzip++;
@@ -2237,7 +2252,7 @@ wait_until_unfixed:
 		while not holding buf_pool_mutex or block->mutex. */
 		success = buf_zip_decompress(block, srv_use_checksums);
 
-		if (UNIV_LIKELY(success)) {
+		if (UNIV_LIKELY(success && !recv_no_ibuf_operations)) {
 			ibuf_merge_or_delete_for_page(block, space, offset,
 						      zip_size, TRUE);
 		}
@@ -2356,8 +2371,8 @@ page.
 @return	TRUE if success */
 UNIV_INTERN
 ibool
-buf_page_optimistic_get_func(
-/*=========================*/
+buf_page_optimistic_get(
+/*====================*/
 	ulint		rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
 	buf_block_t*	block,	/*!< in: guessed buffer block */
 	ib_uint64_t	modify_clock,/*!< in: modify clock value if mode is
@@ -2370,7 +2385,9 @@ buf_page_optimistic_get_func(
 	ibool		success;
 	ulint		fix_type;
 
-	ut_ad(mtr && block);
+	ut_ad(block);
+	ut_ad(mtr);
+	ut_ad(mtr->state == MTR_ACTIVE);
 	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
 
 	mutex_enter(&block->mutex);
@@ -2482,6 +2499,7 @@ buf_page_get_known_nowait(
 	ulint		fix_type;
 
 	ut_ad(mtr);
+	ut_ad(mtr->state == MTR_ACTIVE);
 	ut_ad((rw_latch == RW_S_LATCH) || (rw_latch == RW_X_LATCH));
 
 	mutex_enter(&block->mutex);
@@ -2581,6 +2599,9 @@ buf_page_try_get_func(
 	ibool		success;
 	ulint		fix_type;
 
+	ut_ad(mtr);
+	ut_ad(mtr->state == MTR_ACTIVE);
+
 	buf_pool_mutex_enter();
 	block = buf_block_hash_get(space_id, page_no);
 
@@ -2954,6 +2975,7 @@ buf_page_create(
 	ulint		time_ms		= ut_time_ms();
 
 	ut_ad(mtr);
+	ut_ad(mtr->state == MTR_ACTIVE);
 	ut_ad(space || !zip_size);
 
 	free_block = buf_LRU_get_free_block(0);

=== modified file 'storage/innodb_plugin/buf/buf0flu.c'
--- a/storage/innodb_plugin/buf/buf0flu.c	2009-11-03 10:02:19 +0000
+++ b/storage/innodb_plugin/buf/buf0flu.c	2010-04-01 13:01:22 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -88,6 +88,138 @@ buf_flush_validate_low(void);
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
 /********************************************************************//**
+Inserts a block in the flush_rbt and returns a pointer to its
+predecessor or NULL if no predecessor. The ordering is maintained
+on the basis of the <oldest_modification, space, offset> key.
+@return pointer to the predecessor or NULL if no predecessor. */
+static
+buf_page_t*
+buf_flush_insert_in_flush_rbt(
+/*==========================*/
+	buf_page_t*	bpage)		/*!< in: bpage to be inserted. */
+{
+	buf_page_t*		prev = NULL;
+	const ib_rbt_node_t*	c_node;
+	const ib_rbt_node_t*	p_node;
+
+	ut_ad(buf_pool_mutex_own());
+
+	/* Insert this buffer into the rbt. */
+	c_node = rbt_insert(buf_pool->flush_rbt, &bpage, &bpage);
+	ut_a(c_node != NULL);
+
+	/* Get the predecessor. */
+	p_node = rbt_prev(buf_pool->flush_rbt, c_node);
+
+	if (p_node != NULL) {
+		prev = *rbt_value(buf_page_t*, p_node);
+		ut_a(prev != NULL);
+	}
+
+	return(prev);
+}
+
+/********************************************************************//**
+Delete a bpage from the flush_rbt. */
+static
+void
+buf_flush_delete_from_flush_rbt(
+/*============================*/
+	buf_page_t*	bpage)		/*!< in: bpage to be removed. */
+{
+
+	ibool	ret = FALSE;
+
+	ut_ad(buf_pool_mutex_own());
+	ret = rbt_delete(buf_pool->flush_rbt, &bpage);
+	ut_ad(ret);
+}
+
+/********************************************************************//**
+Compare two modified blocks in the buffer pool. The key for comparison
+is:
+key = <oldest_modification, space, offset>
+This comparison is used to maintain ordering of blocks in the
+buf_pool->flush_rbt.
+Note that for the purpose of flush_rbt, we only need to order blocks
+on the oldest_modification. The other two fields are used to uniquely
+identify the blocks.
+@return < 0 if b2 < b1, 0 if b2 == b1, > 0 if b2 > b1 */
+static
+int
+buf_flush_block_cmp(
+/*================*/
+	const void*	p1,		/*!< in: block1 */
+	const void*	p2)		/*!< in: block2 */
+{
+	int		ret;
+
+	ut_ad(p1 != NULL);
+	ut_ad(p2 != NULL);
+
+	const buf_page_t* b1 = *(const buf_page_t**) p1;
+	const buf_page_t* b2 = *(const buf_page_t**) p2;
+
+	ut_ad(b1 != NULL);
+	ut_ad(b2 != NULL);
+
+	ut_ad(b1->in_flush_list);
+	ut_ad(b2->in_flush_list);
+
+	if (b2->oldest_modification
+	    > b1->oldest_modification) {
+		return(1);
+	}
+
+	if (b2->oldest_modification
+	    < b1->oldest_modification) {
+		return(-1);
+	}
+
+	/* If oldest_modification is same then decide on the space. */
+	ret = (int)(b2->space - b1->space);
+
+	/* Or else decide ordering on the offset field. */
+	return(ret ? ret : (int)(b2->offset - b1->offset));
+}
+
+/********************************************************************//**
+Initialize the red-black tree to speed up insertions into the flush_list
+during recovery process. Should be called at the start of recovery
+process before any page has been read/written. */
+UNIV_INTERN
+void
+buf_flush_init_flush_rbt(void)
+/*==========================*/
+{
+	buf_pool_mutex_enter();
+
+	/* Create red black tree for speedy insertions in flush list. */
+	buf_pool->flush_rbt = rbt_create(sizeof(buf_page_t*),
+					 buf_flush_block_cmp);
+	buf_pool_mutex_exit();
+}
+
+/********************************************************************//**
+Frees up the red-black tree. */
+UNIV_INTERN
+void
+buf_flush_free_flush_rbt(void)
+/*==========================*/
+{
+	buf_pool_mutex_enter();
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+	ut_a(buf_flush_validate_low());
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+
+	rbt_free(buf_pool->flush_rbt);
+	buf_pool->flush_rbt = NULL;
+
+	buf_pool_mutex_exit();
+}
+
+/********************************************************************//**
 Inserts a modified block into the flush list. */
 UNIV_INTERN
 void
@@ -100,6 +232,13 @@ buf_flush_insert_into_flush_list(
 	      || (UT_LIST_GET_FIRST(buf_pool->flush_list)->oldest_modification
 		  <= block->page.oldest_modification));
 
+	/* If we are in the recovery then we need to update the flush
+	red-black tree as well. */
+	if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
+		buf_flush_insert_sorted_into_flush_list(block);
+		return;
+	}
+
 	ut_ad(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
 	ut_ad(block->page.in_LRU_list);
 	ut_ad(block->page.in_page_hash);
@@ -136,12 +275,27 @@ buf_flush_insert_sorted_into_flush_list(
 	ut_d(block->page.in_flush_list = TRUE);
 
 	prev_b = NULL;
-	b = UT_LIST_GET_FIRST(buf_pool->flush_list);
 
-	while (b && b->oldest_modification > block->page.oldest_modification) {
-		ut_ad(b->in_flush_list);
-		prev_b = b;
-		b = UT_LIST_GET_NEXT(list, b);
+	/* For the most part when this function is called the flush_rbt
+	should not be NULL. In a very rare boundary case it is possible
+	that the flush_rbt has already been freed by the recovery thread
+	before the last page was hooked up in the flush_list by the
+	io-handler thread. In that case we'll  just do a simple
+	linear search in the else block. */
+	if (buf_pool->flush_rbt) {
+
+		prev_b = buf_flush_insert_in_flush_rbt(&block->page);
+
+	} else {
+
+		b = UT_LIST_GET_FIRST(buf_pool->flush_list);
+
+		while (b && b->oldest_modification
+		       > block->page.oldest_modification) {
+			ut_ad(b->in_flush_list);
+			prev_b = b;
+			b = UT_LIST_GET_NEXT(list, b);
+		}
 	}
 
 	if (prev_b == NULL) {
@@ -237,7 +391,6 @@ buf_flush_remove(
 	ut_ad(buf_pool_mutex_own());
 	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
 	ut_ad(bpage->in_flush_list);
-	ut_d(bpage->in_flush_list = FALSE);
 
 	switch (buf_page_get_state(bpage)) {
 	case BUF_BLOCK_ZIP_PAGE:
@@ -259,6 +412,15 @@ buf_flush_remove(
 		break;
 	}
 
+	/* If the flush_rbt is active then delete from it as well. */
+	if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
+		buf_flush_delete_from_flush_rbt(bpage);
+	}
+
+	/* Must be done after we have removed it from the flush_rbt
+	because we assert on in_flush_list in comparison function. */
+	ut_d(bpage->in_flush_list = FALSE);
+
 	bpage->oldest_modification = 0;
 
 	ut_d(UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
@@ -266,6 +428,63 @@ buf_flush_remove(
 }
 
 /********************************************************************//**
+Relocates a buffer control block on the flush_list.
+Note that it is assumed that the contents of bpage have already been
+copied to dpage. */
+UNIV_INTERN
+void
+buf_flush_relocate_on_flush_list(
+/*=============================*/
+	buf_page_t*	bpage,	/*!< in/out: control block being moved */
+	buf_page_t*	dpage)	/*!< in/out: destination block */
+{
+	buf_page_t* prev;
+	buf_page_t* prev_b = NULL;
+
+	ut_ad(buf_pool_mutex_own());
+
+	ut_ad(mutex_own(buf_page_get_mutex(bpage)));
+
+	ut_ad(bpage->in_flush_list);
+	ut_ad(dpage->in_flush_list);
+
+	/* If recovery is active we must swap the control blocks in
+	the flush_rbt as well. */
+	if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
+		buf_flush_delete_from_flush_rbt(bpage);
+		prev_b = buf_flush_insert_in_flush_rbt(dpage);
+	}
+
+	/* Must be done after we have removed it from the flush_rbt
+	because we assert on in_flush_list in comparison function. */
+	ut_d(bpage->in_flush_list = FALSE);
+
+	prev = UT_LIST_GET_PREV(list, bpage);
+	UT_LIST_REMOVE(list, buf_pool->flush_list, bpage);
+
+	if (prev) {
+		ut_ad(prev->in_flush_list);
+		UT_LIST_INSERT_AFTER(
+			list,
+			buf_pool->flush_list,
+			prev, dpage);
+	} else {
+		UT_LIST_ADD_FIRST(
+			list,
+			buf_pool->flush_list,
+			dpage);
+	}
+
+	/* Just an extra check. Previous in flush_list
+	should be the same control block as in flush_rbt. */
+	ut_a(!buf_pool->flush_rbt || prev_b == prev);
+
+#if defined UNIV_DEBUG || defined UNIV_BUF_DEBUG
+	ut_a(buf_flush_validate_low());
+#endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
+}
+
+/********************************************************************//**
 Updates the flush system data structures when a write is completed. */
 UNIV_INTERN
 void
@@ -1367,24 +1586,45 @@ ibool
 buf_flush_validate_low(void)
 /*========================*/
 {
-	buf_page_t*	bpage;
+	buf_page_t*		bpage;
+	const ib_rbt_node_t*	rnode = NULL;
 
 	UT_LIST_VALIDATE(list, buf_page_t, buf_pool->flush_list,
 			 ut_ad(ut_list_node_313->in_flush_list));
 
 	bpage = UT_LIST_GET_FIRST(buf_pool->flush_list);
 
+	/* If we are in recovery mode i.e.: flush_rbt != NULL
+	then each block in the flush_list must also be present
+	in the flush_rbt. */
+	if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
+		rnode = rbt_first(buf_pool->flush_rbt);
+	}
+
 	while (bpage != NULL) {
 		const ib_uint64_t om = bpage->oldest_modification;
 		ut_ad(bpage->in_flush_list);
 		ut_a(buf_page_in_file(bpage));
 		ut_a(om > 0);
 
+		if (UNIV_LIKELY_NULL(buf_pool->flush_rbt)) {
+			ut_a(rnode);
+			buf_page_t* rpage = *rbt_value(buf_page_t*,
+						       rnode);
+			ut_a(rpage);
+			ut_a(rpage == bpage);
+			rnode = rbt_next(buf_pool->flush_rbt, rnode);
+		}
+
 		bpage = UT_LIST_GET_NEXT(list, bpage);
 
 		ut_a(!bpage || om >= bpage->oldest_modification);
 	}
 
+	/* By this time we must have exhausted the traversal of
+	flush_rbt (if active) as well. */
+	ut_a(rnode == NULL);
+
 	return(TRUE);
 }
 

=== modified file 'storage/innodb_plugin/buf/buf0lru.c'
--- a/storage/innodb_plugin/buf/buf0lru.c	2009-11-03 10:26:07 +0000
+++ b/storage/innodb_plugin/buf/buf0lru.c	2010-04-01 12:56:22 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -350,17 +350,31 @@ scan_again:
 	bpage = UT_LIST_GET_LAST(buf_pool->LRU);
 
 	while (bpage != NULL) {
-		mutex_t*	block_mutex = buf_page_get_mutex(bpage);
 		buf_page_t*	prev_bpage;
+		ibool		prev_bpage_buf_fix = FALSE;
 
 		ut_a(buf_page_in_file(bpage));
 
-		mutex_enter(block_mutex);
 		prev_bpage = UT_LIST_GET_PREV(LRU, bpage);
 
-		if (buf_page_get_space(bpage) == id) {
-			if (bpage->buf_fix_count > 0
-			    || buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
+		/* bpage->space and bpage->io_fix are protected by
+		buf_pool_mutex and block_mutex.  It is safe to check
+		them while holding buf_pool_mutex only. */
+
+		if (buf_page_get_space(bpage) != id) {
+			/* Skip this block, as it does not belong to
+			the space that is being invalidated. */
+		} else if (buf_page_get_io_fix(bpage) != BUF_IO_NONE) {
+			/* We cannot remove this page during this scan
+			yet; maybe the system is currently reading it
+			in, or flushing the modifications to the file */
+
+			all_freed = FALSE;
+		} else {
+			mutex_t* block_mutex = buf_page_get_mutex(bpage);
+			mutex_enter(block_mutex);
+
+			if (bpage->buf_fix_count > 0) {
 
 				/* We cannot remove this page during
 				this scan yet; maybe the system is
@@ -380,8 +394,40 @@ scan_again:
 					(ulong) buf_page_get_page_no(bpage));
 			}
 #endif
-			if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE
-			    && ((buf_block_t*) bpage)->is_hashed) {
+			if (buf_page_get_state(bpage) != BUF_BLOCK_FILE_PAGE) {
+				/* This is a compressed-only block
+				descriptor.  Ensure that prev_bpage
+				cannot be relocated when bpage is freed. */
+				if (UNIV_LIKELY(prev_bpage != NULL)) {
+					switch (buf_page_get_state(
+							prev_bpage)) {
+					case BUF_BLOCK_FILE_PAGE:
+						/* Descriptors of uncompressed
+						blocks will not be relocated,
+						because we are holding the
+						buf_pool_mutex. */
+						break;
+					case BUF_BLOCK_ZIP_PAGE:
+					case BUF_BLOCK_ZIP_DIRTY:
+						/* Descriptors of compressed-
+						only blocks can be relocated,
+						unless they are buffer-fixed.
+						Because both bpage and
+						prev_bpage are protected by
+						buf_pool_zip_mutex, it is
+						not necessary to acquire
+						further mutexes. */
+						ut_ad(&buf_pool_zip_mutex
+						      == block_mutex);
+						ut_ad(mutex_own(block_mutex));
+						prev_bpage_buf_fix = TRUE;
+						prev_bpage->buf_fix_count++;
+						break;
+					default:
+						ut_error;
+					}
+				}
+			} else if (((buf_block_t*) bpage)->is_hashed) {
 				ulint	page_no;
 				ulint	zip_size;
 
@@ -405,7 +451,8 @@ scan_again:
 				buf_flush_remove(bpage);
 			}
 
-			/* Remove from the LRU list */
+			/* Remove from the LRU list. */
+
 			if (buf_LRU_block_remove_hashed_page(bpage, TRUE)
 			    != BUF_BLOCK_ZIP_FREE) {
 				buf_LRU_block_free_hashed_page((buf_block_t*)
@@ -417,18 +464,27 @@ scan_again:
 				ut_ad(block_mutex == &buf_pool_zip_mutex);
 				ut_ad(!mutex_own(block_mutex));
 
-				/* The compressed block descriptor
-				(bpage) has been deallocated and
-				block_mutex released.  Also,
-				buf_buddy_free() may have relocated
-				prev_bpage.  Rescan the LRU list. */
+				if (prev_bpage_buf_fix) {
+					/* We temporarily buffer-fixed
+					prev_bpage, so that
+					buf_buddy_free() could not
+					relocate it, in case it was a
+					compressed-only block
+					descriptor. */
+
+					mutex_enter(block_mutex);
+					ut_ad(prev_bpage->buf_fix_count > 0);
+					prev_bpage->buf_fix_count--;
+					mutex_exit(block_mutex);
+				}
 
-				bpage = UT_LIST_GET_LAST(buf_pool->LRU);
-				continue;
+				goto next_page_no_mutex;
 			}
-		}
 next_page:
-		mutex_exit(block_mutex);
+			mutex_exit(block_mutex);
+		}
+
+next_page_no_mutex:
 		bpage = prev_bpage;
 	}
 
@@ -1474,26 +1530,8 @@ alloc:
 			if (b->state == BUF_BLOCK_ZIP_PAGE) {
 				buf_LRU_insert_zip_clean(b);
 			} else {
-				buf_page_t* prev;
-
-				ut_ad(b->in_flush_list);
-				ut_d(bpage->in_flush_list = FALSE);
-
-				prev = UT_LIST_GET_PREV(list, b);
-				UT_LIST_REMOVE(list, buf_pool->flush_list, b);
-
-				if (prev) {
-					ut_ad(prev->in_flush_list);
-					UT_LIST_INSERT_AFTER(
-						list,
-						buf_pool->flush_list,
-						prev, b);
-				} else {
-					UT_LIST_ADD_FIRST(
-						list,
-						buf_pool->flush_list,
-						b);
-				}
+				/* Relocate on buf_pool->flush_list. */
+				buf_flush_relocate_on_flush_list(bpage, b);
 			}
 
 			bpage->zip.data = NULL;

=== modified file 'storage/innodb_plugin/buf/buf0rea.c'
--- a/storage/innodb_plugin/buf/buf0rea.c	2009-10-08 11:28:37 +0000
+++ b/storage/innodb_plugin/buf/buf0rea.c	2010-04-01 13:01:22 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -608,14 +608,14 @@ buf_read_recv_pages(
 		while (buf_pool->n_pend_reads >= recv_n_pool_free_frames / 2) {
 
 			os_aio_simulated_wake_handler_threads();
-			os_thread_sleep(500000);
+			os_thread_sleep(10000);
 
 			count++;
 
-			if (count > 100) {
+			if (count > 1000) {
 				fprintf(stderr,
 					"InnoDB: Error: InnoDB has waited for"
-					" 50 seconds for pending\n"
+					" 10 seconds for pending\n"
 					"InnoDB: reads to the buffer pool to"
 					" be finished.\n"
 					"InnoDB: Number of pending reads %lu,"

=== modified file 'storage/innodb_plugin/dict/dict0boot.c'
--- a/storage/innodb_plugin/dict/dict0boot.c	2009-05-27 09:45:59 +0000
+++ b/storage/innodb_plugin/dict/dict0boot.c	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -274,6 +274,9 @@ dict_boot(void)
 	and (TYPE & DICT_TF_FORMAT_MASK) are nonzero and TYPE = table->flags */
 	dict_mem_table_add_col(table, heap, "TYPE", DATA_INT, 0, 4);
 	dict_mem_table_add_col(table, heap, "MIX_ID", DATA_BINARY, 0, 0);
+	/* MIX_LEN may contain additional table flags when
+	ROW_FORMAT!=REDUNDANT.  Currently, these flags include
+	DICT_TF2_TEMPORARY. */
 	dict_mem_table_add_col(table, heap, "MIX_LEN", DATA_INT, 0, 4);
 	dict_mem_table_add_col(table, heap, "CLUSTER_NAME", DATA_BINARY, 0, 0);
 	dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4);
@@ -355,7 +358,7 @@ dict_boot(void)
 	dict_mem_table_add_col(table, heap, "SPACE", DATA_INT, 0, 4);
 	dict_mem_table_add_col(table, heap, "PAGE_NO", DATA_INT, 0, 4);
 
-	/* The '+ 2' below comes from the 2 system fields */
+	/* The '+ 2' below comes from the fields DB_TRX_ID, DB_ROLL_PTR */
 #if DICT_SYS_INDEXES_PAGE_NO_FIELD != 6 + 2
 #error "DICT_SYS_INDEXES_PAGE_NO_FIELD != 6 + 2"
 #endif
@@ -365,6 +368,9 @@ dict_boot(void)
 #if DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2
 #error "DICT_SYS_INDEXES_TYPE_FIELD != 4 + 2"
 #endif
+#if DICT_SYS_INDEXES_NAME_FIELD != 1 + 2
+#error "DICT_SYS_INDEXES_NAME_FIELD != 1 + 2"
+#endif
 
 	table->id = DICT_INDEXES_ID;
 	dict_table_add_to_cache(table, heap);

=== modified file 'storage/innodb_plugin/dict/dict0crea.c'
--- a/storage/innodb_plugin/dict/dict0crea.c	2009-10-08 09:13:16 +0000
+++ b/storage/innodb_plugin/dict/dict0crea.c	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -51,16 +51,18 @@ static
 dtuple_t*
 dict_create_sys_tables_tuple(
 /*=========================*/
-	dict_table_t*	table,	/*!< in: table */
-	mem_heap_t*	heap)	/*!< in: memory heap from which the memory for
-				the built tuple is allocated */
+	const dict_table_t*	table,	/*!< in: table */
+	mem_heap_t*		heap)	/*!< in: memory heap from
+					which the memory for the built
+					tuple is allocated */
 {
 	dict_table_t*	sys_tables;
 	dtuple_t*	entry;
 	dfield_t*	dfield;
 	byte*		ptr;
 
-	ut_ad(table && heap);
+	ut_ad(table);
+	ut_ad(heap);
 
 	sys_tables = dict_sys->sys_tables;
 
@@ -69,18 +71,18 @@ dict_create_sys_tables_tuple(
 	dict_table_copy_types(entry, sys_tables);
 
 	/* 0: NAME -----------------------------*/
-	dfield = dtuple_get_nth_field(entry, 0);
+	dfield = dtuple_get_nth_field(entry, 0/*NAME*/);
 
 	dfield_set_data(dfield, table->name, ut_strlen(table->name));
 	/* 3: ID -------------------------------*/
-	dfield = dtuple_get_nth_field(entry, 1);
+	dfield = dtuple_get_nth_field(entry, 1/*ID*/);
 
 	ptr = mem_heap_alloc(heap, 8);
 	mach_write_to_8(ptr, table->id);
 
 	dfield_set_data(dfield, ptr, 8);
 	/* 4: N_COLS ---------------------------*/
-	dfield = dtuple_get_nth_field(entry, 2);
+	dfield = dtuple_get_nth_field(entry, 2/*N_COLS*/);
 
 #if DICT_TF_COMPACT != 1
 #error
@@ -91,40 +93,41 @@ dict_create_sys_tables_tuple(
 			| ((table->flags & DICT_TF_COMPACT) << 31));
 	dfield_set_data(dfield, ptr, 4);
 	/* 5: TYPE -----------------------------*/
-	dfield = dtuple_get_nth_field(entry, 3);
+	dfield = dtuple_get_nth_field(entry, 3/*TYPE*/);
 
 	ptr = mem_heap_alloc(heap, 4);
-	if (table->flags & ~DICT_TF_COMPACT) {
+	if (table->flags & (~DICT_TF_COMPACT & ~(~0 << DICT_TF_BITS))) {
 		ut_a(table->flags & DICT_TF_COMPACT);
 		ut_a(dict_table_get_format(table) >= DICT_TF_FORMAT_ZIP);
 		ut_a((table->flags & DICT_TF_ZSSIZE_MASK)
 		     <= (DICT_TF_ZSSIZE_MAX << DICT_TF_ZSSIZE_SHIFT));
-		ut_a(!(table->flags & (~0 << DICT_TF_BITS)));
-		mach_write_to_4(ptr, table->flags);
+		ut_a(!(table->flags & (~0 << DICT_TF2_BITS)));
+		mach_write_to_4(ptr, table->flags & ~(~0 << DICT_TF_BITS));
 	} else {
 		mach_write_to_4(ptr, DICT_TABLE_ORDINARY);
 	}
 
 	dfield_set_data(dfield, ptr, 4);
 	/* 6: MIX_ID (obsolete) ---------------------------*/
-	dfield = dtuple_get_nth_field(entry, 4);
+	dfield = dtuple_get_nth_field(entry, 4/*MIX_ID*/);
 
 	ptr = mem_heap_zalloc(heap, 8);
 
 	dfield_set_data(dfield, ptr, 8);
-	/* 7: MIX_LEN (obsolete) --------------------------*/
+	/* 7: MIX_LEN (additional flags) --------------------------*/
 
-	dfield = dtuple_get_nth_field(entry, 5);
+	dfield = dtuple_get_nth_field(entry, 5/*MIX_LEN*/);
 
-	ptr = mem_heap_zalloc(heap, 4);
+	ptr = mem_heap_alloc(heap, 4);
+	mach_write_to_4(ptr, table->flags >> DICT_TF2_SHIFT);
 
 	dfield_set_data(dfield, ptr, 4);
 	/* 8: CLUSTER_NAME ---------------------*/
-	dfield = dtuple_get_nth_field(entry, 6);
+	dfield = dtuple_get_nth_field(entry, 6/*CLUSTER_NAME*/);
 	dfield_set_null(dfield); /* not supported */
 
 	/* 9: SPACE ----------------------------*/
-	dfield = dtuple_get_nth_field(entry, 7);
+	dfield = dtuple_get_nth_field(entry, 7/*SPACE*/);
 
 	ptr = mem_heap_alloc(heap, 4);
 	mach_write_to_4(ptr, table->space);
@@ -143,19 +146,21 @@ static
 dtuple_t*
 dict_create_sys_columns_tuple(
 /*==========================*/
-	dict_table_t*	table,	/*!< in: table */
-	ulint		i,	/*!< in: column number */
-	mem_heap_t*	heap)	/*!< in: memory heap from which the memory for
-				the built tuple is allocated */
+	const dict_table_t*	table,	/*!< in: table */
+	ulint			i,	/*!< in: column number */
+	mem_heap_t*		heap)	/*!< in: memory heap from
+					which the memory for the built
+					tuple is allocated */
 {
 	dict_table_t*		sys_columns;
 	dtuple_t*		entry;
 	const dict_col_t*	column;
 	dfield_t*		dfield;
 	byte*			ptr;
-	const char*	col_name;
+	const char*		col_name;
 
-	ut_ad(table && heap);
+	ut_ad(table);
+	ut_ad(heap);
 
 	column = dict_table_get_nth_col(table, i);
 
@@ -166,47 +171,47 @@ dict_create_sys_columns_tuple(
 	dict_table_copy_types(entry, sys_columns);
 
 	/* 0: TABLE_ID -----------------------*/
-	dfield = dtuple_get_nth_field(entry, 0);
+	dfield = dtuple_get_nth_field(entry, 0/*TABLE_ID*/);
 
 	ptr = mem_heap_alloc(heap, 8);
 	mach_write_to_8(ptr, table->id);
 
 	dfield_set_data(dfield, ptr, 8);
 	/* 1: POS ----------------------------*/
-	dfield = dtuple_get_nth_field(entry, 1);
+	dfield = dtuple_get_nth_field(entry, 1/*POS*/);
 
 	ptr = mem_heap_alloc(heap, 4);
 	mach_write_to_4(ptr, i);
 
 	dfield_set_data(dfield, ptr, 4);
 	/* 4: NAME ---------------------------*/
-	dfield = dtuple_get_nth_field(entry, 2);
+	dfield = dtuple_get_nth_field(entry, 2/*NAME*/);
 
 	col_name = dict_table_get_col_name(table, i);
 	dfield_set_data(dfield, col_name, ut_strlen(col_name));
 	/* 5: MTYPE --------------------------*/
-	dfield = dtuple_get_nth_field(entry, 3);
+	dfield = dtuple_get_nth_field(entry, 3/*MTYPE*/);
 
 	ptr = mem_heap_alloc(heap, 4);
 	mach_write_to_4(ptr, column->mtype);
 
 	dfield_set_data(dfield, ptr, 4);
 	/* 6: PRTYPE -------------------------*/
-	dfield = dtuple_get_nth_field(entry, 4);
+	dfield = dtuple_get_nth_field(entry, 4/*PRTYPE*/);
 
 	ptr = mem_heap_alloc(heap, 4);
 	mach_write_to_4(ptr, column->prtype);
 
 	dfield_set_data(dfield, ptr, 4);
 	/* 7: LEN ----------------------------*/
-	dfield = dtuple_get_nth_field(entry, 5);
+	dfield = dtuple_get_nth_field(entry, 5/*LEN*/);
 
 	ptr = mem_heap_alloc(heap, 4);
 	mach_write_to_4(ptr, column->len);
 
 	dfield_set_data(dfield, ptr, 4);
 	/* 8: PREC ---------------------------*/
-	dfield = dtuple_get_nth_field(entry, 6);
+	dfield = dtuple_get_nth_field(entry, 6/*PREC*/);
 
 	ptr = mem_heap_alloc(heap, 4);
 	mach_write_to_4(ptr, 0/* unused */);
@@ -230,6 +235,7 @@ dict_build_table_def_step(
 	dict_table_t*	table;
 	dtuple_t*	row;
 	ulint		error;
+	ulint		flags;
 	const char*	path_or_name;
 	ibool		is_path;
 	mtr_t		mtr;
@@ -268,9 +274,10 @@ dict_build_table_def_step(
 		ut_ad(!dict_table_zip_size(table)
 		      || dict_table_get_format(table) >= DICT_TF_FORMAT_ZIP);
 
+		flags = table->flags & ~(~0 << DICT_TF_BITS);
 		error = fil_create_new_single_table_tablespace(
 			&space, path_or_name, is_path,
-			table->flags == DICT_TF_COMPACT ? 0 : table->flags,
+			flags == DICT_TF_COMPACT ? 0 : flags,
 			FIL_IBD_FILE_INITIAL_SIZE);
 		table->space = (unsigned int) space;
 
@@ -286,7 +293,7 @@ dict_build_table_def_step(
 		mtr_commit(&mtr);
 	} else {
 		/* Create in the system tablespace: disallow new features */
-		table->flags &= DICT_TF_COMPACT;
+		table->flags &= (~0 << DICT_TF_BITS) | DICT_TF_COMPACT;
 	}
 
 	row = dict_create_sys_tables_tuple(table, node->heap);
@@ -322,9 +329,10 @@ static
 dtuple_t*
 dict_create_sys_indexes_tuple(
 /*==========================*/
-	dict_index_t*	index,	/*!< in: index */
-	mem_heap_t*	heap)	/*!< in: memory heap from which the memory for
-				the built tuple is allocated */
+	const dict_index_t*	index,	/*!< in: index */
+	mem_heap_t*		heap)	/*!< in: memory heap from
+					which the memory for the built
+					tuple is allocated */
 {
 	dict_table_t*	sys_indexes;
 	dict_table_t*	table;
@@ -333,7 +341,8 @@ dict_create_sys_indexes_tuple(
 	byte*		ptr;
 
 	ut_ad(mutex_own(&(dict_sys->mutex)));
-	ut_ad(index && heap);
+	ut_ad(index);
+	ut_ad(heap);
 
 	sys_indexes = dict_sys->sys_indexes;
 
@@ -344,32 +353,32 @@ dict_create_sys_indexes_tuple(
 	dict_table_copy_types(entry, sys_indexes);
 
 	/* 0: TABLE_ID -----------------------*/
-	dfield = dtuple_get_nth_field(entry, 0);
+	dfield = dtuple_get_nth_field(entry, 0/*TABLE_ID*/);
 
 	ptr = mem_heap_alloc(heap, 8);
 	mach_write_to_8(ptr, table->id);
 
 	dfield_set_data(dfield, ptr, 8);
 	/* 1: ID ----------------------------*/
-	dfield = dtuple_get_nth_field(entry, 1);
+	dfield = dtuple_get_nth_field(entry, 1/*ID*/);
 
 	ptr = mem_heap_alloc(heap, 8);
 	mach_write_to_8(ptr, index->id);
 
 	dfield_set_data(dfield, ptr, 8);
 	/* 4: NAME --------------------------*/
-	dfield = dtuple_get_nth_field(entry, 2);
+	dfield = dtuple_get_nth_field(entry, 2/*NAME*/);
 
 	dfield_set_data(dfield, index->name, ut_strlen(index->name));
 	/* 5: N_FIELDS ----------------------*/
-	dfield = dtuple_get_nth_field(entry, 3);
+	dfield = dtuple_get_nth_field(entry, 3/*N_FIELDS*/);
 
 	ptr = mem_heap_alloc(heap, 4);
 	mach_write_to_4(ptr, index->n_fields);
 
 	dfield_set_data(dfield, ptr, 4);
 	/* 6: TYPE --------------------------*/
-	dfield = dtuple_get_nth_field(entry, 4);
+	dfield = dtuple_get_nth_field(entry, 4/*TYPE*/);
 
 	ptr = mem_heap_alloc(heap, 4);
 	mach_write_to_4(ptr, index->type);
@@ -381,7 +390,7 @@ dict_create_sys_indexes_tuple(
 #error "DICT_SYS_INDEXES_SPACE_NO_FIELD != 7"
 #endif
 
-	dfield = dtuple_get_nth_field(entry, 5);
+	dfield = dtuple_get_nth_field(entry, 5/*SPACE*/);
 
 	ptr = mem_heap_alloc(heap, 4);
 	mach_write_to_4(ptr, index->space);
@@ -393,7 +402,7 @@ dict_create_sys_indexes_tuple(
 #error "DICT_SYS_INDEXES_PAGE_NO_FIELD != 8"
 #endif
 
-	dfield = dtuple_get_nth_field(entry, 6);
+	dfield = dtuple_get_nth_field(entry, 6/*PAGE_NO*/);
 
 	ptr = mem_heap_alloc(heap, 4);
 	mach_write_to_4(ptr, FIL_NULL);
@@ -412,10 +421,11 @@ static
 dtuple_t*
 dict_create_sys_fields_tuple(
 /*=========================*/
-	dict_index_t*	index,	/*!< in: index */
-	ulint		i,	/*!< in: field number */
-	mem_heap_t*	heap)	/*!< in: memory heap from which the memory for
-				the built tuple is allocated */
+	const dict_index_t*	index,	/*!< in: index */
+	ulint			i,	/*!< in: field number */
+	mem_heap_t*		heap)	/*!< in: memory heap from
+					which the memory for the built
+					tuple is allocated */
 {
 	dict_table_t*	sys_fields;
 	dtuple_t*	entry;
@@ -425,7 +435,8 @@ dict_create_sys_fields_tuple(
 	ibool		index_contains_column_prefix_field	= FALSE;
 	ulint		j;
 
-	ut_ad(index && heap);
+	ut_ad(index);
+	ut_ad(heap);
 
 	for (j = 0; j < index->n_fields; j++) {
 		if (dict_index_get_nth_field(index, j)->prefix_len > 0) {
@@ -443,7 +454,7 @@ dict_create_sys_fields_tuple(
 	dict_table_copy_types(entry, sys_fields);
 
 	/* 0: INDEX_ID -----------------------*/
-	dfield = dtuple_get_nth_field(entry, 0);
+	dfield = dtuple_get_nth_field(entry, 0/*INDEX_ID*/);
 
 	ptr = mem_heap_alloc(heap, 8);
 	mach_write_to_8(ptr, index->id);
@@ -451,7 +462,7 @@ dict_create_sys_fields_tuple(
 	dfield_set_data(dfield, ptr, 8);
 	/* 1: POS + PREFIX LENGTH ----------------------------*/
 
-	dfield = dtuple_get_nth_field(entry, 1);
+	dfield = dtuple_get_nth_field(entry, 1/*POS*/);
 
 	ptr = mem_heap_alloc(heap, 4);
 
@@ -471,7 +482,7 @@ dict_create_sys_fields_tuple(
 
 	dfield_set_data(dfield, ptr, 4);
 	/* 4: COL_NAME -------------------------*/
-	dfield = dtuple_get_nth_field(entry, 2);
+	dfield = dtuple_get_nth_field(entry, 2/*COL_NAME*/);
 
 	dfield_set_data(dfield, field->name,
 			ut_strlen(field->name));
@@ -602,6 +613,7 @@ dict_create_index_tree_step(
 	dict_table_t*	sys_indexes;
 	dict_table_t*	table;
 	dtuple_t*	search_tuple;
+	ulint		zip_size;
 	btr_pcur_t	pcur;
 	mtr_t		mtr;
 
@@ -626,8 +638,9 @@ dict_create_index_tree_step(
 
 	btr_pcur_move_to_next_user_rec(&pcur, &mtr);
 
-	node->page_no = btr_create(index->type, index->space,
-				   dict_table_zip_size(index->table),
+	zip_size = dict_table_zip_size(index->table);
+
+	node->page_no = btr_create(index->type, index->space, zip_size,
 				   index->id, index, &mtr);
 	/* printf("Created a new index tree in space %lu root page %lu\n",
 	index->space, index->page_no); */

=== modified file 'storage/innodb_plugin/dict/dict0dict.c'
--- a/storage/innodb_plugin/dict/dict0dict.c	2009-11-30 11:42:51 +0000
+++ b/storage/innodb_plugin/dict/dict0dict.c	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -140,7 +140,7 @@ static
 void
 dict_field_print_low(
 /*=================*/
-	dict_field_t*	field);	/*!< in: field */
+	const dict_field_t*	field);	/*!< in: field */
 /*********************************************************************//**
 Frees a foreign key struct. */
 static
@@ -1460,6 +1460,7 @@ dict_index_add_to_cache(
 
 	if (!dict_index_find_cols(table, index)) {
 
+		dict_mem_index_free(index);
 		return(DB_CORRUPTION);
 	}
 
@@ -4402,7 +4403,7 @@ static
 void
 dict_field_print_low(
 /*=================*/
-	dict_field_t*	field)	/*!< in: field */
+	const dict_field_t*	field)	/*!< in: field */
 {
 	ut_ad(mutex_own(&(dict_sys->mutex)));
 
@@ -4775,6 +4776,8 @@ dict_table_check_for_dup_indexes(
 	const dict_index_t*	index1;
 	const dict_index_t*	index2;
 
+	ut_ad(mutex_own(&dict_sys->mutex));
+
 	/* The primary index _must_ exist */
 	ut_a(UT_LIST_GET_LEN(table->indexes) > 0);
 

=== modified file 'storage/innodb_plugin/dict/dict0load.c'
--- a/storage/innodb_plugin/dict/dict0load.c	2009-05-27 09:45:59 +0000
+++ b/storage/innodb_plugin/dict/dict0load.c	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -260,7 +260,7 @@ dict_sys_tables_get_flags(
 		return(0);
 	}
 
-	field = rec_get_nth_field_old(rec, 4, &len);
+	field = rec_get_nth_field_old(rec, 4/*N_COLS*/, &len);
 	n_cols = mach_read_from_4(field);
 
 	if (UNIV_UNLIKELY(!(n_cols & 0x80000000UL))) {
@@ -390,15 +390,35 @@ loop:
 
 		mtr_commit(&mtr);
 
-		if (space_id != 0 && in_crash_recovery) {
+		if (space_id == 0) {
+			/* The system tablespace always exists. */
+		} else if (in_crash_recovery) {
 			/* Check that the tablespace (the .ibd file) really
-			exists; print a warning to the .err log if not */
-
-			fil_space_for_table_exists_in_mem(space_id, name,
-							  FALSE, TRUE, TRUE);
-		}
+			exists; print a warning to the .err log if not.
+			Do not print warnings for temporary tables. */
+			ibool	is_temp;
+
+			field = rec_get_nth_field_old(rec, 4, &len);
+			if (0x80000000UL &  mach_read_from_4(field)) {
+				/* ROW_FORMAT=COMPACT: read the is_temp
+				flag from SYS_TABLES.MIX_LEN. */
+				field = rec_get_nth_field_old(rec, 7, &len);
+				is_temp = mach_read_from_4(field)
+					& DICT_TF2_TEMPORARY;
+			} else {
+				/* For tables created with old versions
+				of InnoDB, SYS_TABLES.MIX_LEN may contain
+				garbage.  Such tables would always be
+				in ROW_FORMAT=REDUNDANT.  Pretend that
+				all such tables are non-temporary.  That is,
+				do not suppress error printouts about
+				temporary tables not being found. */
+				is_temp = FALSE;
+			}
 
-		if (space_id != 0 && !in_crash_recovery) {
+			fil_space_for_table_exists_in_mem(
+				space_id, name, is_temp, TRUE, !is_temp);
+		} else {
 			/* It is a normal database startup: create the space
 			object and check that the .ibd file exists. */
 
@@ -894,43 +914,72 @@ err_exit:
 				(ulong) flags);
 			goto err_exit;
 		}
+	} else {
+		flags = 0;
+	}
 
-		if (fil_space_for_table_exists_in_mem(space, name, FALSE,
-						      FALSE, FALSE)) {
-			/* Ok; (if we did a crash recovery then the tablespace
-			can already be in the memory cache) */
-		} else {
-			/* In >= 4.1.9, InnoDB scans the data dictionary also
-			at a normal mysqld startup. It is an error if the
-			space object does not exist in memory. */
+	ut_a(name_of_col_is(sys_tables, sys_index, 4, "N_COLS"));
 
+	field = rec_get_nth_field_old(rec, 4, &len);
+	n_cols = mach_read_from_4(field);
+
+	/* The high-order bit of N_COLS is the "compact format" flag.
+	For tables in that format, MIX_LEN may hold additional flags. */
+	if (n_cols & 0x80000000UL) {
+		ulint	flags2;
+
+		flags |= DICT_TF_COMPACT;
+
+		ut_a(name_of_col_is(sys_tables, sys_index, 7, "MIX_LEN"));
+		field = rec_get_nth_field_old(rec, 7, &len);
+
+		flags2 = mach_read_from_4(field);
+
+		if (flags2 & (~0 << (DICT_TF2_BITS - DICT_TF2_SHIFT))) {
+			ut_print_timestamp(stderr);
+			fputs("  InnoDB: Warning: table ", stderr);
+			ut_print_filename(stderr, name);
+			fprintf(stderr, "\n"
+				"InnoDB: in InnoDB data dictionary"
+				" has unknown flags %lx.\n",
+				(ulong) flags2);
+
+			flags2 &= ~(~0 << (DICT_TF2_BITS - DICT_TF2_SHIFT));
+		}
+
+		flags |= flags2 << DICT_TF2_SHIFT;
+	}
+
+	/* See if the tablespace is available. */
+	if (space == 0) {
+		/* The system tablespace is always available. */
+	} else if (!fil_space_for_table_exists_in_mem(
+			   space, name,
+			   (flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY,
+			   FALSE, FALSE)) {
+
+		if ((flags >> DICT_TF2_SHIFT) & DICT_TF2_TEMPORARY) {
+			/* Do not bother to retry opening temporary tables. */
+			ibd_file_missing = TRUE;
+		} else {
 			ut_print_timestamp(stderr);
 			fprintf(stderr,
-				"  InnoDB: error: space object of table %s,\n"
+				"  InnoDB: error: space object of table ");
+			ut_print_filename(stderr, name);	/* follows "table " */
+			fprintf(stderr, ",\n"
 				"InnoDB: space id %lu did not exist in memory."
 				" Retrying an open.\n",
-				name, (ulong)space);
+				(ulong) space);
 			/* Try to open the tablespace */
 			if (!fil_open_single_table_tablespace(
-				    TRUE, space, flags, name)) {
-				/* We failed to find a sensible tablespace
-				file */
+				    TRUE, space,
+				    flags & ~(~0 << DICT_TF_BITS), name)) {
+				/* We failed to find a sensible
+				tablespace file */
 
 				ibd_file_missing = TRUE;
 			}
 		}
-	} else {
-		flags = 0;
-	}
-
-	ut_a(name_of_col_is(sys_tables, sys_index, 4, "N_COLS"));
-
-	field = rec_get_nth_field_old(rec, 4, &len);
-	n_cols = mach_read_from_4(field);
-
-	/* The high-order bit of N_COLS is the "compact format" flag. */
-	if (n_cols & 0x80000000UL) {
-		flags |= DICT_TF_COMPACT;
 	}
 
 	table = dict_mem_table_create(name, space, n_cols & ~0x80000000UL,

=== modified file 'storage/innodb_plugin/dict/dict0mem.c'
--- a/storage/innodb_plugin/dict/dict0mem.c	2009-07-30 12:42:56 +0000
+++ b/storage/innodb_plugin/dict/dict0mem.c	2010-04-01 10:45:58 +0000
@@ -59,7 +59,7 @@ dict_mem_table_create(
 	mem_heap_t*	heap;
 
 	ut_ad(name);
-	ut_a(!(flags & (~0 << DICT_TF_BITS)));
+	ut_a(!(flags & (~0 << DICT_TF2_BITS)));
 
 	heap = mem_heap_create(DICT_HEAP_SIZE);
 

=== modified file 'storage/innodb_plugin/fil/fil0fil.c'
--- a/storage/innodb_plugin/fil/fil0fil.c	2009-11-30 11:32:05 +0000
+++ b/storage/innodb_plugin/fil/fil0fil.c	2010-04-01 13:01:22 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -38,6 +38,7 @@ Created 10/25/1995 Heikki Tuuri
 #include "mtr0mtr.h"
 #include "mtr0log.h"
 #include "dict0dict.h"
+#include "page0page.h"
 #include "page0zip.h"
 #ifndef UNIV_HOTBACKUP
 # include "buf0lru.h"
@@ -1097,10 +1098,13 @@ fil_space_create(
 	fil_space_t*	space;
 
 	/* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
-	ROW_FORMAT=COMPACT (table->flags == DICT_TF_COMPACT) and
+	ROW_FORMAT=COMPACT
+	((table->flags & ~(~0 << DICT_TF_BITS)) == DICT_TF_COMPACT) and
 	ROW_FORMAT=REDUNDANT (table->flags == 0).  For any other
-	format, the tablespace flags should equal table->flags. */
+	format, the tablespace flags should equal
+	(table->flags & ~(~0 << DICT_TF_BITS)). */
 	ut_a(flags != DICT_TF_COMPACT);
+	ut_a(!(flags & (~0UL << DICT_TF_BITS)));
 
 try_again:
 	/*printf(
@@ -2582,10 +2586,13 @@ fil_create_new_single_table_tablespace(
 
 	ut_a(size >= FIL_IBD_FILE_INITIAL_SIZE);
 	/* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
-	ROW_FORMAT=COMPACT (table->flags == DICT_TF_COMPACT) and
+	ROW_FORMAT=COMPACT
+	((table->flags & ~(~0 << DICT_TF_BITS)) == DICT_TF_COMPACT) and
 	ROW_FORMAT=REDUNDANT (table->flags == 0).  For any other
-	format, the tablespace flags should equal table->flags. */
+	format, the tablespace flags should equal
+	(table->flags & ~(~0 << DICT_TF_BITS)). */
 	ut_a(flags != DICT_TF_COMPACT);
+	ut_a(!(flags & (~0UL << DICT_TF_BITS)));
 
 	path = fil_make_ibd_name(tablename, is_temp);
 
@@ -2786,6 +2793,7 @@ fil_reset_too_high_lsns(
 	ib_int64_t	offset;
 	ulint		zip_size;
 	ibool		success;
+	page_zip_des_t	page_zip;
 
 	filepath = fil_make_ibd_name(name, FALSE);
 
@@ -2833,6 +2841,12 @@ fil_reset_too_high_lsns(
 	space_id = fsp_header_get_space_id(page);
 	zip_size = fsp_header_get_zip_size(page);
 
+	page_zip_des_init(&page_zip);
+	page_zip_set_size(&page_zip, zip_size);
+	if (zip_size) {
+		page_zip.data = page + UNIV_PAGE_SIZE;
+	}
+
 	ut_print_timestamp(stderr);
 	fprintf(stderr,
 		"  InnoDB: Flush lsn in the tablespace file %lu"
@@ -2867,20 +2881,23 @@ fil_reset_too_high_lsns(
 			/* We have to reset the lsn */
 
 			if (zip_size) {
-				memcpy(page + UNIV_PAGE_SIZE, page, zip_size);
+				memcpy(page_zip.data, page, zip_size);
 				buf_flush_init_for_writing(
-					page, page + UNIV_PAGE_SIZE,
-					current_lsn);
+					page, &page_zip, current_lsn);
+				success = os_file_write(
+					filepath, file, page_zip.data,
+					(ulint) offset & 0xFFFFFFFFUL,
+					(ulint) (offset >> 32), zip_size);
 			} else {
 				buf_flush_init_for_writing(
 					page, NULL, current_lsn);
+				success = os_file_write(
+					filepath, file, page,
+					(ulint)(offset & 0xFFFFFFFFUL),
+					(ulint)(offset >> 32),
+					UNIV_PAGE_SIZE);
 			}
-			success = os_file_write(filepath, file, page,
-						(ulint)(offset & 0xFFFFFFFFUL),
-						(ulint)(offset >> 32),
-						zip_size
-						? zip_size
-						: UNIV_PAGE_SIZE);
+
 			if (!success) {
 
 				goto func_exit;
@@ -2956,10 +2973,13 @@ fil_open_single_table_tablespace(
 	filepath = fil_make_ibd_name(name, FALSE);
 
 	/* The tablespace flags (FSP_SPACE_FLAGS) should be 0 for
-	ROW_FORMAT=COMPACT (table->flags == DICT_TF_COMPACT) and
+	ROW_FORMAT=COMPACT
+	((table->flags & ~(~0 << DICT_TF_BITS)) == DICT_TF_COMPACT) and
 	ROW_FORMAT=REDUNDANT (table->flags == 0).  For any other
-	format, the tablespace flags should equal table->flags. */
+	format, the tablespace flags should equal
+	(table->flags & ~(~0 << DICT_TF_BITS)). */
 	ut_a(flags != DICT_TF_COMPACT);
+	ut_a(!(flags & (~0UL << DICT_TF_BITS)));
 
 	file = os_file_create_simple_no_error_handling(
 		filepath, OS_FILE_OPEN, OS_FILE_READ_ONLY, &success);
@@ -3011,7 +3031,8 @@ fil_open_single_table_tablespace(
 
 	ut_free(buf2);
 
-	if (UNIV_UNLIKELY(space_id != id || space_flags != flags)) {
+	if (UNIV_UNLIKELY(space_id != id
+			  || space_flags != (flags & ~(~0 << DICT_TF_BITS)))) {
 		ut_print_timestamp(stderr);
 
 		fputs("  InnoDB: Error: tablespace id and flags in file ",
@@ -4781,8 +4802,10 @@ void
 fil_close(void)
 /*===========*/
 {
+#ifndef UNIV_HOTBACKUP
 	/* The mutex should already have been freed. */
 	ut_ad(fil_system->mutex.magic_n == 0);
+#endif /* !UNIV_HOTBACKUP */
 
 	hash_table_free(fil_system->spaces);
 

=== modified file 'storage/innodb_plugin/fsp/fsp0fsp.c'
--- a/storage/innodb_plugin/fsp/fsp0fsp.c	2009-10-09 14:13:15 +0000
+++ b/storage/innodb_plugin/fsp/fsp0fsp.c	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -386,11 +386,11 @@ UNIV_INLINE
 ibool
 xdes_get_bit(
 /*=========*/
-	xdes_t*	descr,	/*!< in: descriptor */
-	ulint	bit,	/*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
-	ulint	offset,	/*!< in: page offset within extent:
-			0 ... FSP_EXTENT_SIZE - 1 */
-	mtr_t*	mtr)	/*!< in: mtr */
+	const xdes_t*	descr,	/*!< in: descriptor */
+	ulint		bit,	/*!< in: XDES_FREE_BIT or XDES_CLEAN_BIT */
+	ulint		offset,	/*!< in: page offset within extent:
+				0 ... FSP_EXTENT_SIZE - 1 */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	ulint	index;
 	ulint	byte_index;
@@ -527,8 +527,8 @@ UNIV_INLINE
 ulint
 xdes_get_n_used(
 /*============*/
-	xdes_t*	descr,	/*!< in: descriptor */
-	mtr_t*	mtr)	/*!< in: mtr */
+	const xdes_t*	descr,	/*!< in: descriptor */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	ulint	i;
 	ulint	count	= 0;
@@ -551,8 +551,8 @@ UNIV_INLINE
 ibool
 xdes_is_free(
 /*=========*/
-	xdes_t*	descr,	/*!< in: descriptor */
-	mtr_t*	mtr)	/*!< in: mtr */
+	const xdes_t*	descr,	/*!< in: descriptor */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	if (0 == xdes_get_n_used(descr, mtr)) {
 
@@ -569,8 +569,8 @@ UNIV_INLINE
 ibool
 xdes_is_full(
 /*=========*/
-	xdes_t*	descr,	/*!< in: descriptor */
-	mtr_t*	mtr)	/*!< in: mtr */
+	const xdes_t*	descr,	/*!< in: descriptor */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	if (FSP_EXTENT_SIZE == xdes_get_n_used(descr, mtr)) {
 
@@ -586,7 +586,7 @@ UNIV_INLINE
 void
 xdes_set_state(
 /*===========*/
-	xdes_t*	descr,	/*!< in: descriptor */
+	xdes_t*	descr,	/*!< in/out: descriptor */
 	ulint	state,	/*!< in: state to set */
 	mtr_t*	mtr)	/*!< in: mtr handle */
 {
@@ -605,8 +605,8 @@ UNIV_INLINE
 ulint
 xdes_get_state(
 /*===========*/
-	xdes_t*	descr,	/*!< in: descriptor */
-	mtr_t*	mtr)	/*!< in: mtr handle */
+	const xdes_t*	descr,	/*!< in: descriptor */
+	mtr_t*		mtr)	/*!< in: mtr handle */
 {
 	ulint	state;
 
@@ -705,7 +705,7 @@ UNIV_INLINE
 xdes_t*
 xdes_get_descriptor_with_space_hdr(
 /*===============================*/
-	fsp_header_t*	sp_header,/*!< in: space header, x-latched */
+	fsp_header_t*	sp_header,/*!< in/out: space header, x-latched */
 	ulint		space,	/*!< in: space id */
 	ulint		offset,	/*!< in: page offset;
 				if equal to the free limit,
@@ -869,9 +869,7 @@ fsp_init_file_page_low(
 		return;
 	}
 
-#ifdef UNIV_BASIC_LOG_DEBUG
-	memset(page, 0xff, UNIV_PAGE_SIZE);
-#endif
+	UNIV_MEM_INVALID(page, UNIV_PAGE_SIZE);
 	mach_write_to_4(page + FIL_PAGE_OFFSET, buf_block_get_page_no(block));
 	memset(page + FIL_PAGE_LSN, 0, 8);
 	mach_write_to_4(page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID,
@@ -1342,7 +1340,7 @@ fsp_fill_free_list(
 					descriptor page and ibuf bitmap page;
 					then we do not allocate more extents */
 	ulint		space,		/*!< in: space */
-	fsp_header_t*	header,		/*!< in: space header */
+	fsp_header_t*	header,		/*!< in/out: space header */
 	mtr_t*		mtr)		/*!< in: mtr */
 {
 	ulint	limit;

=== modified file 'storage/innodb_plugin/ha/ha0ha.c'
--- a/storage/innodb_plugin/ha/ha0ha.c	2009-07-30 12:42:56 +0000
+++ b/storage/innodb_plugin/ha/ha0ha.c	2010-04-01 13:02:01 +0000
@@ -101,6 +101,8 @@ ha_clear(
 	ulint	i;
 	ulint	n;
 
+	ut_ad(table);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
 #ifdef UNIV_SYNC_DEBUG
 	ut_ad(rw_lock_own(&btr_search_latch, RW_LOCK_EXCLUSIVE));
 #endif /* UNIV_SYNC_DEBUG */
@@ -146,7 +148,9 @@ ha_insert_for_fold_func(
 	ha_node_t*	prev_node;
 	ulint		hash;
 
-	ut_ad(table && data);
+	ut_ad(data);
+	ut_ad(table);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
 	ut_a(block->frame == page_align(data));
 #endif /* UNIV_AHI_DEBUG || UNIV_DEBUG */
@@ -237,6 +241,8 @@ ha_delete_hash_node(
 	hash_table_t*	table,		/*!< in: hash table */
 	ha_node_t*	del_node)	/*!< in: node to be deleted */
 {
+	ut_ad(table);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
 # ifndef UNIV_HOTBACKUP
 	if (table->adaptive) {
@@ -267,6 +273,8 @@ ha_search_and_update_if_found_func(
 {
 	ha_node_t*	node;
 
+	ut_ad(table);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
 	ASSERT_HASH_MUTEX_OWN(table, fold);
 #if defined UNIV_AHI_DEBUG || defined UNIV_DEBUG
 	ut_a(new_block->frame == page_align(new_data));
@@ -304,6 +312,8 @@ ha_remove_all_nodes_to_page(
 {
 	ha_node_t*	node;
 
+	ut_ad(table);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
 	ASSERT_HASH_MUTEX_OWN(table, fold);
 
 	node = ha_chain_get_first(table, fold);
@@ -353,6 +363,8 @@ ha_validate(
 	ibool		ok	= TRUE;
 	ulint		i;
 
+	ut_ad(table);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
 	ut_a(start_index <= end_index);
 	ut_a(start_index < hash_get_n_cells(table));
 	ut_a(end_index < hash_get_n_cells(table));
@@ -391,6 +403,8 @@ ha_print_info(
 	FILE*		file,	/*!< in: file where to print */
 	hash_table_t*	table)	/*!< in: hash table */
 {
+	ut_ad(table);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
 #ifdef UNIV_DEBUG
 /* Some of the code here is disabled for performance reasons in production
 builds, see http://bugs.mysql.com/36941 */

=== modified file 'storage/innodb_plugin/ha/hash0hash.c'
--- a/storage/innodb_plugin/ha/hash0hash.c	2009-05-27 09:45:59 +0000
+++ b/storage/innodb_plugin/ha/hash0hash.c	2010-04-01 13:02:01 +0000
@@ -119,7 +119,7 @@ hash_create(
 	table->heaps = NULL;
 #endif /* !UNIV_HOTBACKUP */
 	table->heap = NULL;
-	table->magic_n = HASH_TABLE_MAGIC_N;
+	ut_d(table->magic_n = HASH_TABLE_MAGIC_N);
 
 	/* Initialize the cell array */
 	hash_table_clear(table);
@@ -135,6 +135,8 @@ hash_table_free(
 /*============*/
 	hash_table_t*	table)	/*!< in, own: hash table */
 {
+	ut_ad(table);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
 #ifndef UNIV_HOTBACKUP
 	ut_a(table->mutexes == NULL);
 #endif /* !UNIV_HOTBACKUP */
@@ -160,6 +162,8 @@ hash_create_mutexes_func(
 {
 	ulint	i;
 
+	ut_ad(table);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
 	ut_a(n_mutexes > 0);
 	ut_a(ut_is_2pow(n_mutexes));
 

=== modified file 'storage/innodb_plugin/handler/ha_innodb.cc'
--- a/storage/innodb_plugin/handler/ha_innodb.cc	2010-01-13 10:28:42 +0000
+++ b/storage/innodb_plugin/handler/ha_innodb.cc	2010-04-01 13:01:43 +0000
@@ -1,7 +1,8 @@
 /*****************************************************************************
 
-Copyright (c) 2000, 2009, MySQL AB & Innobase Oy. All Rights Reserved.
+Copyright (c) 2000, 2010, MySQL AB & Innobase Oy. All Rights Reserved.
 Copyright (c) 2008, 2009 Google Inc.
+Copyright (c) 2009, Percona Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
 Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -9,6 +10,13 @@ briefly in the InnoDB documentation. The
 incorporated with their permission, and subject to the conditions contained in
 the file COPYING.Google.
 
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
 Foundation; version 2 of the License.
@@ -22,32 +30,6 @@ this program; if not, write to the Free 
 Place, Suite 330, Boston, MA 02111-1307 USA
 
 *****************************************************************************/
-/***********************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2009, Percona Inc.
-
-Portions of this file contain modifications contributed and copyrighted
-by Percona Inc.. Those modifications are
-gratefully acknowledged and are described briefly in the InnoDB
-documentation. The contributions by Percona Inc. are incorporated with
-their permission, and subject to the conditions contained in the file
-COPYING.Percona.
-
-This program is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-***********************************************************************/
 
 /* TODO list for the InnoDB handler in 5.0:
   - Remove the flag trx->active_trans and look at trx->conc_state
@@ -3006,59 +2988,370 @@ normalize_table_name(
 }
 
 /********************************************************************//**
+Get the upper limit used for a MySQL integral or floating-point column.
+@return maximum allowed value for the key type of the field */
+static
+ulonglong
+innobase_get_int_col_max_value(
+/*===========================*/
+	const Field*	field)	/*!< in: MySQL field */
+{
+	ulonglong	max_value = 0;
+
+	switch(field->key_type()) {
+	/* TINY (8 bits): unsigned limit first, then the signed one */
+	case HA_KEYTYPE_BINARY:
+		max_value = 0xFFULL;
+		break;
+	case HA_KEYTYPE_INT8:
+		max_value = 0x7FULL;
+		break;
+	/* SHORT (16 bits) */
+	case HA_KEYTYPE_USHORT_INT:
+		max_value = 0xFFFFULL;
+		break;
+	case HA_KEYTYPE_SHORT_INT:
+		max_value = 0x7FFFULL;
+		break;
+	/* MEDIUM (24 bits) */
+	case HA_KEYTYPE_UINT24:
+		max_value = 0xFFFFFFULL;
+		break;
+	case HA_KEYTYPE_INT24:
+		max_value = 0x7FFFFFULL;
+		break;
+	/* LONG (32 bits) */
+	case HA_KEYTYPE_ULONG_INT:
+		max_value = 0xFFFFFFFFULL;
+		break;
+	case HA_KEYTYPE_LONG_INT:
+		max_value = 0x7FFFFFFFULL;
+		break;
+	/* BIG (64 bits) */
+	case HA_KEYTYPE_ULONGLONG:
+		max_value = 0xFFFFFFFFFFFFFFFFULL;
+		break;
+	case HA_KEYTYPE_LONGLONG:
+		max_value = 0x7FFFFFFFFFFFFFFFULL;
+		break;
+	case HA_KEYTYPE_FLOAT:
+		/* We use the maximum as per IEEE754-2008 standard, 2^24 */
+		max_value = 0x1000000ULL;
+		break;
+	case HA_KEYTYPE_DOUBLE:
+		/* We use the maximum as per IEEE754-2008 standard, 2^53 */
+		max_value = 0x20000000000000ULL;
+		break;
+	default:
+		ut_error;	/* no limit is defined for other key types */
+	}
+
+	return(max_value);
+}
+
+/*******************************************************************//**
+Check that the column types of a MySQL KEY agree, position by position,
+with those of an InnoDB index; InnoDB system columns are skipped.
+@return TRUE if all column types match, FALSE otherwise. */
+static
+ibool
+innobase_match_index_columns(
+/*=========================*/
+	const KEY*		key_info,	/*!< in: Index info
+						from mysql */
+	const dict_index_t*	index_info)	/*!< in: Index info
+						from Innodb */
+{
+	const KEY_PART_INFO*	key_part;
+	const KEY_PART_INFO*	key_end;
+	const dict_field_t*	innodb_idx_fld;
+	const dict_field_t*	innodb_idx_fld_end;
+
+	DBUG_ENTER("innobase_match_index_columns");
+
+	/* Check whether user defined index column count matches */
+	if (key_info->key_parts != index_info->n_user_defined_cols) {
+		DBUG_RETURN(FALSE);
+	}
+
+	key_part = key_info->key_part;
+	key_end = key_part + key_info->key_parts;
+	innodb_idx_fld = index_info->fields;
+	innodb_idx_fld_end = index_info->fields + index_info->n_fields;
+
+	/* Check each index column's datatype. We do not check
+	column name because there exists case that index
+	column name got modified in mysql but such change does not
+	propagate to InnoDB.
+	One hidden assumption here is that the index column sequences
+	are matched up between those in mysql and Innodb. */
+	for (; key_part != key_end; ++key_part) {
+		ulint	col_type;
+		ibool	is_unsigned;
+		ulint	mtype = innodb_idx_fld->col->mtype;
+
+		/* Need to translate to InnoDB column type before
+		comparison. */
+		col_type = get_innobase_type_from_mysql_type(&is_unsigned,
+							     key_part->field);
+
+		/* Skip InnoDB system columns, re-reading mtype each step. */
+		while (mtype == DATA_SYS) {
+			innodb_idx_fld++;
+			if (innodb_idx_fld >= innodb_idx_fld_end) {
+				DBUG_RETURN(FALSE);
+			}
+			mtype = innodb_idx_fld->col->mtype;
+		}
+
+		if (col_type != mtype) {
+			/* Column Type mismatches */
+			DBUG_RETURN(FALSE);
+		}
+
+		innodb_idx_fld++;
+	}
+
+	DBUG_RETURN(TRUE);
+}
+
+/*******************************************************************//**
+This function builds a translation table in INNOBASE_SHARE
+structure for fast index location with mysql array number from its
+table->key_info structure. This also provides the necessary translation
+between the key order in mysql key_info and Innodb ib_table->indexes if
+they are not fully matched with each other.
+Note we do not have any mutex protecting the translation table
+building based on the assumption that there is no concurrent
+index creation/drop and DMLs that requires index lookup. All table
+handles will be closed before the index creation/drop.
+@return TRUE if the table was built; on FALSE it is freed and reset */
+static
+ibool
+innobase_build_index_translation(
+/*=============================*/
+	const TABLE*		table,	  /*!< in: table in MySQL data
+					  dictionary */
+	dict_table_t*		ib_table, /*!< in: table in Innodb data
+					  dictionary */
+	INNOBASE_SHARE*		share)	  /*!< in/out: share structure
+					  where index translation table
+					  will be constructed in. */
+{
+	ulint		mysql_num_index;
+	ulint		ib_num_index;
+	dict_index_t**	index_mapping;
+	ibool		ret = TRUE;
+
+	DBUG_ENTER("innobase_build_index_translation");
+
+	mysql_num_index = table->s->keys;
+	ib_num_index = UT_LIST_GET_LEN(ib_table->indexes);
+
+	index_mapping = share->idx_trans_tbl.index_mapping;
+
+	/* If there exists inconsistency between MySQL and InnoDB dictionary
+	(metadata) information, the number of index defined in MySQL
+	could exceed that in InnoDB, do not build index translation
+	table in such case */
+	if (UNIV_UNLIKELY(ib_num_index < mysql_num_index)) {
+		ret = FALSE;
+		goto func_exit;
+	}
+
+	/* If index entry count is non-zero, nothing has
+	changed since last update, directly return TRUE */
+	if (share->idx_trans_tbl.index_count) {
+		/* Index entry count should still match mysql_num_index */
+		ut_a(share->idx_trans_tbl.index_count == mysql_num_index);
+		goto func_exit;
+	}
+
+	/* More indexes than slots: grow array (old one freed on failure) */
+	if (mysql_num_index > share->idx_trans_tbl.array_size) {
+		index_mapping = (dict_index_t**) my_realloc(
+			index_mapping,
+			mysql_num_index * sizeof(*index_mapping),
+			MYF(MY_ALLOW_ZERO_PTR | MY_FREE_ON_ERROR));
+
+		if (!index_mapping) {
+			ret = FALSE;
+			goto func_exit;
+		}
+
+		share->idx_trans_tbl.array_size = mysql_num_index;
+	}
+
+
+	/* For each index in the mysql key_info array, fetch its
+	corresponding InnoDB index pointer into index_mapping
+	array. */
+	for (ulint count = 0; count < mysql_num_index; count++) {
+
+		/* Fetch index pointers into index_mapping according to mysql
+		index sequence */
+		index_mapping[count] = dict_table_get_index_on_name(
+			ib_table, table->key_info[count].name);
+
+		if (!index_mapping[count]) {
+			sql_print_error("Cannot find index %s in InnoDB "
+					"index dictionary.",
+					table->key_info[count].name);
+			ret = FALSE;
+			goto func_exit;
+		}
+
+		/* Double check fetched index has the same
+		column info as those in mysql key_info. */
+		if (!innobase_match_index_columns(&table->key_info[count],
+					          index_mapping[count])) {
+			sql_print_error("Found index %s whose column info "
+					"does not match that of MySQL.",
+					table->key_info[count].name);
+			ret = FALSE;
+			goto func_exit;
+		}
+	}
+
+	/* Successfully built the translation table */
+	share->idx_trans_tbl.index_count = mysql_num_index;
+
+func_exit:
+	if (!ret) {
+		/* Build translation table failed. */
+		my_free(index_mapping, MYF(MY_ALLOW_ZERO_PTR));
+
+		share->idx_trans_tbl.array_size = 0;
+		share->idx_trans_tbl.index_count = 0;
+		index_mapping = NULL;
+	}
+
+	share->idx_trans_tbl.index_mapping = index_mapping;
+
+	DBUG_RETURN(ret);
+}
+
+/*******************************************************************//**
+This function uses index translation table to quickly locate the
+requested index structure.
+Note we do not have mutex protection for the index translation table
+access, it is based on the assumption that there is no concurrent
+translation table rebuild (after create/drop index) and DMLs that
+require index lookup.
+@return dict_index_t structure for requested index. NULL if
+fail to locate the index structure. */
+static
+dict_index_t*
+innobase_index_lookup(
+/*==================*/
+	INNOBASE_SHARE*	share,	/*!< in: share structure for index
+				translation table. */
+	uint		keynr)	/*!< in: index number for the requested
+				index */
+{
+	if (!share->idx_trans_tbl.index_mapping
+	    || keynr >= share->idx_trans_tbl.index_count) {
+		return(NULL);
+	}
+
+	return(share->idx_trans_tbl.index_mapping[keynr]);
+}
+
+/************************************************************************
 Set the autoinc column max value. This should only be called once from
-ha_innobase::open(). Therefore there's no need for a covering lock.
-@return	DB_SUCCESS or error code */
+ha_innobase::open(). Therefore there's no need for a covering lock. */
 UNIV_INTERN
-ulint
+void
 ha_innobase::innobase_initialize_autoinc()
 /*======================================*/
 {
-	dict_index_t*	index;
 	ulonglong	auto_inc;
-	const char*	col_name;
-	ulint		error;
+	const Field*	field = table->found_next_number_field;
 
-	col_name = table->found_next_number_field->field_name;
-	index = innobase_get_index(table->s->next_number_index);
+	if (field != NULL) {
+		auto_inc = innobase_get_int_col_max_value(field);
+	} else {
+		/* We have no idea what's been passed in to us as the
+		autoinc column. We set it to 0, effectively disabling
+		updates to the table. */
+		auto_inc = 0;
 
-	/* Execute SELECT MAX(col_name) FROM TABLE; */
-	error = row_search_max_autoinc(index, col_name, &auto_inc);
+		ut_print_timestamp(stderr);
+		fprintf(stderr, "  InnoDB: Unable to determine the AUTOINC "
+				"column name\n");
+	}
 
-	switch (error) {
-	case DB_SUCCESS:
+	if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
+		/* If the recovery level is set so high that writes
+		are disabled we force the AUTOINC counter to 0
+		value effectively disabling writes to the table.
+		Secondly, we avoid reading the table in case the read
+		results in failure due to a corrupted table/index.
+
+		We will not return an error to the client, so that the
+		tables can be dumped with minimal hassle.  If an error
+		were returned in this case, the first attempt to read
+		the table would fail and subsequent SELECTs would succeed. */
+		auto_inc = 0;
+	} else if (field == NULL) {
+		/* This is a far more serious error, best to avoid
+		opening the table and return failure. */
+		my_error(ER_AUTOINC_READ_FAILED, MYF(0));
+	} else {
+		dict_index_t*	index;
+		const char*	col_name;
+		ulonglong	read_auto_inc;
+		ulint		err;
 
-		/* At the this stage we don't know the increment
-		or the offset, so use default inrement of 1. */
-		++auto_inc;
-		break;
+		update_thd(ha_thd());
 
-	case DB_RECORD_NOT_FOUND:
-		ut_print_timestamp(stderr);
-		fprintf(stderr, "  InnoDB: MySQL and InnoDB data "
-			"dictionaries are out of sync.\n"
-			"InnoDB: Unable to find the AUTOINC column %s in the "
-			"InnoDB table %s.\n"
-			"InnoDB: We set the next AUTOINC column value to the "
-			"maximum possible value,\n"
-			"InnoDB: in effect disabling the AUTOINC next value "
-			"generation.\n"
-			"InnoDB: You can either set the next AUTOINC value "
-			"explicitly using ALTER TABLE\n"
-			"InnoDB: or fix the data dictionary by recreating "
-			"the table.\n",
-			col_name, index->table->name);
+		ut_a(prebuilt->trx == thd_to_trx(user_thd));
 
-		auto_inc = 0xFFFFFFFFFFFFFFFFULL;
-		break;
+		col_name = field->field_name;
+		index = innobase_get_index(table->s->next_number_index);
 
-	default:
-		return(error);
+		/* Execute SELECT MAX(col_name) FROM TABLE; */
+		err = row_search_max_autoinc(index, col_name, &read_auto_inc);
+
+		switch (err) {
+		case DB_SUCCESS:
+			/* At this stage we do not know the increment
+			or the offset, so use a default increment of 1. */
+			auto_inc = read_auto_inc + 1;
+			break;
+
+		case DB_RECORD_NOT_FOUND:
+			ut_print_timestamp(stderr);
+			fprintf(stderr, "  InnoDB: MySQL and InnoDB data "
+				"dictionaries are out of sync.\n"
+				"InnoDB: Unable to find the AUTOINC column "
+				"%s in the InnoDB table %s.\n"
+				"InnoDB: We set the next AUTOINC column "
+				"value to 0,\n"
+				"InnoDB: in effect disabling the AUTOINC "
+				"next value generation.\n"
+				"InnoDB: You can either set the next "
+				"AUTOINC value explicitly using ALTER TABLE\n"
+				"InnoDB: or fix the data dictionary by "
+				"recreating the table.\n",
+				col_name, index->table->name);
+
+			/* This will disable the AUTOINC generation. */
+			auto_inc = 0;
+
+			/* We want the open to succeed, so that the user can
+			take corrective action. ie. reads should succeed but
+			updates should fail. */
+			err = DB_SUCCESS;
+			break;
+		default:
+			/* row_search_max_autoinc() should only return
+			one of DB_SUCCESS or DB_RECORD_NOT_FOUND. */
+			ut_error;
+		}
 	}
 
 	dict_table_autoinc_initialize(prebuilt->table, auto_inc);
-
-	return(DB_SUCCESS);
 }
 
 /*****************************************************************//**
@@ -3192,6 +3485,11 @@ retry:
 	primary_key = table->s->primary_key;
 	key_used_on_scan = primary_key;
 
+	if (!innobase_build_index_translation(table, ib_table, share)) {
+		  sql_print_error("Build InnoDB index translation table for"
+				  " Table %s failed", name);
+	}
+
 	/* Allocate a buffer for a 'row reference'. A row reference is
 	a string of bytes of length ref_length which uniquely specifies
 	a row in our table. Note that MySQL may also compare two row
@@ -3199,31 +3497,86 @@ retry:
 	of length ref_length! */
 
 	if (!row_table_got_default_clust_index(ib_table)) {
-		if (primary_key >= MAX_KEY) {
-		  sql_print_error("Table %s has a primary key in InnoDB data "
-				  "dictionary, but not in MySQL!", name);
-		}
 
 		prebuilt->clust_index_was_generated = FALSE;
 
-		/* MySQL allocates the buffer for ref. key_info->key_length
-		includes space for all key columns + one byte for each column
-		that may be NULL. ref_length must be as exact as possible to
-		save space, because all row reference buffers are allocated
-		based on ref_length. */
+		if (UNIV_UNLIKELY(primary_key >= MAX_KEY)) {
+			sql_print_error("Table %s has a primary key in "
+					"InnoDB data dictionary, but not "
+					"in MySQL!", name);
+
+			/* This mismatch could cause further problems
+			if not attended, bring this to the user's attention
+			by printing a warning in addition to log a message
+			in the errorlog */
+			push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+					    ER_NO_SUCH_INDEX,
+					    "InnoDB: Table %s has a "
+					    "primary key in InnoDB data "
+					    "dictionary, but not in "
+					    "MySQL!", name);
+
+			/* If primary_key >= MAX_KEY, its (primary_key)
+			value could be out of bound if continue to index
+			into key_info[] array. Find InnoDB primary index,
+			and assign its key_length to ref_length.
+			In addition, since MySQL indexes are sorted starting
+			with primary index, unique index etc., initialize
+			ref_length to the first index key length in
+			case we fail to find InnoDB cluster index.
+
+			Please note, this will not resolve the primary
+			index mismatch problem, other side effects are
+			possible if users continue to use the table.
+			However, we allow this table to be opened so
+			that the user can take the necessary measures for
+			the mismatch while the table data stays
+			accessible. */
+			ref_length = table->key_info[0].key_length;
+
+			/* Find corresponding cluster index
+			key length in MySQL's key_info[] array */
+			for (ulint i = 0; i < table->s->keys; i++) {
+				dict_index_t*	index;
+				index = innobase_get_index(i);
+				if (dict_index_is_clust(index)) {
+					ref_length =
+						 table->key_info[i].key_length;
+				}
+			}
+		} else {
+			/* MySQL allocates the buffer for ref.
+			key_info->key_length includes space for all key
+			columns + one byte for each column that may be
+			NULL. ref_length must be as exact as possible to
+			save space, because all row reference buffers are
+			allocated based on ref_length. */
 
-		ref_length = table->key_info[primary_key].key_length;
+			ref_length = table->key_info[primary_key].key_length;
+		}
 	} else {
 		if (primary_key != MAX_KEY) {
-		  sql_print_error("Table %s has no primary key in InnoDB data "
-				  "dictionary, but has one in MySQL! If you "
-				  "created the table with a MySQL version < "
-				  "3.23.54 and did not define a primary key, "
-				  "but defined a unique key with all non-NULL "
-				  "columns, then MySQL internally treats that "
-				  "key as the primary key. You can fix this "
-				  "error by dump + DROP + CREATE + reimport "
-				  "of the table.", name);
+			sql_print_error(
+				"Table %s has no primary key in InnoDB data "
+				"dictionary, but has one in MySQL! If you "
+				"created the table with a MySQL version < "
+				"3.23.54 and did not define a primary key, "
+				"but defined a unique key with all non-NULL "
+				"columns, then MySQL internally treats that "
+				"key as the primary key. You can fix this "
+				"error by dump + DROP + CREATE + reimport "
+				"of the table.", name);
+
+			/* This mismatch could cause further problems
+			if not attended, bring this to the user attention
+			by printing a warning in addition to log a message
+			in the errorlog */
+			push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+					    ER_NO_SUCH_INDEX,
+					    "InnoDB: Table %s has no "
+					    "primary key in InnoDB data "
+					    "dictionary, but has one in "
+					    "MySQL!", name);
 		}
 
 		prebuilt->clust_index_was_generated = TRUE;
@@ -3265,8 +3618,6 @@ retry:
 
 	/* Only if the table has an AUTOINC column. */
 	if (prebuilt->table != NULL && table->found_next_number_field != NULL) {
-		ulint	error;
-
 		dict_table_autoinc_lock(prebuilt->table);
 
 		/* Since a table can already be "open" in InnoDB's internal
@@ -3275,8 +3626,7 @@ retry:
 		autoinc value from a previous MySQL open. */
 		if (dict_table_autoinc_read(prebuilt->table) == 0) {
 
-			error = innobase_initialize_autoinc();
-			ut_a(error == DB_SUCCESS);
+			innobase_initialize_autoinc();
 		}
 
 		dict_table_autoinc_unlock(prebuilt->table);
@@ -4093,67 +4443,6 @@ skip_field:
 }
 
 /********************************************************************//**
-Get the upper limit of the MySQL integral and floating-point type. */
-UNIV_INTERN
-ulonglong
-ha_innobase::innobase_get_int_col_max_value(
-/*========================================*/
-	const Field*	field)
-{
-	ulonglong	max_value = 0;
-
-	switch(field->key_type()) {
-	/* TINY */
-	case HA_KEYTYPE_BINARY:
-		max_value = 0xFFULL;
-		break;
-	case HA_KEYTYPE_INT8:
-		max_value = 0x7FULL;
-		break;
-	/* SHORT */
-	case HA_KEYTYPE_USHORT_INT:
-		max_value = 0xFFFFULL;
-		break;
-	case HA_KEYTYPE_SHORT_INT:
-		max_value = 0x7FFFULL;
-		break;
-	/* MEDIUM */
-	case HA_KEYTYPE_UINT24:
-		max_value = 0xFFFFFFULL;
-		break;
-	case HA_KEYTYPE_INT24:
-		max_value = 0x7FFFFFULL;
-		break;
-	/* LONG */
-	case HA_KEYTYPE_ULONG_INT:
-		max_value = 0xFFFFFFFFULL;
-		break;
-	case HA_KEYTYPE_LONG_INT:
-		max_value = 0x7FFFFFFFULL;
-		break;
-	/* BIG */
-	case HA_KEYTYPE_ULONGLONG:
-		max_value = 0xFFFFFFFFFFFFFFFFULL;
-		break;
-	case HA_KEYTYPE_LONGLONG:
-		max_value = 0x7FFFFFFFFFFFFFFFULL;
-		break;
-	case HA_KEYTYPE_FLOAT:
-		/* We use the maximum as per IEEE754-2008 standard, 2^24 */
-		max_value = 0x1000000ULL;
-		break;
-	case HA_KEYTYPE_DOUBLE:
-		/* We use the maximum as per IEEE754-2008 standard, 2^53 */
-		max_value = 0x20000000000000ULL;
-		break;
-	default:
-		ut_error;
-	}
-
-	return(max_value);
-}
-
-/********************************************************************//**
 This special handling is really to overcome the limitations of MySQL's
 binlogging. We need to eliminate the non-determinism that will arise in
 INSERT ... SELECT type of statements, since MySQL binlog only stores the
@@ -4378,11 +4667,17 @@ no_commit:
 		prebuilt->autoinc_error = DB_SUCCESS;
 
 		if ((error = update_auto_increment())) {
-
 			/* We don't want to mask autoinc overflow errors. */
-			if (prebuilt->autoinc_error != DB_SUCCESS) {
-				error = (int) prebuilt->autoinc_error;
 
+			/* Handle the case where the AUTOINC sub-system
+			failed during initialization. */
+			if (prebuilt->autoinc_error == DB_UNSUPPORTED) {
+				error_result = ER_AUTOINC_READ_FAILED;
+				/* Set the error message to report too. */
+				my_error(ER_AUTOINC_READ_FAILED, MYF(0));
+				goto func_exit;
+			} else if (prebuilt->autoinc_error != DB_SUCCESS) {
+				error = (int) prebuilt->autoinc_error;
 				goto report_error;
 			}
 
@@ -4463,24 +4758,29 @@ no_commit:
 			update the table upper limit. Note: last_value
 			will be 0 if get_auto_increment() was not called.*/
 
-			if (auto_inc <= col_max_value
-			    && auto_inc >= prebuilt->autoinc_last_value) {
+			if (auto_inc >= prebuilt->autoinc_last_value) {
 set_max_autoinc:
-				ut_a(prebuilt->autoinc_increment > 0);
-
-				ulonglong	need;
-				ulonglong	offset;
-
-				offset = prebuilt->autoinc_offset;
-				need = prebuilt->autoinc_increment;
-
-				auto_inc = innobase_next_autoinc(
-					auto_inc, need, offset, col_max_value);
-
-				err = innobase_set_max_autoinc(auto_inc);
-
-				if (err != DB_SUCCESS) {
-					error = err;
+				/* This should filter out the negative
+				values set explicitly by the user. */
+				if (auto_inc <= col_max_value) {
+					ut_a(prebuilt->autoinc_increment > 0);
+
+					ulonglong	need;
+					ulonglong	offset;
+
+					offset = prebuilt->autoinc_offset;
+					need = prebuilt->autoinc_increment;
+
+					auto_inc = innobase_next_autoinc(
+						auto_inc,
+						need, offset, col_max_value);
+
+					err = innobase_set_max_autoinc(
+						auto_inc);
+
+					if (err != DB_SUCCESS) {
+						error = err;
+					}
 				}
 			}
 			break;
@@ -5160,14 +5460,30 @@ ha_innobase::innobase_get_index(
 	DBUG_ENTER("innobase_get_index");
 	ha_statistic_increment(&SSV::ha_read_key_count);
 
-	ut_ad(user_thd == ha_thd());
-	ut_a(prebuilt->trx == thd_to_trx(user_thd));
-
 	if (keynr != MAX_KEY && table->s->keys > 0) {
 		key = table->key_info + keynr;
 
-		index = dict_table_get_index_on_name(prebuilt->table,
-						     key->name);
+		index = innobase_index_lookup(share, keynr);
+
+		if (index) {
+			ut_a(ut_strcmp(index->name, key->name) == 0);
+		} else {
+			/* Can't find index with keynr in the translation
+			table. Only print message if the index translation
+			table exists */
+			if (share->idx_trans_tbl.index_mapping) {
+				sql_print_error("InnoDB could not find "
+						"index %s key no %u for "
+						"table %s through its "
+						"index translation table",
+						key ? key->name : "NULL",
+						keynr,
+						prebuilt->table->name);
+			}
+
+			index = dict_table_get_index_on_name(prebuilt->table,
+							     key->name);
+		}
 	} else {
 		index = dict_table_get_first_index(prebuilt->table);
 	}
@@ -5228,7 +5544,7 @@ ha_innobase::change_active_index(
 	dtuple_set_n_fields(prebuilt->search_tuple, prebuilt->index->n_fields);
 
 	dict_index_copy_types(prebuilt->search_tuple, prebuilt->index,
-			prebuilt->index->n_fields);
+			      prebuilt->index->n_fields);
 
 	/* MySQL changes the active index for a handle also during some
 	queries, for example SELECT MAX(a), SUM(a) first retrieves the MAX()
@@ -5729,9 +6045,11 @@ create_table_def(
 
 	if (error == DB_DUPLICATE_KEY) {
 		char buf[100];
-		innobase_convert_identifier(buf, sizeof buf,
-					    table_name, strlen(table_name),
-					    trx->mysql_thd, TRUE);
+		char* buf_end = innobase_convert_identifier(
+			buf, sizeof buf - 1, table_name, strlen(table_name),
+			trx->mysql_thd, TRUE);
+
+		*buf_end = '\0';
 		my_error(ER_TABLE_EXISTS_ERROR, MYF(0), buf);
 	}
 
@@ -6333,6 +6651,10 @@ ha_innobase::create(
 		goto cleanup;
 	}
 
+	if (create_info->options & HA_LEX_CREATE_TMP_TABLE) {
+		flags |= DICT_TF2_TEMPORARY << DICT_TF2_SHIFT;
+	}
+
 	error = create_table_def(trx, form, norm_name,
 		create_info->options & HA_LEX_CREATE_TMP_TABLE ? name2 : NULL,
 		flags);
@@ -6854,10 +7176,15 @@ ha_innobase::records_in_range(
 
 	key = table->key_info + active_index;
 
-	index = dict_table_get_index_on_name(prebuilt->table, key->name);
+	index = innobase_get_index(keynr);
 
-	/* MySQL knows about this index and so we must be able to find it.*/
-	ut_a(index);
+	/* There exists possibility of not being able to find requested
+	index due to inconsistency between MySQL and InnoDB dictionary info.
+	Necessary message should have been printed in innobase_get_index() */
+	if (UNIV_UNLIKELY(!index)) {
+		n_rows = HA_POS_ERROR;
+		goto func_exit;
+	}
 
 	heap = mem_heap_create(2 * (key->key_parts * sizeof(dfield_t)
 				    + sizeof(dtuple_t)));
@@ -6902,6 +7229,7 @@ ha_innobase::records_in_range(
 
 	mem_heap_free(heap);
 
+func_exit:
 	my_free(key_val_buff2, MYF(0));
 
 	prebuilt->trx->op_info = (char*)"";
@@ -7043,6 +7371,7 @@ ha_innobase::info(
 	char		path[FN_REFLEN];
 	os_file_stat_t	stat_info;
 
+
 	DBUG_ENTER("info");
 
 	/* If we are forcing recovery at a high level, we will suppress
@@ -7203,13 +7532,29 @@ ha_innobase::info(
 	}
 
 	if (flag & HA_STATUS_CONST) {
-		index = dict_table_get_first_index(ib_table);
-
-		if (prebuilt->clust_index_was_generated) {
-			index = dict_table_get_next_index(index);
+		/* Verify the number of index in InnoDB and MySQL
+		matches up. If prebuilt->clust_index_was_generated
+		holds, InnoDB defines GEN_CLUST_INDEX internally */
+		ulint	num_innodb_index = UT_LIST_GET_LEN(ib_table->indexes)
+					- prebuilt->clust_index_was_generated;
+
+		if (table->s->keys != num_innodb_index) {
+			sql_print_error("Table %s contains %lu "
+					"indexes inside InnoDB, which "
+					"is different from the number of "
+					"indexes %u defined in the MySQL ",
+					ib_table->name, num_innodb_index,
+					table->s->keys);
 		}
 
 		for (i = 0; i < table->s->keys; i++) {
+			/* We could get index quickly through internal
+			index mapping with the index translation table.
+			The identity of index (match up index name with
+			that of table->key_info[i]) is already verified in
+			innobase_get_index().  */
+			index = innobase_get_index(i);
+
 			if (index == NULL) {
 				sql_print_error("Table %s contains fewer "
 						"indexes inside InnoDB than "
@@ -7261,8 +7606,6 @@ ha_innobase::info(
 				  rec_per_key >= ~(ulong) 0 ? ~(ulong) 0 :
 				  (ulong) rec_per_key;
 			}
-
-			index = dict_table_get_next_index(index);
 		}
 	}
 
@@ -7340,8 +7683,13 @@ ha_innobase::check(
 	HA_CHECK_OPT*	check_opt)	/*!< in: check options, currently
 					ignored */
 {
-	ulint		ret;
+	dict_index_t*	index;
+	ulint		n_rows;
+	ulint		n_rows_in_table	= ULINT_UNDEFINED;
+	ibool		is_ok		= TRUE;
+	ulint		old_isolation_level;
 
+	DBUG_ENTER("ha_innobase::check");
 	DBUG_ASSERT(thd == ha_thd());
 	ut_a(prebuilt->trx);
 	ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
@@ -7354,17 +7702,140 @@ ha_innobase::check(
 		build_template(prebuilt, NULL, table, ROW_MYSQL_WHOLE_ROW);
 	}
 
-	ret = row_check_table_for_mysql(prebuilt);
+	if (prebuilt->table->ibd_file_missing) {
+		sql_print_error("InnoDB: Error:\n"
+			"InnoDB: MySQL is trying to use a table handle"
+			" but the .ibd file for\n"
+			"InnoDB: table %s does not exist.\n"
+			"InnoDB: Have you deleted the .ibd file"
+			" from the database directory under\n"
+			"InnoDB: the MySQL datadir, or have you"
+			" used DISCARD TABLESPACE?\n"
+			"InnoDB: Please refer to\n"
+			"InnoDB: " REFMAN "innodb-troubleshooting.html\n"
+			"InnoDB: how you can resolve the problem.\n",
+			prebuilt->table->name);
+		DBUG_RETURN(HA_ADMIN_CORRUPT);
+	}
 
-	switch (ret) {
-	case DB_SUCCESS:
-		return(HA_ADMIN_OK);
-	case DB_INTERRUPTED:
+	prebuilt->trx->op_info = "checking table";
+
+	old_isolation_level = prebuilt->trx->isolation_level;
+
+	/* We must run the index record counts at an isolation level
+	>= READ COMMITTED, because a dirty read can see a wrong number
+	of records in some index; to play safe, we use always
+	REPEATABLE READ here */
+
+	prebuilt->trx->isolation_level = TRX_ISO_REPEATABLE_READ;
+
+	/* Enlarge the fatal lock wait timeout during CHECK TABLE. */
+	mutex_enter(&kernel_mutex);
+	srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */
+	mutex_exit(&kernel_mutex);
+
+	for (index = dict_table_get_first_index(prebuilt->table);
+	     index != NULL;
+	     index = dict_table_get_next_index(index)) {
+#if 0
+		fputs("Validating index ", stderr);
+		ut_print_name(stderr, trx, FALSE, index->name);
+		putc('\n', stderr);
+#endif
+
+		if (!btr_validate_index(index, prebuilt->trx)) {
+			is_ok = FALSE;
+			push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+					    ER_NOT_KEYFILE,
+					    "InnoDB: The B-tree of"
+					    " index '%-.200s' is corrupted.",
+					    index->name);
+			continue;
+		}
+
+		/* Instead of invoking change_active_index(), set up
+		a dummy template for non-locking reads, disabling
+		access to the clustered index. */
+		prebuilt->index = index;
+
+		prebuilt->index_usable = row_merge_is_index_usable(
+			prebuilt->trx, prebuilt->index);
+
+		if (UNIV_UNLIKELY(!prebuilt->index_usable)) {
+			push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+					    HA_ERR_TABLE_DEF_CHANGED,
+					    "InnoDB: Insufficient history for"
+					    " index '%-.200s'",
+					    index->name);
+			continue;
+		}
+
+		prebuilt->sql_stat_start = TRUE;
+		prebuilt->template_type = ROW_MYSQL_DUMMY_TEMPLATE;
+		prebuilt->n_template = 0;
+		prebuilt->need_to_access_clustered = FALSE;
+
+		dtuple_set_n_fields(prebuilt->search_tuple, 0);
+
+		prebuilt->select_lock_type = LOCK_NONE;
+
+		if (!row_check_index_for_mysql(prebuilt, index, &n_rows)) {
+			push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+					    ER_NOT_KEYFILE,
+					    "InnoDB: The B-tree of"
+					    " index '%-.200s' is corrupted.",
+					    index->name);
+			is_ok = FALSE;
+		}
+
+		if (thd_killed(user_thd)) {
+			break;
+		}
+
+#if 0
+		fprintf(stderr, "%lu entries in index %s\n", n_rows,
+			index->name);
+#endif
+
+		if (index == dict_table_get_first_index(prebuilt->table)) {
+			n_rows_in_table = n_rows;
+		} else if (n_rows != n_rows_in_table) {
+			push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+					    ER_NOT_KEYFILE,
+					    "InnoDB: Index '%-.200s'"
+					    " contains %lu entries,"
+					    " should be %lu.",
+					    index->name,
+					    (ulong) n_rows,
+					    (ulong) n_rows_in_table);
+			is_ok = FALSE;
+		}
+	}
+
+	/* Restore the original isolation level */
+	prebuilt->trx->isolation_level = old_isolation_level;
+
+	/* We validate also the whole adaptive hash index for all tables
+	at every CHECK TABLE */
+
+	if (!btr_search_validate()) {
+		push_warning(thd, MYSQL_ERROR::WARN_LEVEL_WARN,
+			     ER_NOT_KEYFILE,
+			     "InnoDB: The adaptive hash index is corrupted.");
+		is_ok = FALSE;
+	}
+
+	/* Restore the fatal lock wait timeout after CHECK TABLE. */
+	mutex_enter(&kernel_mutex);
+	srv_fatal_semaphore_wait_threshold -= 7200; /* 2 hours */
+	mutex_exit(&kernel_mutex);
+
+	prebuilt->trx->op_info = "";
+	if (thd_killed(user_thd)) {
 		my_error(ER_QUERY_INTERRUPTED, MYF(0));
-		return(-1);
-	default:
-		return(HA_ADMIN_CORRUPT);
 	}
+
+	DBUG_RETURN(is_ok ? HA_ADMIN_OK : HA_ADMIN_CORRUPT);
 }
 
 /*************************************************************//**
@@ -8187,8 +8658,8 @@ innodb_show_status(
 
 	mutex_enter(&srv_monitor_file_mutex);
 	rewind(srv_monitor_file);
-	srv_printf_innodb_monitor(srv_monitor_file,
-				&trx_list_start, &trx_list_end);
+	srv_printf_innodb_monitor(srv_monitor_file, FALSE,
+				  &trx_list_start, &trx_list_end);
 	flen = ftell(srv_monitor_file);
 	os_file_set_eof(srv_monitor_file);
 
@@ -8245,19 +8716,25 @@ innodb_show_status(
 }
 
 /************************************************************************//**
-Implements the SHOW MUTEX STATUS command. . */
+Implements the SHOW MUTEX STATUS command.
+@return TRUE on failure, FALSE on success. */
 static
 bool
 innodb_mutex_show_status(
 /*=====================*/
-	handlerton*	hton,	/*!< in: the innodb handlerton */
+	handlerton*	hton,		/*!< in: the innodb handlerton */
 	THD*		thd,		/*!< in: the MySQL query thread of the
 					caller */
-	stat_print_fn*	stat_print)
+	stat_print_fn*	stat_print)	/*!< in: function for printing
+					statistics */
 {
 	char buf1[IO_SIZE], buf2[IO_SIZE];
 	mutex_t*	mutex;
 	rw_lock_t*	lock;
+	ulint		block_mutex_oswait_count = 0;
+	ulint		block_lock_oswait_count = 0;
+	mutex_t*	block_mutex = NULL;
+	rw_lock_t*	block_lock = NULL;
 #ifdef UNIV_DEBUG
 	ulint	  rw_lock_count= 0;
 	ulint	  rw_lock_count_spin_loop= 0;
@@ -8272,12 +8749,16 @@ innodb_mutex_show_status(
 
 	mutex_enter(&mutex_list_mutex);
 
-	mutex = UT_LIST_GET_FIRST(mutex_list);
+	for (mutex = UT_LIST_GET_FIRST(mutex_list); mutex != NULL;
+	     mutex = UT_LIST_GET_NEXT(list, mutex)) {
+		if (mutex->count_os_wait == 0) {
+			continue;
+		}
 
-	while (mutex != NULL) {
-		if (mutex->count_os_wait == 0
-		    || buf_pool_is_block_mutex(mutex)) {
-			goto next_mutex;
+		if (buf_pool_is_block_mutex(mutex)) {
+			block_mutex = mutex;
+			block_mutex_oswait_count += mutex->count_os_wait;
+			continue;
 		}
 #ifdef UNIV_DEBUG
 		if (mutex->mutex_type != 1) {
@@ -8304,8 +8785,7 @@ innodb_mutex_show_status(
 					DBUG_RETURN(1);
 				}
 			}
-		}
-		else {
+		} else {
 			rw_lock_count += mutex->count_using;
 			rw_lock_count_spin_loop += mutex->count_spin_loop;
 			rw_lock_count_spin_rounds += mutex->count_spin_rounds;
@@ -8317,7 +8797,7 @@ innodb_mutex_show_status(
 		buf1len= (uint) my_snprintf(buf1, sizeof(buf1), "%s:%lu",
 				     mutex->cfile_name, (ulong) mutex->cline);
 		buf2len= (uint) my_snprintf(buf2, sizeof(buf2), "os_waits=%lu",
-				     mutex->count_os_wait);
+				     (ulong) mutex->count_os_wait);
 
 		if (stat_print(thd, innobase_hton_name,
 			       hton_name_len, buf1, buf1len,
@@ -8326,45 +8806,83 @@ innodb_mutex_show_status(
 			DBUG_RETURN(1);
 		}
 #endif /* UNIV_DEBUG */
+	}
 
-next_mutex:
-		mutex = UT_LIST_GET_NEXT(list, mutex);
+	if (block_mutex) {
+		buf1len = (uint) my_snprintf(buf1, sizeof buf1,
+					     "combined %s:%lu",
+					     block_mutex->cfile_name,
+					     (ulong) block_mutex->cline);
+		buf2len = (uint) my_snprintf(buf2, sizeof buf2,
+					     "os_waits=%lu",
+					     (ulong) block_mutex_oswait_count);
+
+		if (stat_print(thd, innobase_hton_name,
+			       hton_name_len, buf1, buf1len,
+			       buf2, buf2len)) {
+			mutex_exit(&mutex_list_mutex);
+			DBUG_RETURN(1);
+		}
 	}
 
 	mutex_exit(&mutex_list_mutex);
 
 	mutex_enter(&rw_lock_list_mutex);
 
-	lock = UT_LIST_GET_FIRST(rw_lock_list);
+	for (lock = UT_LIST_GET_FIRST(rw_lock_list); lock != NULL;
+	     lock = UT_LIST_GET_NEXT(list, lock)) {
+		if (lock->count_os_wait == 0) {
+			continue;
+		}
+
+		if (buf_pool_is_block_lock(lock)) {
+			block_lock = lock;
+			block_lock_oswait_count += lock->count_os_wait;
+			continue;
+		}
+
+		buf1len = my_snprintf(buf1, sizeof buf1, "%s:%lu",
+				     lock->cfile_name, (ulong) lock->cline);
+		buf2len = my_snprintf(buf2, sizeof buf2, "os_waits=%lu",
+				      (ulong) lock->count_os_wait);
 
-	while (lock != NULL) {
-		if (lock->count_os_wait
-		    && !buf_pool_is_block_lock(lock)) {
-			buf1len= my_snprintf(buf1, sizeof(buf1), "%s:%lu",
-                                    lock->cfile_name, (ulong) lock->cline);
-			buf2len= my_snprintf(buf2, sizeof(buf2),
-                                    "os_waits=%lu", lock->count_os_wait);
-
-			if (stat_print(thd, innobase_hton_name,
-				       hton_name_len, buf1, buf1len,
-				       buf2, buf2len)) {
-				mutex_exit(&rw_lock_list_mutex);
-				DBUG_RETURN(1);
-			}
+		if (stat_print(thd, innobase_hton_name,
+			       hton_name_len, buf1, buf1len,
+			       buf2, buf2len)) {
+			mutex_exit(&rw_lock_list_mutex);
+			DBUG_RETURN(1);
+		}
+	}
+
+	if (block_lock) {
+		buf1len = (uint) my_snprintf(buf1, sizeof buf1,
+					     "combined %s:%lu",
+					     block_lock->cfile_name,
+					     (ulong) block_lock->cline);
+		buf2len = (uint) my_snprintf(buf2, sizeof buf2,
+					     "os_waits=%lu",
+					     (ulong) block_lock_oswait_count);
+
+		if (stat_print(thd, innobase_hton_name,
+			       hton_name_len, buf1, buf1len,
+			       buf2, buf2len)) {
+			mutex_exit(&rw_lock_list_mutex);
+			DBUG_RETURN(1);
 		}
-		lock = UT_LIST_GET_NEXT(list, lock);
 	}
 
 	mutex_exit(&rw_lock_list_mutex);
 
 #ifdef UNIV_DEBUG
-	buf2len= my_snprintf(buf2, sizeof(buf2),
-		"count=%lu, spin_waits=%lu, spin_rounds=%lu, "
-		"os_waits=%lu, os_yields=%lu, os_wait_times=%lu",
-		rw_lock_count, rw_lock_count_spin_loop,
-		rw_lock_count_spin_rounds,
-		rw_lock_count_os_wait, rw_lock_count_os_yield,
-		(ulong) (rw_lock_wait_time/1000));
+	buf2len = my_snprintf(buf2, sizeof buf2,
+			     "count=%lu, spin_waits=%lu, spin_rounds=%lu, "
+			     "os_waits=%lu, os_yields=%lu, os_wait_times=%lu",
+			      (ulong) rw_lock_count,
+			      (ulong) rw_lock_count_spin_loop,
+			      (ulong) rw_lock_count_spin_rounds,
+			      (ulong) rw_lock_count_os_wait,
+			      (ulong) rw_lock_count_os_yield,
+			      (ulong) (rw_lock_wait_time / 1000));
 
 	if (stat_print(thd, innobase_hton_name, hton_name_len,
 			STRING_WITH_LEN("rw_lock_mutexes"), buf2, buf2len)) {
@@ -8426,6 +8944,11 @@ static INNOBASE_SHARE* get_share(const c
 			    innobase_open_tables, fold, share);
 
 		thr_lock_init(&share->lock);
+
+		/* Index translation table initialization */
+		share->idx_trans_tbl.index_mapping = NULL;
+		share->idx_trans_tbl.index_count = 0;
+		share->idx_trans_tbl.array_size = 0;
 	}
 
 	share->use_count++;
@@ -8456,6 +8979,11 @@ static void free_share(INNOBASE_SHARE* s
 		HASH_DELETE(INNOBASE_SHARE, table_name_hash,
 			    innobase_open_tables, fold, share);
 		thr_lock_delete(&share->lock);
+
+		/* Free any memory from index translation table */
+		my_free(share->idx_trans_tbl.index_mapping,
+			MYF(MY_ALLOW_ZERO_PTR));
+
 		my_free(share, MYF(0));
 
 		/* TODO: invoke HASH_MIGRATE if innobase_open_tables
@@ -8690,7 +9218,10 @@ ha_innobase::innobase_get_autoinc(
 		*value = dict_table_autoinc_read(prebuilt->table);
 
 		/* It should have been initialized during open. */
-		ut_a(*value != 0);
+		if (*value == 0) {
+			prebuilt->autoinc_error = DB_UNSUPPORTED;
+			dict_table_autoinc_unlock(prebuilt->table);
+		}
 	}
 
 	return(prebuilt->autoinc_error);
@@ -8770,6 +9301,11 @@ ha_innobase::get_auto_increment(
 	invoking this method. So we are not sure if it's guaranteed to
 	be 0 or not. */
 
+	/* We need the upper limit of the col type to check for
+	whether we update the table autoinc counter or not. */
+	ulonglong	col_max_value = innobase_get_int_col_max_value(
+		table->next_number_field);
+
 	/* Called for the first time ? */
 	if (trx->n_autoinc_rows == 0) {
 
@@ -8786,6 +9322,11 @@ ha_innobase::get_auto_increment(
 	/* Not in the middle of a mult-row INSERT. */
 	} else if (prebuilt->autoinc_last_value == 0) {
 		set_if_bigger(*first_value, autoinc);
+	/* Check for negative values. */
+	} else if (*first_value > col_max_value && trx->n_autoinc_rows > 0) {
+		/* Set to next logical value. */
+		ut_a(autoinc > trx->n_autoinc_rows);
+		*first_value = (autoinc - trx->n_autoinc_rows) - 1;
 	}
 
 	*nb_reserved_values = trx->n_autoinc_rows;
@@ -8796,12 +9337,6 @@ ha_innobase::get_auto_increment(
 		ulonglong	need;
 		ulonglong	current;
 		ulonglong	next_value;
-		ulonglong	col_max_value;
-
-		/* We need the upper limit of the col type to check for
-		whether we update the table autoinc counter or not. */
-		col_max_value = innobase_get_int_col_max_value(
-			table->next_number_field);
 
 		current = *first_value > col_max_value ? autoinc : *first_value;
 		need = *nb_reserved_values * increment;
@@ -9298,33 +9833,60 @@ innobase_set_cursor_view(
 				  (cursor_view_t*) curview);
 }
 
+/*******************************************************************//**
+If col_name is not NULL, check whether the named column is being
+renamed in the table. If col_name is NULL, check whether any of
+the columns in the table is being renamed.
+@return true if the column (or, for NULL, any column) is being renamed */
+static
+bool
+check_column_being_renamed(
+/*=======================*/
+	const TABLE*	table,		/*!< in: MySQL table */
+	const char*	col_name)	/*!< in: name of the column */
+{
+	uint		k;
+	Field*		field;
 
-/***********************************************************************
-Check whether any of the given columns is being renamed in the table. */
+	for (k = 0; k < table->s->fields; k++) {
+		field = table->field[k];
+
+		if (field->flags & FIELD_IS_RENAMED) {
+
+			/* If col_name is not provided, return true,
+			since the field is marked as being renamed. */
+			if (!col_name) {
+				return(true);
+			}
+
+			/* If col_name is provided, return true only
+			if the names match. */
+			if (innobase_strcasecmp(field->field_name,
+						col_name) == 0) {
+				return(true);
+			}
+		}
+	}
+
+	return(false);
+}
+
+/*******************************************************************//**
+Check whether any of the given columns is being renamed in the table.
+@return true if any of col_names is being renamed in table */
 static
 bool
 column_is_being_renamed(
 /*====================*/
-					/* out: true if any of col_names is
-					being renamed in table */
-	TABLE*		table,		/* in: MySQL table */
-	uint		n_cols,		/* in: number of columns */
-	const char**	col_names)	/* in: names of the columns */
+	TABLE*		table,		/*!< in: MySQL table */
+	uint		n_cols,		/*!< in: number of columns */
+	const char**	col_names)	/*!< in: names of the columns */
 {
 	uint		j;
-	uint		k;
-	Field*		field;
-	const char*	col_name;
 
 	for (j = 0; j < n_cols; j++) {
-		col_name = col_names[j];
-		for (k = 0; k < table->s->fields; k++) {
-			field = table->field[k];
-			if ((field->flags & FIELD_IS_RENAMED)
-			    && innobase_strcasecmp(field->field_name,
-						   col_name) == 0) {
-				return(true);
-			}
+		if (check_column_being_renamed(table, col_names[j])) {
+			return(true);
 		}
 	}
 
@@ -9408,6 +9970,15 @@ ha_innobase::check_if_incompatible_data(
 		return(COMPATIBLE_DATA_NO);
 	}
 
+	/* For a column rename operation, MySQL does not supply enough
+	information (new column name etc.) for InnoDB to make the
+	appropriate system metadata change. To avoid system metadata
+	inconsistency, for now we request a table rebuild/copy by
+	returning COMPATIBLE_DATA_NO. */
+	if (check_column_being_renamed(table, NULL)) {
+		return COMPATIBLE_DATA_NO;
+	}
+
 	/* Check if a column participating in a foreign key is being renamed.
 	There is no mechanism for updating InnoDB foreign key definitions. */
 	if (foreign_key_column_is_being_renamed(prebuilt, table)) {
@@ -10153,13 +10724,13 @@ static MYSQL_SYSVAR_BOOL(use_sys_malloc,
 static MYSQL_SYSVAR_STR(change_buffering, innobase_change_buffering,
   PLUGIN_VAR_RQCMDARG,
   "Buffer changes to reduce random access: "
-  "OFF, ON, inserting, deleting, changing, or purging.",
+  "OFF, ON, none, inserts.",
   innodb_change_buffering_validate,
   innodb_change_buffering_update, NULL);
 
 static MYSQL_SYSVAR_ULONG(read_ahead_threshold, srv_read_ahead_threshold,
   PLUGIN_VAR_RQCMDARG,
-  "Number of pages that must be accessed sequentially for InnoDB to"
+  "Number of pages that must be accessed sequentially for InnoDB to "
   "trigger a readahead.",
   NULL, NULL, 56, 0, 64, 0);
 

=== modified file 'storage/innodb_plugin/handler/ha_innodb.h'
--- a/storage/innodb_plugin/handler/ha_innodb.h	2009-11-30 12:11:36 +0000
+++ b/storage/innodb_plugin/handler/ha_innodb.h	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2000, 2009, MySQL AB & Innobase Oy. All Rights Reserved.
+Copyright (c) 2000, 2010, MySQL AB & Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -27,15 +27,31 @@ Place, Suite 330, Boston, MA 02111-1307 
 #pragma interface			/* gcc class implementation */
 #endif
 
+/* Structure defining the translation table between MySQL index and
+InnoDB index structures */
+typedef struct innodb_idx_translate_struct {
+	ulint		index_count;	/*!< number of valid index entries
+					in the index_mapping array */
+	ulint		array_size;	/*!< array size of index_mapping */
+	dict_index_t**	index_mapping;	/*!< index pointer array directly
+					maps to index in Innodb from MySQL
+					array index */
+} innodb_idx_translate_t;
+
+
 /** InnoDB table share */
 typedef struct st_innobase_share {
-	THR_LOCK	lock;		/*!< MySQL lock protecting
-					this structure */
-	const char*	table_name;	/*!< InnoDB table name */
-	uint		use_count;	/*!< reference count,
-					incremented in get_share()
-					and decremented in free_share() */
-	void*		table_name_hash;/*!< hash table chain node */
+	THR_LOCK		lock;		/*!< MySQL lock protecting
+						this structure */
+	const char*		table_name;	/*!< InnoDB table name */
+	uint			use_count;	/*!< reference count,
+						incremented in get_share()
+						and decremented in
+						free_share() */
+	void*			table_name_hash;/*!< hash table chain node */
+	innodb_idx_translate_t	idx_trans_tbl;	/*!< index translation
+						table between MySQL and
+						Innodb */
 } INNOBASE_SHARE;
 
 
@@ -91,9 +107,8 @@ class ha_innobase: public handler
 	ulint innobase_reset_autoinc(ulonglong auto_inc);
 	ulint innobase_get_autoinc(ulonglong* value);
 	ulint innobase_update_autoinc(ulonglong	auto_inc);
-	ulint innobase_initialize_autoinc();
+	void innobase_initialize_autoinc();
 	dict_index_t* innobase_get_index(uint keynr);
- 	ulonglong innobase_get_int_col_max_value(const Field* field);
 
 	/* Init values for the class: */
  public:

=== modified file 'storage/innodb_plugin/handler/handler0alter.cc'
--- a/storage/innodb_plugin/handler/handler0alter.cc	2009-11-30 13:42:26 +0000
+++ b/storage/innodb_plugin/handler/handler0alter.cc	2010-04-01 12:12:44 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2005, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -229,9 +229,11 @@ static
 int
 innobase_check_index_keys(
 /*======================*/
-	const KEY*	key_info,	/*!< in: Indexes to be created */
-	ulint		num_of_keys)	/*!< in: Number of indexes to
-					be created */
+	const KEY*		key_info,	/*!< in: Indexes to be
+						created */
+	ulint			num_of_keys,	/*!< in: Number of
+						indexes to be created */
+	const dict_table_t*	table)		/*!< in: Existing indexes */
 {
 	ulint		key_num;
 
@@ -248,9 +250,22 @@ innobase_check_index_keys(
 			const KEY&	key2 = key_info[i];
 
 			if (0 == strcmp(key.name, key2.name)) {
-				sql_print_error("InnoDB: key name `%s` appears"
-						" twice in CREATE INDEX\n",
-						key.name);
+				my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
+					 key.name);
+
+				return(ER_WRONG_NAME_FOR_INDEX);
+			}
+		}
+
+		/* Check that the same index name does not already exist. */
+
+		for (const dict_index_t* index
+			     = dict_table_get_first_index(table);
+		     index; index = dict_table_get_next_index(index)) {
+
+			if (0 == strcmp(key.name, index->name)) {
+				my_error(ER_WRONG_NAME_FOR_INDEX, MYF(0),
+					 key.name);
 
 				return(ER_WRONG_NAME_FOR_INDEX);
 			}
@@ -258,7 +273,7 @@ innobase_check_index_keys(
 
 		/* Check that MySQL does not try to create a column
 		prefix index field on an inappropriate data type and
-		that the same colum does not appear twice in the index. */
+		that the same column does not appear twice in the index. */
 
 		for (ulint i = 0; i < key.key_parts; i++) {
 			const KEY_PART_INFO&	key_part1
@@ -289,14 +304,8 @@ innobase_check_index_keys(
 					}
 				}
 
-				sql_print_error("InnoDB: MySQL is trying to"
-						" create a column prefix"
-						" index field on an"
-						" inappropriate data type."
-						" column `%s`,"
-						" index `%s`.\n",
-						field->field_name,
-						key.name);
+				my_error(ER_WRONG_KEY_COLUMN, MYF(0),
+					 field->field_name);
 				return(ER_WRONG_KEY_COLUMN);
 			}
 
@@ -309,11 +318,8 @@ innobase_check_index_keys(
 					continue;
 				}
 
-				sql_print_error("InnoDB: column `%s`"
-						" is not allowed to occur"
-						" twice in index `%s`.\n",
-						key_part1.field->field_name,
-						key.name);
+				my_error(ER_WRONG_KEY_COLUMN, MYF(0),
+					 key_part1.field->field_name);
 				return(ER_WRONG_KEY_COLUMN);
 			}
 		}
@@ -522,12 +528,14 @@ innobase_create_key_def(
 				     key_info->name, "PRIMARY");
 
 	/* If there is a UNIQUE INDEX consisting entirely of NOT NULL
-	columns, MySQL will treat it as a PRIMARY KEY unless the
-	table already has one. */
+	columns and if the index does not contain column prefix(es)
+	(only prefix/part of the column is indexed), MySQL will treat the
+	index as a PRIMARY KEY unless the table already has one. */
 
 	if (!new_primary && (key_info->flags & HA_NOSAME)
+	    && (!(key_info->flags & HA_KEY_HAS_PART_KEY_SEG))
 	    && row_table_got_default_clust_index(table)) {
-		uint	key_part = key_info->key_parts;
+		uint    key_part = key_info->key_parts;
 
 		new_primary = TRUE;
 
@@ -656,12 +664,18 @@ ha_innobase::add_index(
 	innodb_table = indexed_table
 		= dict_table_get(prebuilt->table->name, FALSE);
 
+	if (UNIV_UNLIKELY(!innodb_table)) {
+		error = HA_ERR_NO_SUCH_TABLE;
+		goto err_exit;
+	}
+
 	/* Check if the index name is reserved. */
 	if (innobase_index_name_is_reserved(trx, key_info, num_of_keys)) {
 		error = -1;
 	} else {
 		/* Check that index keys are sensible */
-		error = innobase_check_index_keys(key_info, num_of_keys);
+		error = innobase_check_index_keys(key_info, num_of_keys,
+						  innodb_table);
 	}
 
 	if (UNIV_UNLIKELY(error)) {
@@ -764,6 +778,10 @@ err_exit:
 
 	ut_ad(error == DB_SUCCESS);
 
+	/* We will need to rebuild index translation table. Set
+	valid index entry count in the translation table to zero */
+	share->idx_trans_tbl.index_count = 0;
+
 	/* Commit the data dictionary transaction in order to release
 	the table locks on the system tables.  This means that if
 	MySQL crashes while creating a new primary key inside
@@ -799,18 +817,6 @@ err_exit:
 					index, num_of_idx, table);
 
 error_handling:
-#ifdef UNIV_DEBUG
-	/* TODO: At the moment we can't handle the following statement
-	in our debugging code below:
-
-	alter table t drop index b, add index (b);
-
-	The fix will have to parse the SQL and note that the index
-	being added has the same name as the one being dropped and
-	ignore that in the dup index check.*/
-	//dict_table_check_for_dup_indexes(prebuilt->table);
-#endif
-
 	/* After an error, remove all those index definitions from the
 	dictionary which were defined. */
 
@@ -822,6 +828,8 @@ error_handling:
 		row_mysql_lock_data_dictionary(trx);
 		dict_locked = TRUE;
 
+		ut_d(dict_table_check_for_dup_indexes(prebuilt->table));
+
 		if (!new_primary) {
 			error = row_merge_rename_indexes(trx, indexed_table);
 
@@ -1198,9 +1206,11 @@ ha_innobase::final_drop_index(
 		ut_a(!index->to_be_dropped);
 	}
 
-#ifdef UNIV_DEBUG
-	dict_table_check_for_dup_indexes(prebuilt->table);
-#endif
+	/* We will need to rebuild index translation table. Set
+	valid index entry count in the translation table to zero */
+	share->idx_trans_tbl.index_count = 0;
+
+	ut_d(dict_table_check_for_dup_indexes(prebuilt->table));
 
 func_exit:
 	trx_commit_for_mysql(trx);

=== modified file 'storage/innodb_plugin/ibuf/ibuf0ibuf.c'
--- a/storage/innodb_plugin/ibuf/ibuf0ibuf.c	2009-11-30 11:32:05 +0000
+++ b/storage/innodb_plugin/ibuf/ibuf0ibuf.c	2010-04-01 11:17:10 +0000
@@ -730,24 +730,41 @@ page containing the descriptor bits for 
 is x-latched */
 static
 page_t*
-ibuf_bitmap_get_map_page(
-/*=====================*/
-	ulint	space,	/*!< in: space id of the file page */
-	ulint	page_no,/*!< in: page number of the file page */
-	ulint	zip_size,/*!< in: compressed page size in bytes;
-			0 for uncompressed pages */
-	mtr_t*	mtr)	/*!< in: mtr */
+ibuf_bitmap_get_map_page_func(
+/*==========================*/
+	ulint		space,	/*!< in: space id of the file page */
+	ulint		page_no,/*!< in: page number of the file page */
+	ulint		zip_size,/*!< in: compressed page size in bytes;
+				0 for uncompressed pages */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
+	mtr_t*		mtr)	/*!< in: mtr */
 {
 	buf_block_t*	block;
 
-	block = buf_page_get(space, zip_size,
-			     ibuf_bitmap_page_no_calc(zip_size, page_no),
-			     RW_X_LATCH, mtr);
+	block = buf_page_get_gen(space, zip_size,
+				 ibuf_bitmap_page_no_calc(zip_size, page_no),
+				 RW_X_LATCH, NULL, BUF_GET,
+				 file, line, mtr);
 	buf_block_dbg_add_level(block, SYNC_IBUF_BITMAP);
 
 	return(buf_block_get_frame(block));
 }
 
+/********************************************************************//**
+Gets the ibuf bitmap page where the bits describing a given file page are
+stored.
+@return bitmap page where the file page is mapped, that is, the bitmap
+page containing the descriptor bits for the file page; the bitmap page
+is x-latched
+@param space	in: space id of the file page
+@param page_no	in: page number of the file page
+@param zip_size	in: compressed page size in bytes; 0 for uncompressed pages
+@param mtr	in: mini-transaction */
+#define ibuf_bitmap_get_map_page(space, page_no, zip_size, mtr)		\
+	ibuf_bitmap_get_map_page_func(space, page_no, zip_size,		\
+				      __FILE__, __LINE__, mtr)
+
 /************************************************************************//**
 Sets the free bits of the page in the ibuf bitmap. This is done in a separate
 mini-transaction, hence this operation does not restrict further work to only

=== modified file 'storage/innodb_plugin/include/btr0btr.h'
--- a/storage/innodb_plugin/include/btr0btr.h	2009-05-27 09:45:59 +0000
+++ b/storage/innodb_plugin/include/btr0btr.h	2010-04-01 12:02:34 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -193,6 +193,10 @@ btr_leaf_page_release(
 	mtr_t*		mtr);		/*!< in: mtr */
 /**************************************************************//**
 Gets the child node file address in a node pointer.
+NOTE: the offsets array must contain all offsets for the record since
+we read the last field according to offsets and assume that it contains
+the child page number. In other words offsets must have been retrieved
+with rec_get_offsets(n_fields=ULINT_UNDEFINED).
 @return	child node address */
 UNIV_INLINE
 ulint
@@ -317,12 +321,16 @@ Inserts a data tuple to a tree on a non-
 that mtr holds an x-latch on the tree. */
 UNIV_INTERN
 void
-btr_insert_on_non_leaf_level(
-/*=========================*/
+btr_insert_on_non_leaf_level_func(
+/*==============================*/
 	dict_index_t*	index,	/*!< in: index */
 	ulint		level,	/*!< in: level, must be > 0 */
 	dtuple_t*	tuple,	/*!< in: the record to be inserted */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
 	mtr_t*		mtr);	/*!< in: mtr */
+# define btr_insert_on_non_leaf_level(i,l,t,m)				\
+	btr_insert_on_non_leaf_level_func(i,l,t,__FILE__,__LINE__,m)
 #endif /* !UNIV_HOTBACKUP */
 /****************************************************************//**
 Sets a record as the predefined minimum record. */

=== modified file 'storage/innodb_plugin/include/btr0btr.ic'
--- a/storage/innodb_plugin/include/btr0btr.ic	2009-05-27 09:45:59 +0000
+++ b/storage/innodb_plugin/include/btr0btr.ic	2010-04-01 13:01:22 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -255,6 +255,10 @@ btr_page_set_prev(
 
 /**************************************************************//**
 Gets the child node file address in a node pointer.
+NOTE: the offsets array must contain all offsets for the record since
+we read the last field according to offsets and assume that it contains
+the child page number. In other words offsets must have been retrieved
+with rec_get_offsets(n_fields=ULINT_UNDEFINED).
 @return	child node address */
 UNIV_INLINE
 ulint

=== modified file 'storage/innodb_plugin/include/btr0cur.h'
--- a/storage/innodb_plugin/include/btr0cur.h	2009-09-02 23:48:06 +0000
+++ b/storage/innodb_plugin/include/btr0cur.h	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -152,29 +152,39 @@ btr_cur_search_to_nth_level(
 	ulint		has_search_latch,/*!< in: latch mode the caller
 				currently has on btr_search_latch:
 				RW_S_LATCH, or 0 */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
 	mtr_t*		mtr);	/*!< in: mtr */
 /*****************************************************************//**
 Opens a cursor at either end of an index. */
 UNIV_INTERN
 void
-btr_cur_open_at_index_side(
-/*=======================*/
+btr_cur_open_at_index_side_func(
+/*============================*/
 	ibool		from_left,	/*!< in: TRUE if open to the low end,
 					FALSE if to the high end */
 	dict_index_t*	index,		/*!< in: index */
 	ulint		latch_mode,	/*!< in: latch mode */
 	btr_cur_t*	cursor,		/*!< in: cursor */
+	const char*	file,		/*!< in: file name */
+	ulint		line,		/*!< in: line where called */
 	mtr_t*		mtr);		/*!< in: mtr */
+#define btr_cur_open_at_index_side(f,i,l,c,m)				\
+	btr_cur_open_at_index_side_func(f,i,l,c,__FILE__,__LINE__,m)
 /**********************************************************************//**
 Positions a cursor at a randomly chosen position within a B-tree. */
 UNIV_INTERN
 void
-btr_cur_open_at_rnd_pos(
-/*====================*/
+btr_cur_open_at_rnd_pos_func(
+/*=========================*/
 	dict_index_t*	index,		/*!< in: index */
 	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
 	btr_cur_t*	cursor,		/*!< in/out: B-tree cursor */
+	const char*	file,		/*!< in: file name */
+	ulint		line,		/*!< in: line where called */
 	mtr_t*		mtr);		/*!< in: mtr */
+#define btr_cur_open_at_rnd_pos(i,l,c,m)				\
+	btr_cur_open_at_rnd_pos_func(i,l,c,__FILE__,__LINE__,m)
 /*************************************************************//**
 Tries to perform an insert to a page in an index tree, next to cursor.
 It is assumed that mtr holds an x-latch on the page. The operation does

=== modified file 'storage/innodb_plugin/include/btr0pcur.h'
--- a/storage/innodb_plugin/include/btr0pcur.h	2009-05-27 09:45:59 +0000
+++ b/storage/innodb_plugin/include/btr0pcur.h	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -82,8 +82,8 @@ Initializes and opens a persistent curso
 closed with btr_pcur_close. */
 UNIV_INLINE
 void
-btr_pcur_open(
-/*==========*/
+btr_pcur_open_func(
+/*===============*/
 	dict_index_t*	index,	/*!< in: index */
 	const dtuple_t*	tuple,	/*!< in: tuple on which search done */
 	ulint		mode,	/*!< in: PAGE_CUR_L, ...;
@@ -94,14 +94,18 @@ btr_pcur_open(
 				record! */
 	ulint		latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
 	btr_pcur_t*	cursor, /*!< in: memory buffer for persistent cursor */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
 	mtr_t*		mtr);	/*!< in: mtr */
+#define btr_pcur_open(i,t,md,l,c,m)				\
+	btr_pcur_open_func(i,t,md,l,c,__FILE__,__LINE__,m)
 /**************************************************************//**
 Opens an persistent cursor to an index tree without initializing the
 cursor. */
 UNIV_INLINE
 void
-btr_pcur_open_with_no_init(
-/*=======================*/
+btr_pcur_open_with_no_init_func(
+/*============================*/
 	dict_index_t*	index,	/*!< in: index */
 	const dtuple_t*	tuple,	/*!< in: tuple on which search done */
 	ulint		mode,	/*!< in: PAGE_CUR_L, ...;
@@ -119,7 +123,12 @@ btr_pcur_open_with_no_init(
 	ulint		has_search_latch,/*!< in: latch mode the caller
 				currently has on btr_search_latch:
 				RW_S_LATCH, or 0 */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
 	mtr_t*		mtr);	/*!< in: mtr */
+#define btr_pcur_open_with_no_init(ix,t,md,l,cur,has,m)			\
+	btr_pcur_open_with_no_init_func(ix,t,md,l,cur,has,__FILE__,__LINE__,m)
+
 /*****************************************************************//**
 Opens a persistent cursor at either end of an index. */
 UNIV_INLINE
@@ -160,8 +169,8 @@ before first in tree. The latching mode 
 BTR_MODIFY_LEAF. */
 UNIV_INTERN
 void
-btr_pcur_open_on_user_rec(
-/*======================*/
+btr_pcur_open_on_user_rec_func(
+/*===========================*/
 	dict_index_t*	index,		/*!< in: index */
 	const dtuple_t*	tuple,		/*!< in: tuple on which search done */
 	ulint		mode,		/*!< in: PAGE_CUR_L, ... */
@@ -169,17 +178,25 @@ btr_pcur_open_on_user_rec(
 					BTR_MODIFY_LEAF */
 	btr_pcur_t*	cursor,		/*!< in: memory buffer for persistent
 					cursor */
+	const char*	file,		/*!< in: file name */
+	ulint		line,		/*!< in: line where called */
 	mtr_t*		mtr);		/*!< in: mtr */
+#define btr_pcur_open_on_user_rec(i,t,md,l,c,m)				\
+	btr_pcur_open_on_user_rec_func(i,t,md,l,c,__FILE__,__LINE__,m)
 /**********************************************************************//**
 Positions a cursor at a randomly chosen position within a B-tree. */
 UNIV_INLINE
 void
-btr_pcur_open_at_rnd_pos(
-/*=====================*/
+btr_pcur_open_at_rnd_pos_func(
+/*==========================*/
 	dict_index_t*	index,		/*!< in: index */
 	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
 	btr_pcur_t*	cursor,		/*!< in/out: B-tree pcur */
+	const char*	file,		/*!< in: file name */
+	ulint		line,		/*!< in: line where called */
 	mtr_t*		mtr);		/*!< in: mtr */
+#define btr_pcur_open_at_rnd_pos(i,l,c,m)				\
+	btr_pcur_open_at_rnd_pos_func(i,l,c,__FILE__,__LINE__,m)
 /**************************************************************//**
 Frees the possible old_rec_buf buffer of a persistent cursor and sets the
 latch mode of the persistent cursor to BTR_NO_LATCHES. */
@@ -218,11 +235,15 @@ record and it can be restored on a user 
 are identical to the ones of the original user record */
 UNIV_INTERN
 ibool
-btr_pcur_restore_position(
-/*======================*/
+btr_pcur_restore_position_func(
+/*===========================*/
 	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
 	btr_pcur_t*	cursor,		/*!< in: detached persistent cursor */
+	const char*	file,		/*!< in: file name */
+	ulint		line,		/*!< in: line where called */
 	mtr_t*		mtr);		/*!< in: mtr */
+#define btr_pcur_restore_position(l,cur,mtr)				\
+	btr_pcur_restore_position_func(l,cur,__FILE__,__LINE__,mtr)
 /**************************************************************//**
 If the latch mode of the cursor is BTR_LEAF_SEARCH or BTR_LEAF_MODIFY,
 releases the page latch and bufferfix reserved by the cursor.
@@ -260,20 +281,13 @@ btr_pcur_get_mtr(
 /*=============*/
 	btr_pcur_t*	cursor);	/*!< in: persistent cursor */
 /**************************************************************//**
-Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES,
+Commits the mtr and sets the pcur latch mode to BTR_NO_LATCHES,
 that is, the cursor becomes detached. If there have been modifications
 to the page where pcur is positioned, this can be used instead of
 btr_pcur_release_leaf. Function btr_pcur_store_position should be used
 before calling this, if restoration of cursor is wanted later. */
 UNIV_INLINE
 void
-btr_pcur_commit(
-/*============*/
-	btr_pcur_t*	pcur);	/*!< in: persistent cursor */
-/**************************************************************//**
-Differs from btr_pcur_commit in that we can specify the mtr to commit. */
-UNIV_INLINE
-void
 btr_pcur_commit_specify_mtr(
 /*========================*/
 	btr_pcur_t*	pcur,	/*!< in: persistent cursor */

=== modified file 'storage/innodb_plugin/include/btr0pcur.ic'
--- a/storage/innodb_plugin/include/btr0pcur.ic	2009-05-27 09:45:59 +0000
+++ b/storage/innodb_plugin/include/btr0pcur.ic	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -395,30 +395,13 @@ btr_pcur_move_to_next(
 }
 
 /**************************************************************//**
-Commits the pcur mtr and sets the pcur latch mode to BTR_NO_LATCHES,
+Commits the mtr and sets the pcur latch mode to BTR_NO_LATCHES,
 that is, the cursor becomes detached. If there have been modifications
 to the page where pcur is positioned, this can be used instead of
 btr_pcur_release_leaf. Function btr_pcur_store_position should be used
 before calling this, if restoration of cursor is wanted later. */
 UNIV_INLINE
 void
-btr_pcur_commit(
-/*============*/
-	btr_pcur_t*	pcur)	/*!< in: persistent cursor */
-{
-	ut_a(pcur->pos_state == BTR_PCUR_IS_POSITIONED);
-
-	pcur->latch_mode = BTR_NO_LATCHES;
-
-	mtr_commit(pcur->mtr);
-
-	pcur->pos_state = BTR_PCUR_WAS_POSITIONED;
-}
-
-/**************************************************************//**
-Differs from btr_pcur_commit in that we can specify the mtr to commit. */
-UNIV_INLINE
-void
 btr_pcur_commit_specify_mtr(
 /*========================*/
 	btr_pcur_t*	pcur,	/*!< in: persistent cursor */
@@ -483,8 +466,8 @@ Initializes and opens a persistent curso
 closed with btr_pcur_close. */
 UNIV_INLINE
 void
-btr_pcur_open(
-/*==========*/
+btr_pcur_open_func(
+/*===============*/
 	dict_index_t*	index,	/*!< in: index */
 	const dtuple_t*	tuple,	/*!< in: tuple on which search done */
 	ulint		mode,	/*!< in: PAGE_CUR_L, ...;
@@ -495,6 +478,8 @@ btr_pcur_open(
 				record! */
 	ulint		latch_mode,/*!< in: BTR_SEARCH_LEAF, ... */
 	btr_pcur_t*	cursor, /*!< in: memory buffer for persistent cursor */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
 	mtr_t*		mtr)	/*!< in: mtr */
 {
 	btr_cur_t*	btr_cursor;
@@ -511,7 +496,7 @@ btr_pcur_open(
 	btr_cursor = btr_pcur_get_btr_cur(cursor);
 
 	btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
-				    btr_cursor, 0, mtr);
+				    btr_cursor, 0, file, line, mtr);
 	cursor->pos_state = BTR_PCUR_IS_POSITIONED;
 
 	cursor->trx_if_known = NULL;
@@ -522,8 +507,8 @@ Opens an persistent cursor to an index t
 cursor. */
 UNIV_INLINE
 void
-btr_pcur_open_with_no_init(
-/*=======================*/
+btr_pcur_open_with_no_init_func(
+/*============================*/
 	dict_index_t*	index,	/*!< in: index */
 	const dtuple_t*	tuple,	/*!< in: tuple on which search done */
 	ulint		mode,	/*!< in: PAGE_CUR_L, ...;
@@ -541,6 +526,8 @@ btr_pcur_open_with_no_init(
 	ulint		has_search_latch,/*!< in: latch mode the caller
 				currently has on btr_search_latch:
 				RW_S_LATCH, or 0 */
+	const char*	file,	/*!< in: file name */
+	ulint		line,	/*!< in: line where called */
 	mtr_t*		mtr)	/*!< in: mtr */
 {
 	btr_cur_t*	btr_cursor;
@@ -553,7 +540,8 @@ btr_pcur_open_with_no_init(
 	btr_cursor = btr_pcur_get_btr_cur(cursor);
 
 	btr_cur_search_to_nth_level(index, 0, tuple, mode, latch_mode,
-				    btr_cursor, has_search_latch, mtr);
+				    btr_cursor, has_search_latch,
+				    file, line, mtr);
 	cursor->pos_state = BTR_PCUR_IS_POSITIONED;
 
 	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
@@ -600,11 +588,13 @@ btr_pcur_open_at_index_side(
 Positions a cursor at a randomly chosen position within a B-tree. */
 UNIV_INLINE
 void
-btr_pcur_open_at_rnd_pos(
-/*=====================*/
+btr_pcur_open_at_rnd_pos_func(
+/*==========================*/
 	dict_index_t*	index,		/*!< in: index */
 	ulint		latch_mode,	/*!< in: BTR_SEARCH_LEAF, ... */
 	btr_pcur_t*	cursor,		/*!< in/out: B-tree pcur */
+	const char*	file,		/*!< in: file name */
+	ulint		line,		/*!< in: line where called */
 	mtr_t*		mtr)		/*!< in: mtr */
 {
 	/* Initialize the cursor */
@@ -614,8 +604,9 @@ btr_pcur_open_at_rnd_pos(
 
 	btr_pcur_init(cursor);
 
-	btr_cur_open_at_rnd_pos(index, latch_mode,
-				btr_pcur_get_btr_cur(cursor), mtr);
+	btr_cur_open_at_rnd_pos_func(index, latch_mode,
+				     btr_pcur_get_btr_cur(cursor),
+				     file, line, mtr);
 	cursor->pos_state = BTR_PCUR_IS_POSITIONED;
 	cursor->old_stored = BTR_PCUR_OLD_NOT_STORED;
 

=== modified file 'storage/innodb_plugin/include/buf0buf.h'
--- a/storage/innodb_plugin/include/buf0buf.h	2009-11-03 10:26:07 +0000
+++ b/storage/innodb_plugin/include/buf0buf.h	2010-04-01 12:56:22 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -33,6 +33,7 @@ Created 11/5/1995 Heikki Tuuri
 #include "hash0hash.h"
 #include "ut0byte.h"
 #include "page0types.h"
+#include "ut0rbt.h"
 #ifndef UNIV_HOTBACKUP
 #include "os0proc.h"
 
@@ -202,20 +203,14 @@ with care. */
 #define buf_page_get_with_no_latch(SP, ZS, OF, MTR)	   buf_page_get_gen(\
 				SP, ZS, OF, RW_NO_LATCH, NULL,\
 				BUF_GET_NO_LATCH, __FILE__, __LINE__, MTR)
-/**************************************************************//**
-NOTE! The following macros should be used instead of
-buf_page_optimistic_get_func, to improve debugging. Only values RW_S_LATCH and
-RW_X_LATCH are allowed as LA! */
-#define buf_page_optimistic_get(LA, BL, MC, MTR)			     \
-	buf_page_optimistic_get_func(LA, BL, MC, __FILE__, __LINE__, MTR)
 /********************************************************************//**
 This is the general function used to get optimistic access to a database
 page.
 @return	TRUE if success */
 UNIV_INTERN
 ibool
-buf_page_optimistic_get_func(
-/*=========================*/
+buf_page_optimistic_get(
+/*====================*/
 	ulint		rw_latch,/*!< in: RW_S_LATCH, RW_X_LATCH */
 	buf_block_t*	block,	/*!< in: guessed block */
 	ib_uint64_t	modify_clock,/*!< in: modify clock value if mode is
@@ -1185,15 +1180,21 @@ struct buf_block_struct{
 	rw_lock_t	lock;		/*!< read-write lock of the buffer
 					frame */
 	unsigned	lock_hash_val:32;/*!< hashed value of the page address
-					in the record lock hash table */
-	unsigned	check_index_page_at_flush:1;
+					in the record lock hash table;
+					protected by buf_block_t::lock
+					(or buf_block_t::mutex, buf_pool_mutex
+				        in buf_page_get_gen(),
+					buf_page_init_for_read()
+					and buf_page_create()) */
+	ibool		check_index_page_at_flush;
 					/*!< TRUE if we know that this is
 					an index page, and want the database
 					to check its consistency before flush;
 					note that there may be pages in the
 					buffer pool which are index pages,
 					but this flag is not set because
-					we do not keep track of all pages */
+					we do not keep track of all pages;
+					NOT protected by any mutex */
 	/* @} */
 	/** @name Optimistic search field */
 	/* @{ */
@@ -1359,6 +1360,19 @@ struct buf_pool_struct{
 					/*!< this is in the set state
 					when there is no flush batch
 					of the given type running */
+	ib_rbt_t*	flush_rbt;	/*!< a red-black tree is used
+					exclusively during recovery to
+					speed up insertions in the
+					flush_list. This tree contains
+					blocks in order of
+					oldest_modification LSN and is
+					kept in sync with the
+					flush_list.
+					Each member of the tree MUST
+					also be on the flush_list.
+					This tree is relevant only in
+					recovery and is set to NULL
+					once the recovery is over. */
 	ulint		freed_page_clock;/*!< a sequence number used
 					to count the number of buffer
 					blocks removed from the end of

=== modified file 'storage/innodb_plugin/include/buf0buf.ic'
--- a/storage/innodb_plugin/include/buf0buf.ic	2009-11-03 10:26:07 +0000
+++ b/storage/innodb_plugin/include/buf0buf.ic	2010-04-01 13:01:22 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
@@ -81,7 +81,7 @@ buf_page_peek_if_too_old(
 		unsigned	access_time = buf_page_is_accessed(bpage);
 
 		if (access_time > 0
-		    && (ut_time_ms() - access_time)
+		    && ((ib_uint32_t) (ut_time_ms() - access_time))
 		    >= buf_LRU_old_threshold_ms) {
 			return(TRUE);
 		}
@@ -705,6 +705,12 @@ buf_block_get_lock_hash_val(
 /*========================*/
 	const buf_block_t*	block)	/*!< in: block */
 {
+	ut_ad(block);
+	ut_ad(buf_page_in_file(&block->page));
+#ifdef UNIV_SYNC_DEBUG
+	ut_ad(rw_lock_own(&(((buf_block_t*) block)->lock), RW_LOCK_EXCLUSIVE)
+	      || rw_lock_own(&(((buf_block_t*) block)->lock), RW_LOCK_SHARED));
+#endif /* UNIV_SYNC_DEBUG */
 	return(block->lock_hash_val);
 }
 

=== modified file 'storage/innodb_plugin/include/buf0flu.h'
--- a/storage/innodb_plugin/include/buf0flu.h	2009-07-30 12:42:56 +0000
+++ b/storage/innodb_plugin/include/buf0flu.h	2010-04-01 13:01:22 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -40,6 +40,16 @@ buf_flush_remove(
 /*=============*/
 	buf_page_t*	bpage);	/*!< in: pointer to the block in question */
 /********************************************************************//**
+Relocates a buffer control block on the flush_list.
+Note that it is assumed that the contents of bpage has already been
+copied to dpage. */
+UNIV_INTERN
+void
+buf_flush_relocate_on_flush_list(
+/*=============================*/
+	buf_page_t*	bpage,	/*!< in/out: control block being moved */
+	buf_page_t*	dpage);	/*!< in/out: destination block */
+/********************************************************************//**
 Updates the flush system data structures when a write is completed. */
 UNIV_INTERN
 void
@@ -139,8 +149,8 @@ how much redo the workload is generating
 
 struct buf_flush_stat_struct
 {
-	ib_uint64_t	redo;		/**< amount of redo generated. */
-	ulint		n_flushed;	/**< number of pages flushed. */
+	ib_uint64_t	redo;		/*!< amount of redo generated. */
+	ulint		n_flushed;	/*!< number of pages flushed. */
 };
 
 /** Statistics for selecting flush rate of dirty pages. */
@@ -175,6 +185,22 @@ buf_flush_validate(void);
 /*====================*/
 #endif /* UNIV_DEBUG || UNIV_BUF_DEBUG */
 
+/******************************************************************//**
+Initialize the red-black tree to speed up insertions into the flush_list
+during recovery process. Should be called at the start of recovery
+process before any page has been read/written. */
+UNIV_INTERN
+void
+buf_flush_init_flush_rbt(void);
+/*==========================*/
+
+/******************************************************************//**
+Frees up the red-black tree. */
+UNIV_INTERN
+void
+buf_flush_free_flush_rbt(void);
+/*==========================*/
+
 /** When buf_flush_free_margin is called, it tries to make this many blocks
 available to replacement in the free list and at the end of the LRU list (to
 make sure that a read-ahead batch can be read efficiently in a single

=== modified file 'storage/innodb_plugin/include/data0type.ic'
--- a/storage/innodb_plugin/include/data0type.ic	2009-07-30 12:42:56 +0000
+++ b/storage/innodb_plugin/include/data0type.ic	2010-04-01 13:01:22 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -285,6 +285,10 @@ dtype_new_store_for_order_and_null_size(
 #endif
 	ulint	len;
 
+	ut_ad(type);
+	ut_ad(type->mtype >= DATA_VARCHAR);
+	ut_ad(type->mtype <= DATA_MYSQL);
+
 	buf[0] = (byte)(type->mtype & 0xFFUL);
 
 	if (type->prtype & DATA_BINARY_TYPE) {

=== modified file 'storage/innodb_plugin/include/dict0boot.h'
--- a/storage/innodb_plugin/include/dict0boot.h	2009-05-27 09:45:59 +0000
+++ b/storage/innodb_plugin/include/dict0boot.h	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -137,6 +137,7 @@ clustered index */
 #define DICT_SYS_INDEXES_PAGE_NO_FIELD	 8
 #define DICT_SYS_INDEXES_SPACE_NO_FIELD	 7
 #define DICT_SYS_INDEXES_TYPE_FIELD	 6
+#define DICT_SYS_INDEXES_NAME_FIELD	 3
 
 /* When a row id which is zero modulo this number (which must be a power of
 two) is assigned, the field DICT_HDR_ROW_ID on the dictionary header page is

=== modified file 'storage/innodb_plugin/include/dict0mem.h'
--- a/storage/innodb_plugin/include/dict0mem.h	2009-10-08 10:00:49 +0000
+++ b/storage/innodb_plugin/include/dict0mem.h	2010-04-01 10:45:58 +0000
@@ -80,21 +80,39 @@ combination of types */
 /** File format */
 /* @{ */
 #define DICT_TF_FORMAT_SHIFT		5	/* file format */
-#define DICT_TF_FORMAT_MASK		(127 << DICT_TF_FORMAT_SHIFT)
+#define DICT_TF_FORMAT_MASK	/* ~0U: avoid signed-shift UB */ \
+((~(~0U << (DICT_TF_BITS - DICT_TF_FORMAT_SHIFT))) << DICT_TF_FORMAT_SHIFT)
 #define DICT_TF_FORMAT_51		0	/*!< InnoDB/MySQL up to 5.1 */
 #define DICT_TF_FORMAT_ZIP		1	/*!< InnoDB plugin for 5.1:
 						compressed tables,
 						new BLOB treatment */
 /** Maximum supported file format */
 #define DICT_TF_FORMAT_MAX		DICT_TF_FORMAT_ZIP
-
+/* @} */
 #define DICT_TF_BITS			6	/*!< number of flag bits */
 #if (1 << (DICT_TF_BITS - DICT_TF_FORMAT_SHIFT)) <= DICT_TF_FORMAT_MAX
 # error "DICT_TF_BITS is insufficient for DICT_TF_FORMAT_MAX"
 #endif
 /* @} */
+
+/** @brief Additional table flags.
+
+These flags will be stored in SYS_TABLES.MIX_LEN.  All unused flags
+will be written as 0.  The column may contain garbage for tables
+created with old versions of InnoDB that only implemented
+ROW_FORMAT=REDUNDANT. */
+/* @{ */
+#define DICT_TF2_SHIFT			DICT_TF_BITS
+						/*!< Shift value for
+						table->flags. */
+#define DICT_TF2_TEMPORARY		1	/*!< TRUE for tables from
+						CREATE TEMPORARY TABLE. */
+#define DICT_TF2_BITS			(DICT_TF2_SHIFT + 1)
+						/*!< Total number of bits
+						in table->flags. */
 /* @} */
 
+
 /**********************************************************************//**
 Creates a table memory object.
 @return	own: table object */
@@ -374,7 +392,7 @@ struct dict_table_struct{
 	unsigned	space:32;
 				/*!< space where the clustered index of the
 				table is placed */
-	unsigned	flags:DICT_TF_BITS;/*!< DICT_TF_COMPACT, ... */
+	unsigned	flags:DICT_TF2_BITS;/*!< DICT_TF_COMPACT, ... */
 	unsigned	ibd_file_missing:1;
 				/*!< TRUE if this is in a single-table
 				tablespace and the .ibd file is missing; then

=== modified file 'storage/innodb_plugin/include/fil0fil.h'
--- a/storage/innodb_plugin/include/fil0fil.h	2009-11-30 11:32:05 +0000
+++ b/storage/innodb_plugin/include/fil0fil.h	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -110,9 +110,10 @@ extern fil_addr_t	fil_addr_null;
 					contents of this field is valid
 					for all uncompressed pages. */
 #define FIL_PAGE_FILE_FLUSH_LSN	26	/*!< this is only defined for the
-					first page in a data file: the file
-					has been flushed to disk at least up
-					to this lsn */
+					first page in a system tablespace
+					data file (ibdata*, not *.ibd):
+					the file has been flushed to disk
+					at least up to this lsn */
 #define FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID  34 /*!< starting from 4.1.x this
 					contains the space id of the page */
 #define FIL_PAGE_DATA		38	/*!< start of the data on the page */

=== modified file 'storage/innodb_plugin/include/hash0hash.h'
--- a/storage/innodb_plugin/include/hash0hash.h	2009-05-27 09:45:59 +0000
+++ b/storage/innodb_plugin/include/hash0hash.h	2010-04-01 13:02:01 +0000
@@ -434,11 +434,12 @@ struct hash_table_struct {
 				these heaps */
 #endif /* !UNIV_HOTBACKUP */
 	mem_heap_t*	heap;
+#ifdef UNIV_DEBUG
 	ulint		magic_n;
+# define HASH_TABLE_MAGIC_N	76561114
+#endif /* UNIV_DEBUG */
 };
 
-#define HASH_TABLE_MAGIC_N	76561114
-
 #ifndef UNIV_NONINL
 #include "hash0hash.ic"
 #endif

=== modified file 'storage/innodb_plugin/include/hash0hash.ic'
--- a/storage/innodb_plugin/include/hash0hash.ic	2009-05-27 09:45:59 +0000
+++ b/storage/innodb_plugin/include/hash0hash.ic	2010-04-01 13:02:01 +0000
@@ -35,6 +35,8 @@ hash_get_nth_cell(
 	hash_table_t*	table,	/*!< in: hash table */
 	ulint		n)	/*!< in: cell index */
 {
+	ut_ad(table);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
 	ut_ad(n < table->n_cells);
 
 	return(table->array + n);
@@ -48,6 +50,8 @@ hash_table_clear(
 /*=============*/
 	hash_table_t*	table)	/*!< in/out: hash table */
 {
+	ut_ad(table);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
 	memset(table->array, 0x0,
 	       table->n_cells * sizeof(*table->array));
 }
@@ -61,6 +65,8 @@ hash_get_n_cells(
 /*=============*/
 	hash_table_t*	table)	/*!< in: table */
 {
+	ut_ad(table);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
 	return(table->n_cells);
 }
 
@@ -74,6 +80,8 @@ hash_calc_hash(
 	ulint		fold,	/*!< in: folded value */
 	hash_table_t*	table)	/*!< in: hash table */
 {
+	ut_ad(table);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
 	return(ut_hash_ulint(fold, table->n_cells));
 }
 
@@ -88,6 +96,8 @@ hash_get_mutex_no(
 	hash_table_t*	table,	/*!< in: hash table */
 	ulint		fold)	/*!< in: fold */
 {
+	ut_ad(table);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
 	ut_ad(ut_is_2pow(table->n_mutexes));
 	return(ut_2pow_remainder(hash_calc_hash(fold, table),
 				 table->n_mutexes));
@@ -103,6 +113,8 @@ hash_get_nth_heap(
 	hash_table_t*	table,	/*!< in: hash table */
 	ulint		i)	/*!< in: index of the heap */
 {
+	ut_ad(table);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
 	ut_ad(i < table->n_mutexes);
 
 	return(table->heaps[i]);
@@ -120,6 +132,9 @@ hash_get_heap(
 {
 	ulint	i;
 
+	ut_ad(table);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
+
 	if (table->heap) {
 		return(table->heap);
 	}
@@ -139,6 +154,8 @@ hash_get_nth_mutex(
 	hash_table_t*	table,	/*!< in: hash table */
 	ulint		i)	/*!< in: index of the mutex */
 {
+	ut_ad(table);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
 	ut_ad(i < table->n_mutexes);
 
 	return(table->mutexes + i);
@@ -156,6 +173,9 @@ hash_get_mutex(
 {
 	ulint	i;
 
+	ut_ad(table);
+	ut_ad(table->magic_n == HASH_TABLE_MAGIC_N);
+
 	i = hash_get_mutex_no(table, fold);
 
 	return(hash_get_nth_mutex(table, i));

=== modified file 'storage/innodb_plugin/include/lock0lock.h'
--- a/storage/innodb_plugin/include/lock0lock.h	2009-11-30 11:32:05 +0000
+++ b/storage/innodb_plugin/include/lock0lock.h	2010-04-01 11:17:10 +0000
@@ -613,13 +613,16 @@ lock_rec_print(
 	FILE*		file,	/*!< in: file where to print */
 	const lock_t*	lock);	/*!< in: record type lock */
 /*********************************************************************//**
-Prints info of locks for all transactions. */
+Prints info of locks for all transactions.
+@return FALSE if not able to obtain kernel mutex
+and exits without printing info */
 UNIV_INTERN
-void
+ibool
 lock_print_info_summary(
 /*====================*/
-	FILE*	file);	/*!< in: file where to print */
-/*********************************************************************//**
+	FILE*	file,	/*!< in: file where to print */
+	ibool   nowait);/*!< in: whether to wait for the kernel mutex */
+/*********************************************************************//**
 Prints info of locks for each transaction. */
 UNIV_INTERN
 void

=== modified file 'storage/innodb_plugin/include/log0log.h'
--- a/storage/innodb_plugin/include/log0log.h	2009-11-30 11:32:05 +0000
+++ b/storage/innodb_plugin/include/log0log.h	2010-04-01 11:59:25 +0000
@@ -1,23 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
 Copyright (c) 2009, Google Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
@@ -825,7 +808,17 @@ struct log_struct{
 					written to some log group; for this to
 					be advanced, it is enough that the
 					write i/o has been completed for all
-					log groups */
+					log groups.
+					Note that because InnoDB currently
+					has only one log group, this
+					value is redundant. Also, it
+					is possible that this value
+					falls behind the
+					flushed_to_disk_lsn transiently.
+					It is appropriate to use either
+					flushed_to_disk_lsn or
+					write_lsn which are always
+					up-to-date and accurate. */
 	ib_uint64_t	write_lsn;	/*!< end lsn for the current running
 					write */
 	ulint		write_end_offset;/*!< the data in buffer has

=== modified file 'storage/innodb_plugin/include/log0log.ic'
--- a/storage/innodb_plugin/include/log0log.ic	2009-10-09 12:52:18 +0000
+++ b/storage/innodb_plugin/include/log0log.ic	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -314,12 +314,15 @@ log_reserve_and_write_fast(
 	ulint		data_len;
 #ifdef UNIV_LOG_LSN_DEBUG
 	/* length of the LSN pseudo-record */
-	ulint		lsn_len = 1
-		+ mach_get_compressed_size(log_sys->lsn >> 32)
-		+ mach_get_compressed_size(log_sys->lsn & 0xFFFFFFFFUL);
+	ulint		lsn_len;
 #endif /* UNIV_LOG_LSN_DEBUG */
 
 	mutex_enter(&log_sys->mutex);
+#ifdef UNIV_LOG_LSN_DEBUG
+	lsn_len = 1
+		+ mach_get_compressed_size(log_sys->lsn >> 32)
+		+ mach_get_compressed_size(log_sys->lsn & 0xFFFFFFFFUL);
+#endif /* UNIV_LOG_LSN_DEBUG */
 
 	data_len = len
 #ifdef UNIV_LOG_LSN_DEBUG

=== modified file 'storage/innodb_plugin/include/log0recv.h'
--- a/storage/innodb_plugin/include/log0recv.h	2009-11-30 11:32:05 +0000
+++ b/storage/innodb_plugin/include/log0recv.h	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -176,6 +176,12 @@ UNIV_INTERN
 void
 recv_recovery_from_checkpoint_finish(void);
 /*======================================*/
+/********************************************************//**
+Initiates the rollback of active transactions. */
+UNIV_INTERN
+void
+recv_recovery_rollback_active(void);
+/*===============================*/
 /*******************************************************//**
 Scans log from a buffer and stores new log data to the parsing buffer.
 Parses and hashes the log records if new data found.  Unless
@@ -258,12 +264,14 @@ void
 recv_sys_init(
 /*==========*/
 	ulint	available_memory);	/*!< in: available memory in bytes */
+#ifndef UNIV_HOTBACKUP
 /********************************************************//**
 Reset the state of the recovery system variables. */
 UNIV_INTERN
 void
 recv_sys_var_init(void);
 /*===================*/
+#endif /* !UNIV_HOTBACKUP */
 /*******************************************************************//**
 Empties the hash table of stored log records, applying them to appropriate
 pages. */

=== modified file 'storage/innodb_plugin/include/mem0dbg.h'
--- a/storage/innodb_plugin/include/mem0dbg.h	2009-05-27 09:45:59 +0000
+++ b/storage/innodb_plugin/include/mem0dbg.h	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -28,6 +28,13 @@ Created 6/9/1994 Heikki Tuuri
 check fields whose sizes are given below */
 
 #ifdef UNIV_MEM_DEBUG
+# ifndef UNIV_HOTBACKUP
+/* In the debug version, this mutex protects the hash table
+containing the list of live memory heaps, and also the global
+variables in mem0dbg.c. */
+extern mutex_t	mem_hash_mutex;
+# endif /* !UNIV_HOTBACKUP */
+
 #define MEM_FIELD_HEADER_SIZE	ut_calc_align(2 * sizeof(ulint),\
 						UNIV_MEM_ALIGNMENT)
 #define MEM_FIELD_TRAILER_SIZE	sizeof(ulint)

=== modified file 'storage/innodb_plugin/include/mem0dbg.ic'
--- a/storage/innodb_plugin/include/mem0dbg.ic	2009-05-27 09:45:59 +0000
+++ b/storage/innodb_plugin/include/mem0dbg.ic	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -25,9 +25,6 @@ Created 6/8/1994 Heikki Tuuri
 *************************************************************************/
 
 #ifdef UNIV_MEM_DEBUG
-# ifndef UNIV_HOTBACKUP
-extern mutex_t	mem_hash_mutex;
-# endif /* !UNIV_HOTBACKUP */
 extern ulint	mem_current_allocated_memory;
 
 /******************************************************************//**

=== modified file 'storage/innodb_plugin/include/mem0mem.h'
--- a/storage/innodb_plugin/include/mem0mem.h	2009-11-30 11:32:05 +0000
+++ b/storage/innodb_plugin/include/mem0mem.h	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -359,6 +359,9 @@ struct mem_block_info_struct {
 			to the heap is also the first block in this list,
 			though it also contains the base node of the list. */
 	ulint	len;	/*!< physical length of this block in bytes */
+	ulint	total_size; /*!< physical length in bytes of all blocks
+			in the heap. This is defined only in the base
+			node and is set to ULINT_UNDEFINED in others. */
 	ulint	type;	/*!< type of heap: MEM_HEAP_DYNAMIC, or
 			MEM_HEAP_BUF possibly ORed to MEM_HEAP_BTR_SEARCH */
 	ulint	free;	/*!< offset in bytes of the first free position for

=== modified file 'storage/innodb_plugin/include/mem0mem.ic'
--- a/storage/innodb_plugin/include/mem0mem.ic	2009-05-27 09:45:59 +0000
+++ b/storage/innodb_plugin/include/mem0mem.ic	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -579,18 +579,12 @@ mem_heap_get_size(
 /*==============*/
 	mem_heap_t*	heap)	/*!< in: heap */
 {
-	mem_block_t*	block;
 	ulint		size	= 0;
 
 	ut_ad(mem_heap_check(heap));
 
-	block = heap;
-
-	while (block != NULL) {
+	size = heap->total_size;
 
-		size += mem_block_get_len(block);
-		block = UT_LIST_GET_NEXT(list, block);
-	}
 #ifndef UNIV_HOTBACKUP
 	if (heap->free_block) {
 		size += UNIV_PAGE_SIZE;

=== modified file 'storage/innodb_plugin/include/mtr0mtr.ic'
--- a/storage/innodb_plugin/include/mtr0mtr.ic	2009-05-27 09:45:59 +0000
+++ b/storage/innodb_plugin/include/mtr0mtr.ic	2010-04-01 13:01:22 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -70,6 +70,7 @@ mtr_memo_push(
 	ut_ad(type <= MTR_MEMO_X_LOCK);
 	ut_ad(mtr);
 	ut_ad(mtr->magic_n == MTR_MAGIC_N);
+	ut_ad(mtr->state == MTR_ACTIVE);
 
 	memo = &(mtr->memo);
 
@@ -92,6 +93,7 @@ mtr_set_savepoint(
 
 	ut_ad(mtr);
 	ut_ad(mtr->magic_n == MTR_MAGIC_N);
+	ut_ad(mtr->state == MTR_ACTIVE);
 
 	memo = &(mtr->memo);
 
@@ -149,6 +151,7 @@ mtr_memo_contains(
 
 	ut_ad(mtr);
 	ut_ad(mtr->magic_n == MTR_MAGIC_N);
+	ut_ad(mtr->state == MTR_ACTIVE || mtr->state == MTR_COMMITTING);
 
 	memo = &(mtr->memo);
 

=== modified file 'storage/innodb_plugin/include/os0file.h'
--- a/storage/innodb_plugin/include/os0file.h	2009-11-30 12:04:09 +0000
+++ b/storage/innodb_plugin/include/os0file.h	2010-04-01 11:59:25 +0000
@@ -1,23 +1,6 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
 /***********************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
 Copyright (c) 2009, Percona Inc.
 
 Portions of this file contain modifications contributed and copyrighted

=== modified file 'storage/innodb_plugin/include/que0que.h'
--- a/storage/innodb_plugin/include/que0que.h	2009-07-30 12:42:56 +0000
+++ b/storage/innodb_plugin/include/que0que.h	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -30,6 +30,7 @@ Created 5/27/1996 Heikki Tuuri
 #include "data0data.h"
 #include "dict0types.h"
 #include "trx0trx.h"
+#include "trx0roll.h"
 #include "srv0srv.h"
 #include "usr0types.h"
 #include "que0types.h"
@@ -215,6 +216,16 @@ trx_t*
 thr_get_trx(
 /*========*/
 	que_thr_t*	thr);	/*!< in: query thread */
+/*******************************************************************//**
+Determines if this thread is rolling back an incomplete transaction
+in crash recovery.
+@return TRUE if thr is rolling back an incomplete transaction in crash
+recovery */
+UNIV_INLINE
+ibool
+thr_is_recv(
+/*========*/
+	const que_thr_t*	thr);	/*!< in: query thread */
 /***********************************************************************//**
 Gets the type of a graph node. */
 UNIV_INLINE

=== modified file 'storage/innodb_plugin/include/que0que.ic'
--- a/storage/innodb_plugin/include/que0que.ic	2009-05-27 09:45:59 +0000
+++ b/storage/innodb_plugin/include/que0que.ic	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -38,6 +38,20 @@ thr_get_trx(
 	return(thr->graph->trx);
 }
 
+/*******************************************************************//**
+Determines if this thread is rolling back an incomplete transaction
+in crash recovery.
+@return TRUE if thr is rolling back an incomplete transaction in crash
+recovery */
+UNIV_INLINE
+ibool
+thr_is_recv(
+/*========*/
+	const que_thr_t*	thr)	/*!< in: query thread */
+{
+	return(trx_is_recv(thr->graph->trx));
+}
+
 /***********************************************************************//**
 Gets the first thr in a fork. */
 UNIV_INLINE

=== modified file 'storage/innodb_plugin/include/row0mysql.h'
--- a/storage/innodb_plugin/include/row0mysql.h	2009-11-03 10:22:15 +0000
+++ b/storage/innodb_plugin/include/row0mysql.h	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2000, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2000, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -451,6 +451,12 @@ row_drop_table_for_mysql(
 	const char*	name,	/*!< in: table name */
 	trx_t*		trx,	/*!< in: transaction handle */
 	ibool		drop_db);/*!< in: TRUE=dropping whole database */
+/*********************************************************************//**
+Drop all temporary tables during crash recovery. */
+UNIV_INTERN
+void
+row_mysql_drop_temp_tables(void);
+/*============================*/
 
 /*********************************************************************//**
 Discards the tablespace of a table which stored in an .ibd file. Discarding
@@ -494,14 +500,19 @@ row_rename_table_for_mysql(
 	trx_t*		trx,		/*!< in: transaction handle */
 	ibool		commit);	/*!< in: if TRUE then commit trx */
 /*********************************************************************//**
-Checks a table for corruption.
-@return	DB_ERROR or DB_SUCCESS */
+Checks that the index contains entries in an ascending order, unique
+constraint is not broken, and calculates the number of index entries
+in the read view of the current transaction.
+@return	DB_SUCCESS if ok */
 UNIV_INTERN
 ulint
-row_check_table_for_mysql(
+row_check_index_for_mysql(
 /*======================*/
-	row_prebuilt_t*	prebuilt);	/*!< in: prebuilt struct in MySQL
-					handle */
+	row_prebuilt_t*		prebuilt,	/*!< in: prebuilt struct
+						in MySQL handle */
+	const dict_index_t*	index,		/*!< in: index */
+	ulint*			n_rows);	/*!< out: number of entries
+						seen in the consistent read */
 
 /*********************************************************************//**
 Determines if a table is a magic monitor table.

=== modified file 'storage/innodb_plugin/include/row0sel.h'
--- a/storage/innodb_plugin/include/row0sel.h	2009-05-27 09:45:59 +0000
+++ b/storage/innodb_plugin/include/row0sel.h	2010-04-01 13:01:22 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -105,17 +105,6 @@ row_fetch_print(
 /*============*/
 	void*	row,		/*!< in:  sel_node_t* */
 	void*	user_arg);	/*!< in:  not used */
-/****************************************************************//**
-Callback function for fetch that stores an unsigned 4 byte integer to the
-location pointed. The column's type must be DATA_INT, DATA_UNSIGNED, length
-= 4.
-@return	always returns NULL */
-UNIV_INTERN
-void*
-row_fetch_store_uint4(
-/*==================*/
-	void*	row,		/*!< in:  sel_node_t* */
-	void*	user_arg);	/*!< in:  data pointer */
 /***********************************************************//**
 Prints a row in a select result.
 @return	query thread to run next or NULL */

=== modified file 'storage/innodb_plugin/include/srv0srv.h'
--- a/storage/innodb_plugin/include/srv0srv.h	2009-11-30 11:32:05 +0000
+++ b/storage/innodb_plugin/include/srv0srv.h	2010-04-01 11:59:25 +0000
@@ -1,7 +1,8 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
 Copyright (c) 2008, 2009, Google Inc.
+Copyright (c) 2009, Percona Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
 Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -9,6 +10,13 @@ briefly in the InnoDB documentation. The
 incorporated with their permission, and subject to the conditions contained in
 the file COPYING.Google.
 
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
 Foundation; version 2 of the License.
@@ -22,32 +30,6 @@ this program; if not, write to the Free 
 Place, Suite 330, Boston, MA 02111-1307 USA
 
 *****************************************************************************/
-/***********************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2009, Percona Inc.
-
-Portions of this file contain modifications contributed and copyrighted
-by Percona Inc.. Those modifications are
-gratefully acknowledged and are described briefly in the InnoDB
-documentation. The contributions by Percona Inc. are incorporated with
-their permission, and subject to the conditions contained in the file
-COPYING.Percona.
-
-This program is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-***********************************************************************/
 
 /**************************************************//**
 @file include/srv0srv.h
@@ -227,7 +209,8 @@ extern ibool	srv_print_innodb_tablespace
 extern ibool	srv_print_verbose_log;
 extern ibool	srv_print_innodb_table_monitor;
 
-extern ibool	srv_lock_timeout_and_monitor_active;
+extern ibool	srv_lock_timeout_active;
+extern ibool	srv_monitor_active;
 extern ibool	srv_error_monitor_active;
 
 extern ulong	srv_n_spin_wait_rounds;
@@ -540,15 +523,23 @@ srv_release_mysql_thread_if_suspended(
 				MySQL OS thread	 */
 /*********************************************************************//**
 A thread which wakes up threads whose lock wait may have lasted too long.
-This also prints the info output by various InnoDB monitors.
 @return	a dummy parameter */
 UNIV_INTERN
 os_thread_ret_t
-srv_lock_timeout_and_monitor_thread(
-/*================================*/
+srv_lock_timeout_thread(
+/*====================*/
 	void*	arg);	/*!< in: a dummy parameter required by
 			os_thread_create */
 /*********************************************************************//**
+A thread which prints the info output by various InnoDB monitors.
+@return	a dummy parameter */
+UNIV_INTERN
+os_thread_ret_t
+srv_monitor_thread(
+/*===============*/
+	void*	arg);	/*!< in: a dummy parameter required by
+			os_thread_create */
+/*********************************************************************//**
 A thread which prints warnings about semaphore waits which have lasted
 too long. These can be used to track bugs which cause hangs.
 @return	a dummy parameter */
@@ -559,12 +550,15 @@ srv_error_monitor_thread(
 	void*	arg);	/*!< in: a dummy parameter required by
 			os_thread_create */
 /******************************************************************//**
-Outputs to a file the output of the InnoDB Monitor. */
+Outputs to a file the output of the InnoDB Monitor.
+@return FALSE if not all information printed
+due to failure to obtain necessary mutex */
 UNIV_INTERN
-void
+ibool
 srv_printf_innodb_monitor(
 /*======================*/
 	FILE*	file,		/*!< in: output stream */
+	ibool	nowait,		/*!< in: whether to wait for kernel mutex */
 	ulint*	trx_start,	/*!< out: file position of the start of
 				the list of active transactions */
 	ulint*	trx_end);	/*!< out: file position of the end of

=== modified file 'storage/innodb_plugin/include/sync0rw.h'
--- a/storage/innodb_plugin/include/sync0rw.h	2009-05-27 09:45:59 +0000
+++ b/storage/innodb_plugin/include/sync0rw.h	2010-04-01 13:01:22 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
@@ -429,8 +429,9 @@ ibool
 rw_lock_own(
 /*========*/
 	rw_lock_t*	lock,		/*!< in: rw-lock */
-	ulint		lock_type);	/*!< in: lock type: RW_LOCK_SHARED,
+	ulint		lock_type)	/*!< in: lock type: RW_LOCK_SHARED,
 					RW_LOCK_EX */
+	__attribute__((warn_unused_result));
 #endif /* UNIV_SYNC_DEBUG */
 /******************************************************************//**
 Checks if somebody has locked the rw-lock in the specified mode. */

=== modified file 'storage/innodb_plugin/include/sync0sync.h'
--- a/storage/innodb_plugin/include/sync0sync.h	2009-07-30 12:42:56 +0000
+++ b/storage/innodb_plugin/include/sync0sync.h	2010-04-01 12:54:22 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
@@ -206,7 +206,8 @@ UNIV_INTERN
 ibool
 mutex_own(
 /*======*/
-	const mutex_t*	mutex);	/*!< in: mutex */
+	const mutex_t*	mutex)	/*!< in: mutex */
+	__attribute__((warn_unused_result));
 #endif /* UNIV_DEBUG */
 #ifdef UNIV_SYNC_DEBUG
 /******************************************************************//**
@@ -238,16 +239,27 @@ ibool
 sync_thread_levels_empty(void);
 /*==========================*/
 /******************************************************************//**
-Checks that the level array for the current thread is empty.
-@return	TRUE if empty except the exceptions specified below */
-UNIV_INTERN
-ibool
-sync_thread_levels_empty_gen(
-/*=========================*/
+Checks if the level array for the current thread contains a
+mutex or rw-latch at the specified level.
+@return	a matching latch, or NULL if not found */
+UNIV_INTERN
+void*
+sync_thread_levels_contains(
+/*========================*/
+	ulint	level);			/*!< in: latching order level
+					(SYNC_DICT, ...)*/
+/******************************************************************//**
+Checks if the level array for the current thread is empty.
+@return	a latch, or NULL if empty except the exceptions specified below */
+UNIV_INTERN
+void*
+sync_thread_levels_nonempty_gen(
+/*============================*/
 	ibool	dict_mutex_allowed);	/*!< in: TRUE if dictionary mutex is
 					allowed to be owned by the thread,
 					also purge_is_running mutex is
 					allowed */
+#define sync_thread_levels_empty_gen(d) (!sync_thread_levels_nonempty_gen(d))
 /******************************************************************//**
 Gets the debug information for a reserved mutex. */
 UNIV_INTERN

=== modified file 'storage/innodb_plugin/include/trx0rseg.h'
--- a/storage/innodb_plugin/include/trx0rseg.h	2009-11-30 11:32:05 +0000
+++ b/storage/innodb_plugin/include/trx0rseg.h	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -114,17 +114,6 @@ trx_rseg_list_and_array_init(
 /*=========================*/
 	trx_sysf_t*	sys_header,	/*!< in: trx system header */
 	mtr_t*		mtr);		/*!< in: mtr */
-/****************************************************************//**
-Creates a new rollback segment to the database.
-@return	the created segment object, NULL if fail */
-UNIV_INTERN
-trx_rseg_t*
-trx_rseg_create(
-/*============*/
-	ulint	space,		/*!< in: space id */
-	ulint	max_size,	/*!< in: max size in pages */
-	ulint*	id,		/*!< out: rseg id */
-	mtr_t*	mtr);		/*!< in: mtr */
 /***************************************************************************
 Free's an instance of the rollback segment in memory. */
 UNIV_INTERN

=== modified file 'storage/innodb_plugin/include/trx0sys.h'
--- a/storage/innodb_plugin/include/trx0sys.h	2009-11-30 11:32:05 +0000
+++ b/storage/innodb_plugin/include/trx0sys.h	2010-04-01 11:01:13 +0000
@@ -333,12 +333,14 @@ UNIV_INTERN
 void
 trx_sys_file_format_tag_init(void);
 /*==============================*/
+#ifndef UNIV_HOTBACKUP
 /*****************************************************************//**
 Shutdown/Close the transaction system. */
 UNIV_INTERN
 void
 trx_sys_close(void);
 /*===============*/
+#endif /* !UNIV_HOTBACKUP */
 /*****************************************************************//**
 Get the name representation of the file format from its id.
 @return	pointer to the name */
@@ -495,7 +497,6 @@ this contains the same fields as TRX_SYS
 						within that file */
 #define TRX_SYS_MYSQL_LOG_NAME		12	/*!< MySQL log file name */
 
-#ifndef UNIV_HOTBACKUP
 /** Doublewrite buffer */
 /* @{ */
 /** The offset of the doublewrite buffer header on the trx system header page */
@@ -547,6 +548,7 @@ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_NO. */
 #define TRX_SYS_DOUBLEWRITE_BLOCK_SIZE	FSP_EXTENT_SIZE
 /* @} */
 
+#ifndef UNIV_HOTBACKUP
 /** File format tag */
 /* @{ */
 /** The offset of the file format tag on the trx system header page

=== modified file 'storage/innodb_plugin/include/trx0trx.h'
--- a/storage/innodb_plugin/include/trx0trx.h	2009-12-01 10:38:40 +0000
+++ b/storage/innodb_plugin/include/trx0trx.h	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -349,7 +349,7 @@ trx_print(
 				   use the default max length */
 
 /** Type of data dictionary operation */
-enum trx_dict_op {
+typedef enum trx_dict_op {
 	/** The transaction is not modifying the data dictionary. */
 	TRX_DICT_OP_NONE = 0,
 	/** The transaction is creating a table or an index, or
@@ -361,7 +361,7 @@ enum trx_dict_op {
 	existing table.  In crash recovery, the data dictionary
 	must be locked, but the table must not be dropped. */
 	TRX_DICT_OP_INDEX = 2
-};
+} trx_dict_op_t;
 
 /**********************************************************************//**
 Determine if a transaction is a dictionary operation.
@@ -463,69 +463,79 @@ rolling back after a database recovery *
 
 struct trx_struct{
 	ulint		magic_n;
-	/* All the next fields are protected by the kernel mutex, except the
-	undo logs which are protected by undo_mutex */
+
+	/* These fields are not protected by any mutex. */
 	const char*	op_info;	/*!< English text describing the
 					current operation, or an empty
 					string */
-	unsigned	is_purge:1;	/*!< 0=user transaction, 1=purge */
-	unsigned	is_recovered:1;	/*!< 0=normal transaction,
-					1=recovered, must be rolled back */
-	unsigned	conc_state:2;	/*!< state of the trx from the point
+	ulint		conc_state;	/*!< state of the trx from the point
 					of view of concurrency control:
 					TRX_ACTIVE, TRX_COMMITTED_IN_MEMORY,
 					... */
-	unsigned	que_state:2;	/*!< valid when conc_state == TRX_ACTIVE:
-					TRX_QUE_RUNNING, TRX_QUE_LOCK_WAIT,
-					... */
-	unsigned	isolation_level:2;/* TRX_ISO_REPEATABLE_READ, ... */
-	unsigned	check_foreigns:1;/* normally TRUE, but if the user
+	ulint		isolation_level;/* TRX_ISO_REPEATABLE_READ, ... */
+	ulint		check_foreigns;	/* normally TRUE, but if the user
 					wants to suppress foreign key checks,
 					(in table imports, for example) we
 					set this FALSE */
-	unsigned	check_unique_secondary:1;
+	ulint		check_unique_secondary;
 					/* normally TRUE, but if the user
 					wants to speed up inserts by
 					suppressing unique key checks
 					for secondary indexes when we decide
 					if we can use the insert buffer for
 					them, we set this FALSE */
-	unsigned	support_xa:1;	/*!< normally we do the XA two-phase
+	ulint		support_xa;	/*!< normally we do the XA two-phase
 					commit steps, but by setting this to
 					FALSE, one can save CPU time and about
 					150 bytes in the undo log size as then
 					we skip XA steps */
-	unsigned	flush_log_later:1;/* In 2PC, we hold the
+	ulint		flush_log_later;/* In 2PC, we hold the
 					prepare_commit mutex across
 					both phases. In that case, we
 					defer flush of the logs to disk
 					until after we release the
 					mutex. */
-	unsigned	must_flush_log_later:1;/* this flag is set to TRUE in
+	ulint		must_flush_log_later;/* this flag is set to TRUE in
 					trx_commit_off_kernel() if
 					flush_log_later was TRUE, and there
 					were modifications by the transaction;
 					in that case we must flush the log
 					in trx_commit_complete_for_mysql() */
-	unsigned	dict_operation:2;/**< @see enum trx_dict_op */
-	unsigned	duplicates:2;	/*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
-	unsigned	active_trans:2;	/*!< 1 - if a transaction in MySQL
+	ulint		duplicates;	/*!< TRX_DUP_IGNORE | TRX_DUP_REPLACE */
+	ulint		active_trans;	/*!< 1 - if a transaction in MySQL
 					is active. 2 - if prepare_commit_mutex
 					was taken */
-	unsigned	has_search_latch:1;
+	ulint		has_search_latch;
 					/* TRUE if this trx has latched the
 					search system latch in S-mode */
-	unsigned	declared_to_be_inside_innodb:1;
+	ulint		deadlock_mark;	/*!< a mark field used in deadlock
+					checking algorithm.  */
+	trx_dict_op_t	dict_operation;	/**< @see enum trx_dict_op */
+
+	/* Fields protected by the srv_conc_mutex. */
+	ulint		declared_to_be_inside_innodb;
 					/* this is TRUE if we have declared
 					this transaction in
 					srv_conc_enter_innodb to be inside the
 					InnoDB engine */
-	unsigned	handling_signals:1;/* this is TRUE as long as the trx
-					is handling signals */
-	unsigned	dict_operation_lock_mode:2;
-					/* 0, RW_S_LATCH, or RW_X_LATCH:
+
+	/* Field protected by dict_operation_lock, which is the very
+	latch whose mode this field tracks. */
+	ulint		dict_operation_lock_mode;
+					/*!< 0, RW_S_LATCH, or RW_X_LATCH:
 					the latch mode trx currently holds
 					on dict_operation_lock */
+
+	/* All the next fields are protected by the kernel mutex, except the
+	undo logs which are protected by undo_mutex */
+	ulint		is_purge;	/*!< 0=user transaction, 1=purge */
+	ulint		is_recovered;	/*!< 0=normal transaction,
+					1=recovered, must be rolled back */
+	ulint		que_state;	/*!< valid when conc_state
+					== TRX_ACTIVE: TRX_QUE_RUNNING,
+					TRX_QUE_LOCK_WAIT, ... */
+	ulint		handling_signals;/* this is TRUE as long as the trx
+					is handling signals */
 	time_t		start_time;	/*!< time the trx object was created
 					or the state last time became
 					TRX_ACTIVE */
@@ -640,11 +650,6 @@ struct trx_struct{
 			wait_thrs;	/*!< query threads belonging to this
 					trx that are in the QUE_THR_LOCK_WAIT
 					state */
-	ulint		deadlock_mark;	/*!< a mark field used in deadlock
-					checking algorithm.  This must be
-					in its own machine word, because
-					it can be changed by other
-					threads while holding kernel_mutex. */
 	/*------------------------------*/
 	mem_heap_t*	lock_heap;	/*!< memory heap for the locks of the
 					transaction */

=== modified file 'storage/innodb_plugin/include/trx0types.h'
--- a/storage/innodb_plugin/include/trx0types.h	2009-09-02 23:48:06 +0000
+++ b/storage/innodb_plugin/include/trx0types.h	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -70,6 +70,13 @@ typedef struct trx_named_savept_struct t
 enum trx_rb_ctx {
 	RB_NONE = 0,	/*!< no rollback */
 	RB_NORMAL,	/*!< normal rollback */
+	RB_RECOVERY_PURGE_REC,
+			/*!< rolling back an incomplete transaction,
+			in crash recovery, rolling back an
+			INSERT that was performed by updating a
+			delete-marked record; if the delete-marked record
+			no longer exists in an active read view, it will
+			be purged */
 	RB_RECOVERY	/*!< rolling back an incomplete transaction,
 			in crash recovery */
 };

=== modified file 'storage/innodb_plugin/include/univ.i'
--- a/storage/innodb_plugin/include/univ.i	2009-11-30 13:13:34 +0000
+++ b/storage/innodb_plugin/include/univ.i	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
 Copyright (c) 2009, Sun Microsystems, Inc.
 
@@ -46,7 +46,7 @@ Created 1/20/1994 Heikki Tuuri
 
 #define INNODB_VERSION_MAJOR	1
 #define INNODB_VERSION_MINOR	0
-#define INNODB_VERSION_BUGFIX	6
+#define INNODB_VERSION_BUGFIX	7
 
 /* The following is the InnoDB version as shown in
 SELECT plugin_version FROM information_schema.plugins;
@@ -229,11 +229,6 @@ by one. */
 			/* the above option prevents forcing of log to disk
 			at a buffer page write: it should be tested with this
 			option off; also some ibuf tests are suppressed */
-/*
-#define UNIV_BASIC_LOG_DEBUG
-*/
-			/* the above option enables basic recovery debugging:
-			new allocated file pages are reset */
 
 /* Linkage specifier for non-static InnoDB symbols (variables and functions)
 that are only referenced from within InnoDB, not from MySQL */

=== added file 'storage/innodb_plugin/include/ut0rbt.h'
--- a/storage/innodb_plugin/include/ut0rbt.h	1970-01-01 00:00:00 +0000
+++ b/storage/innodb_plugin/include/ut0rbt.h	2010-04-01 12:56:22 +0000
@@ -0,0 +1,309 @@
+/*****************************************************************************
+Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file include/ut0rbt.h
+Red-Black tree implementation.
+
+Created 2007-03-20 Sunny Bains
+************************************************************************/
+
+#ifndef INNOBASE_UT0RBT_H
+#define INNOBASE_UT0RBT_H
+
+#if !defined(IB_RBT_TESTING)
+#include "univ.i"
+#include "ut0mem.h"
+#else
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <assert.h>
+
+#define	ut_malloc	malloc
+#define	ut_free		free
+#define	ulint		unsigned long
+#define	ut_a(c)		assert(c)
+#define ut_error	assert(0)
+#define	ibool		unsigned int
+#define	TRUE		1
+#define	FALSE		0
+#endif
+
+/* Red black tree typedefs */
+typedef struct ib_rbt_struct ib_rbt_t;
+typedef struct ib_rbt_node_struct ib_rbt_node_t;
+/* FIXME: Iterator is a better name than _bound_ */
+typedef struct ib_rbt_bound_struct ib_rbt_bound_t;
+typedef void (*ib_rbt_print_node)(const ib_rbt_node_t* node);
+typedef int (*ib_rbt_compare)(const void* p1, const void* p2);
+
+/* Red black tree color types */
+enum ib_rbt_color_enum {
+	IB_RBT_RED,
+	IB_RBT_BLACK
+};
+
+typedef enum ib_rbt_color_enum ib_rbt_color_t;
+
+/* Red black tree node */
+struct ib_rbt_node_struct {
+	ib_rbt_color_t	color;			/* color of this node */
+
+	ib_rbt_node_t*	left;			/* points left child */
+	ib_rbt_node_t*	right;			/* points right child */
+	ib_rbt_node_t*	parent;			/* points parent node */
+
+	char		value[1];		/* Data value */
+};
+
+/* Red black tree instance.*/
+struct	ib_rbt_struct {
+	ib_rbt_node_t*	nil;			/* Black colored node that is
+						used as a sentinel. This is
+						pre-allocated too.*/
+
+	ib_rbt_node_t*	root;			/* Root of the tree, this is
+						pre-allocated and the first
+						data node is the left child.*/
+
+	ulint		n_nodes;		/* Total number of data nodes */
+
+	ib_rbt_compare	compare;		/* Fn. to use for comparison */
+	ulint		sizeof_value;		/* Sizeof the item in bytes */
+};
+
+/* The result of searching for a key in the tree, this is useful for
+a speedy lookup and insert if key doesn't exist.*/
+struct ib_rbt_bound_struct {
+	const ib_rbt_node_t*
+			last;			/* Last node visited */
+
+	int		result;			/* Result of comparing with
+						the last non-nil node that
+						was visited */
+};
+
+/* Size in elements (t is an rb tree instance) */
+#define rbt_size(t)	(t->n_nodes)
+
+/* Check whether the rb tree is empty (t is an rb tree instance) */
+#define rbt_empty(t)	(rbt_size(t) == 0)
+
+/* Get data value (t is the data type, n is an rb tree node instance) */
+#define rbt_value(t, n) ((t*) &n->value[0])
+
+/* Compare a key with the node value (t is tree, k is key, n is node)*/
+#define rbt_compare(t, k, n) (t->compare(k, n->value))
+
+/****************************************************************//**
+Free an instance of a red black tree */
+UNIV_INTERN
+void
+rbt_free(
+/*=====*/
+	ib_rbt_t*	tree);		/*!< in: rb tree to free */
+/****************************************************************//**
+Create an instance of a red black tree
+@return	rb tree instance */
+UNIV_INTERN
+ib_rbt_t*
+rbt_create(
+/*=======*/
+	size_t		sizeof_value,	/*!< in: size in bytes */
+	ib_rbt_compare	compare);	/*!< in: comparator */
+/****************************************************************//**
+Delete a node from the red black tree, identified by key.
+@return TRUE if success FALSE if not found */
+UNIV_INTERN
+ibool
+rbt_delete(
+/*=======*/
+	ib_rbt_t*	tree,		/*!< in: rb tree */
+	const void*	key);		/*!< in: key to delete */
+/****************************************************************//**
+Remove a node from the rb tree. The node is not freed; that is the
+caller's responsibility.
+@return	the removed node, without the const qualifier */
+UNIV_INTERN
+ib_rbt_node_t*
+rbt_remove_node(
+/*============*/
+	ib_rbt_t*	tree,		/*!< in: rb tree */
+	const ib_rbt_node_t*
+			node);		/*!< in: node to delete, this
+					is a fudge and declared const
+					because the caller has access
+					only to const nodes.*/
+/****************************************************************//**
+Find a matching node in the rb tree.
+@return	node if found else return NULL */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_lookup(
+/*=======*/
+	const ib_rbt_t*	tree,		/*!< in: rb tree to search */
+	const void*	key);		/*!< in: key to lookup */
+/****************************************************************//**
+Generic insert of a value in the rb tree.
+@return	inserted node */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_insert(
+/*=======*/
+	ib_rbt_t*	tree,		/*!< in: rb tree */
+	const void*	key,		/*!< in: key for ordering */
+	const void*	value);		/*!< in: data that will be
+					copied to the node.*/
+/****************************************************************//**
+Add a new node to the tree, useful for data that is pre-sorted.
+@return	appended node */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_add_node(
+/*=========*/
+	ib_rbt_t*	tree,		/*!< in: rb tree */
+	ib_rbt_bound_t*	parent,		/*!< in: parent */
+	const void*	value);		/*!< in: this value is copied
+					to the node */
+/****************************************************************//**
+Return the left most data node in the tree
+@return	left most node */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_first(
+/*======*/
+	const ib_rbt_t*	tree);		/*!< in: rb tree */
+/****************************************************************//**
+Return the right most data node in the tree
+@return	right most node */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_last(
+/*=====*/
+	const ib_rbt_t*	tree);		/*!< in: rb tree */
+/****************************************************************//**
+Return the next node from current.
+@return	successor node to current that is passed in. */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_next(
+/*=====*/
+	const ib_rbt_t*	tree,		/*!< in: rb tree */
+	const ib_rbt_node_t*		/*!< in: current node */
+			current);
+/****************************************************************//**
+Return the prev node from current.
+@return	predecessor node to current that is passed in */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_prev(
+/*=====*/
+	const ib_rbt_t*	tree,		/*!< in: rb tree */
+	const ib_rbt_node_t*		/*!< in: current node */
+			current);
+/****************************************************************//**
+Find the node that has the lowest key that is >= key.
+@return	node that satisfies the lower bound constraint or NULL */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_lower_bound(
+/*============*/
+	const ib_rbt_t*	tree,		/*!< in: rb tree */
+	const void*	key);		/*!< in: key to search */
+/****************************************************************//**
+Find the node that has the greatest key that is <= key.
+@return	node that satisfies the upper bound constraint or NULL */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_upper_bound(
+/*============*/
+	const ib_rbt_t*	tree,		/*!< in: rb tree */
+	const void*	key);		/*!< in: key to search */
+/****************************************************************//**
+Search for the key. A node will be returned in parent.last, whether it
+was found or not. If not found then parent.last will contain the
+parent node for the possibly new key, otherwise the matching node.
+@return	result of last comparison */
+UNIV_INTERN
+int
+rbt_search(
+/*=======*/
+	const ib_rbt_t*	tree,		/*!< in: rb tree */
+	ib_rbt_bound_t*	parent,		/*!< in: search bounds */
+	const void*	key);		/*!< in: key to search */
+/****************************************************************//**
+Search for the key. A node will be returned in parent.last, whether it
+was found or not. If not found then parent.last will contain the
+parent node for the possibly new key, otherwise the matching node.
+@return	result of last comparison */
+UNIV_INTERN
+int
+rbt_search_cmp(
+/*===========*/
+	const ib_rbt_t*	tree,		/*!< in: rb tree */
+	ib_rbt_bound_t*	parent,		/*!< in: search bounds */
+	const void*	key,		/*!< in: key to search */
+	ib_rbt_compare	compare);	/*!< in: comparator */
+/****************************************************************//**
+Clear the tree: deletes (and frees) all the nodes. */
+UNIV_INTERN
+void
+rbt_clear(
+/*======*/
+	ib_rbt_t*	tree);		/*!< in: rb tree */
+/****************************************************************//**
+Merge the nodes from src into dst. Return the number of nodes merged.
+@return	no. of recs merged */
+UNIV_INTERN
+ulint
+rbt_merge_uniq(
+/*===========*/
+	ib_rbt_t*	dst,		/*!< in: dst rb tree */
+	const ib_rbt_t*	src);		/*!< in: src rb tree */
+/****************************************************************//**
+Merge the nodes from src into dst. Return the number of nodes merged.
+Delete the nodes from src after copying node to dst. As a side effect
+the duplicates will be left untouched in the src, since we don't support
+duplicates (yet). NOTE: src and dst must be similar, the function doesn't
+check for this condition (yet).
+@return	no. of recs merged */
+UNIV_INTERN
+ulint
+rbt_merge_uniq_destructive(
+/*=======================*/
+	ib_rbt_t*	dst,		/*!< in: dst rb tree */
+	ib_rbt_t*	src);		/*!< in: src rb tree */
+/****************************************************************//**
+Verify the integrity of the RB tree. For debugging. 0 failure else height
+of tree (in count of black nodes).
+@return	TRUE if OK FALSE if tree invalid. */
+UNIV_INTERN
+ibool
+rbt_validate(
+/*=========*/
+	const ib_rbt_t*	tree);		/*!< in: tree to validate */
+/****************************************************************//**
+Iterate over the tree in depth first order. */
+UNIV_INTERN
+void
+rbt_print(
+/*======*/
+	const ib_rbt_t*		tree,	/*!< in: tree to traverse */
+	ib_rbt_print_node	print);	/*!< in: print function */
+
+#endif /* INNOBASE_UT0RBT_H */

=== modified file 'storage/innodb_plugin/include/ut0rnd.ic'
--- a/storage/innodb_plugin/include/ut0rnd.ic	2009-05-27 09:45:59 +0000
+++ b/storage/innodb_plugin/include/ut0rnd.ic	2010-04-01 13:02:01 +0000
@@ -152,6 +152,7 @@ ut_hash_ulint(
 	ulint	 key,		/*!< in: value to be hashed */
 	ulint	 table_size)	/*!< in: hash table size */
 {
+	ut_ad(table_size);
 	key = key ^ UT_HASH_RANDOM_MASK2;
 
 	return(key % table_size);

=== modified file 'storage/innodb_plugin/lock/lock0lock.c'
--- a/storage/innodb_plugin/lock/lock0lock.c	2009-12-01 10:38:40 +0000
+++ b/storage/innodb_plugin/lock/lock0lock.c	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -376,6 +376,7 @@ UNIV_INTERN FILE*	lock_latest_err_file;
 /* Flags for recursive deadlock search */
 #define LOCK_VICTIM_IS_START	1
 #define LOCK_VICTIM_IS_OTHER	2
+#define LOCK_EXCEED_MAX_DEPTH	3
 
 /********************************************************************//**
 Checks if a lock request results in a deadlock.
@@ -394,24 +395,25 @@ Looks recursively for a deadlock.
 deadlock and we chose 'start' as the victim, LOCK_VICTIM_IS_OTHER if a
 deadlock was found and we chose some other trx as a victim: we must do
 the search again in this last case because there may be another
-deadlock! */
+deadlock!
+LOCK_EXCEED_MAX_DEPTH if the lock search exceeds max steps or max depth. */
 static
 ulint
 lock_deadlock_recursive(
 /*====================*/
 	trx_t*	start,		/*!< in: recursion starting point */
 	trx_t*	trx,		/*!< in: a transaction waiting for a lock */
-	lock_t*	wait_lock,	/*!< in: the lock trx is waiting to be granted */
+	lock_t*	wait_lock,	/*!< in:  lock that is waiting to be granted */
 	ulint*	cost,		/*!< in/out: number of calculation steps thus
 				far: if this exceeds LOCK_MAX_N_STEPS_...
-				we return LOCK_VICTIM_IS_START */
+				we return LOCK_EXCEED_MAX_DEPTH */
 	ulint	depth);		/*!< in: recursion depth: if this exceeds
 				LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK, we
-				return LOCK_VICTIM_IS_START */
+				return LOCK_EXCEED_MAX_DEPTH */
 
 /*********************************************************************//**
 Gets the nth bit of a record lock.
-@return	TRUE if bit set */
+@return	TRUE if bit set; if i == ULINT_UNDEFINED, return FALSE */
 UNIV_INLINE
 ibool
 lock_rec_get_nth_bit(
@@ -1222,7 +1224,7 @@ lock_rec_get_first_on_page(
 
 /*********************************************************************//**
 Gets the next explicit lock request on a record.
-@return	next lock, NULL if none exists */
+@return	next lock, NULL if none exists or if heap_no == ULINT_UNDEFINED */
 UNIV_INLINE
 lock_t*
 lock_rec_get_next(
@@ -3261,8 +3263,6 @@ lock_deadlock_occurs(
 	lock_t*	lock,	/*!< in: lock the transaction is requesting */
 	trx_t*	trx)	/*!< in: transaction */
 {
-	dict_table_t*	table;
-	dict_index_t*	index;
 	trx_t*		mark_trx;
 	ulint		ret;
 	ulint		cost	= 0;
@@ -3284,31 +3284,50 @@ retry:
 
 	ret = lock_deadlock_recursive(trx, trx, lock, &cost, 0);
 
-	if (ret == LOCK_VICTIM_IS_OTHER) {
+	switch (ret) {
+	case LOCK_VICTIM_IS_OTHER:
 		/* We chose some other trx as a victim: retry if there still
 		is a deadlock */
-
 		goto retry;
-	}
 
-	if (UNIV_UNLIKELY(ret == LOCK_VICTIM_IS_START)) {
-		if (lock_get_type_low(lock) & LOCK_TABLE) {
-			table = lock->un_member.tab_lock.table;
-			index = NULL;
+	case LOCK_EXCEED_MAX_DEPTH:
+		/* If the lock search exceeds the max step
+		or the max depth, the current trx will be
+		the victim. Print its information. */
+		rewind(lock_latest_err_file);
+		ut_print_timestamp(lock_latest_err_file);
+
+		fputs("TOO DEEP OR LONG SEARCH IN THE LOCK TABLE"
+		      " WAITS-FOR GRAPH, WE WILL ROLL BACK"
+		      " FOLLOWING TRANSACTION \n",
+		      lock_latest_err_file);
+
+		fputs("\n*** TRANSACTION:\n", lock_latest_err_file);
+		      trx_print(lock_latest_err_file, trx, 3000);
+
+		fputs("*** WAITING FOR THIS LOCK TO BE GRANTED:\n",
+		      lock_latest_err_file);
+
+		if (lock_get_type(lock) == LOCK_REC) {
+			lock_rec_print(lock_latest_err_file, lock);
 		} else {
-			index = lock->index;
-			table = index->table;
+			lock_table_print(lock_latest_err_file, lock);
 		}
+		break;
 
-		lock_deadlock_found = TRUE;
-
+	case LOCK_VICTIM_IS_START:
 		fputs("*** WE ROLL BACK TRANSACTION (2)\n",
 		      lock_latest_err_file);
+		break;
 
-		return(TRUE);
+	default:
+		/* No deadlock detected*/
+		return(FALSE);
 	}
 
-	return(FALSE);
+	lock_deadlock_found = TRUE;
+
+	return(TRUE);
 }
 
 /********************************************************************//**
@@ -3317,25 +3336,26 @@ Looks recursively for a deadlock.
 deadlock and we chose 'start' as the victim, LOCK_VICTIM_IS_OTHER if a
 deadlock was found and we chose some other trx as a victim: we must do
 the search again in this last case because there may be another
-deadlock! */
+deadlock!
+LOCK_EXCEED_MAX_DEPTH if the lock search exceeds max steps or max depth. */
 static
 ulint
 lock_deadlock_recursive(
 /*====================*/
 	trx_t*	start,		/*!< in: recursion starting point */
 	trx_t*	trx,		/*!< in: a transaction waiting for a lock */
-	lock_t*	wait_lock,	/*!< in: the lock trx is waiting to be granted */
+	lock_t*	wait_lock,	/*!< in: lock that is waiting to be granted */
 	ulint*	cost,		/*!< in/out: number of calculation steps thus
 				far: if this exceeds LOCK_MAX_N_STEPS_...
-				we return LOCK_VICTIM_IS_START */
+				we return LOCK_EXCEED_MAX_DEPTH */
 	ulint	depth)		/*!< in: recursion depth: if this exceeds
 				LOCK_MAX_DEPTH_IN_DEADLOCK_CHECK, we
-				return LOCK_VICTIM_IS_START */
+				return LOCK_EXCEED_MAX_DEPTH */
 {
+	ulint	ret;
 	lock_t*	lock;
-	ulint	bit_no		= ULINT_UNDEFINED;
 	trx_t*	lock_trx;
-	ulint	ret;
+	ulint	heap_no		= ULINT_UNDEFINED;
 
 	ut_a(trx);
 	ut_a(start);
@@ -3351,27 +3371,44 @@ lock_deadlock_recursive(
 
 	*cost = *cost + 1;
 
-	lock = wait_lock;
-
 	if (lock_get_type_low(wait_lock) == LOCK_REC) {
+		ulint		space;
+		ulint		page_no;
+
+		heap_no = lock_rec_find_set_bit(wait_lock);
+		ut_a(heap_no != ULINT_UNDEFINED);
+
+		space = wait_lock->un_member.rec_lock.space;
+		page_no = wait_lock->un_member.rec_lock.page_no;
 
-		bit_no = lock_rec_find_set_bit(wait_lock);
+		lock = lock_rec_get_first_on_page_addr(space, page_no);
+
+		/* Position the iterator on the first matching record lock. */
+		while (lock != NULL
+		       && lock != wait_lock
+		       && !lock_rec_get_nth_bit(lock, heap_no)) {
+
+			lock = lock_rec_get_next_on_page(lock);
+		}
+
+		if (lock == wait_lock) {
+			lock = NULL;
+		}
 
-		ut_a(bit_no != ULINT_UNDEFINED);
+		ut_ad(lock == NULL || lock_rec_get_nth_bit(lock, heap_no));
+
+	} else {
+		lock = wait_lock;
 	}
 
 	/* Look at the locks ahead of wait_lock in the lock queue */
 
 	for (;;) {
-		if (lock_get_type_low(lock) & LOCK_TABLE) {
+		/* Get previous table lock. */
+		if (heap_no == ULINT_UNDEFINED) {
 
-			lock = UT_LIST_GET_PREV(un_member.tab_lock.locks,
-						lock);
-		} else {
-			ut_ad(lock_get_type_low(lock) == LOCK_REC);
-			ut_a(bit_no != ULINT_UNDEFINED);
-
-			lock = (lock_t*) lock_rec_get_prev(lock, bit_no);
+			lock = UT_LIST_GET_PREV(
+				un_member.tab_lock.locks, lock);
 		}
 
 		if (lock == NULL) {
@@ -3389,7 +3426,7 @@ lock_deadlock_recursive(
 
 			lock_trx = lock->trx;
 
-			if (lock_trx == start || too_far) {
+			if (lock_trx == start) {
 
 				/* We came back to the recursion starting
 				point: a deadlock detected; or we have
@@ -3436,19 +3473,10 @@ lock_deadlock_recursive(
 				}
 #ifdef UNIV_DEBUG
 				if (lock_print_waits) {
-					fputs("Deadlock detected"
-					      " or too long search\n",
+					fputs("Deadlock detected\n",
 					      stderr);
 				}
 #endif /* UNIV_DEBUG */
-				if (too_far) {
-
-					fputs("TOO DEEP OR LONG SEARCH"
-					      " IN THE LOCK TABLE"
-					      " WAITS-FOR GRAPH\n", ef);
-
-					return(LOCK_VICTIM_IS_START);
-				}
 
 				if (trx_weight_cmp(wait_lock->trx,
 						   start) >= 0) {
@@ -3484,6 +3512,21 @@ lock_deadlock_recursive(
 				return(LOCK_VICTIM_IS_OTHER);
 			}
 
+			if (too_far) {
+
+#ifdef UNIV_DEBUG
+				if (lock_print_waits) {
+					fputs("Deadlock search exceeds"
+					      " max steps or depth.\n",
+					      stderr);
+				}
+#endif /* UNIV_DEBUG */
+				/* The information about transaction/lock
+				to be rolled back is available in the top
+				level. Do not print anything here. */
+				return(LOCK_EXCEED_MAX_DEPTH);
+			}
+
 			if (lock_trx->que_state == TRX_QUE_LOCK_WAIT) {
 
 				/* Another trx ahead has requested lock	in an
@@ -3493,12 +3536,28 @@ lock_deadlock_recursive(
 				ret = lock_deadlock_recursive(
 					start, lock_trx,
 					lock_trx->wait_lock, cost, depth + 1);
+
 				if (ret != 0) {
 
 					return(ret);
 				}
 			}
 		}
+		/* Get the next record lock to check. */
+		if (heap_no != ULINT_UNDEFINED) {
+
+			ut_a(lock != NULL);
+
+			do {
+				lock = lock_rec_get_next_on_page(lock);
+			} while (lock != NULL
+				&& lock != wait_lock
+				&& !lock_rec_get_nth_bit(lock, heap_no));
+
+			if (lock == wait_lock) {
+				lock = NULL;
+			}
+		}
 	}/* end of the 'for (;;)'-loop */
 }
 
@@ -3694,9 +3753,10 @@ lock_table_enqueue_waiting(
 
 /*********************************************************************//**
 Checks if other transactions have an incompatible mode lock request in
-the lock queue. */
+the lock queue.
+@return	lock or NULL */
 UNIV_INLINE
-ibool
+lock_t*
 lock_table_other_has_incompatible(
 /*==============================*/
 	trx_t*		trx,	/*!< in: transaction, or NULL if all
@@ -3718,13 +3778,13 @@ lock_table_other_has_incompatible(
 		    && (!lock_mode_compatible(lock_get_mode(lock), mode))
 		    && (wait || !(lock_get_wait(lock)))) {
 
-			return(TRUE);
+			return(lock);
 		}
 
 		lock = UT_LIST_GET_PREV(un_member.tab_lock.locks, lock);
 	}
 
-	return(FALSE);
+	return(NULL);
 }
 
 /*********************************************************************//**
@@ -4249,28 +4309,29 @@ lock_rec_print(
 
 	block = buf_page_try_get(space, page_no, &mtr);
 
-	if (block) {
-		for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
-
-			if (lock_rec_get_nth_bit(lock, i)) {
+	for (i = 0; i < lock_rec_get_n_bits(lock); ++i) {
 
-				const rec_t*	rec
-					= page_find_rec_with_heap_no(
-						buf_block_get_frame(block), i);
-				offsets = rec_get_offsets(
-					rec, lock->index, offsets,
-					ULINT_UNDEFINED, &heap);
-
-				fprintf(file, "Record lock, heap no %lu ",
-					(ulong) i);
-				rec_print_new(file, rec, offsets);
-				putc('\n', file);
-			}
+		if (!lock_rec_get_nth_bit(lock, i)) {
+			continue;
 		}
-	} else {
-		for (i = 0; i < lock_rec_get_n_bits(lock); i++) {
-			fprintf(file, "Record lock, heap no %lu\n", (ulong) i);
+
+		fprintf(file, "Record lock, heap no %lu", (ulong) i);
+
+		if (block) {
+			const rec_t*	rec;
+
+			rec = page_find_rec_with_heap_no(
+				buf_block_get_frame(block), i);
+
+			offsets = rec_get_offsets(
+				rec, lock->index, offsets,
+				ULINT_UNDEFINED, &heap);
+
+			putc(' ', file);
+			rec_print_new(file, rec, offsets);
 		}
+
+		putc('\n', file);
 	}
 
 	mtr_commit(&mtr);
@@ -4317,14 +4378,26 @@ lock_get_n_rec_locks(void)
 #endif /* PRINT_NUM_OF_LOCK_STRUCTS */
 
 /*********************************************************************//**
-Prints info of locks for all transactions. */
+Prints info of locks for all transactions.
+@return FALSE if not able to obtain kernel mutex
+and exits without printing info */
 UNIV_INTERN
-void
+ibool
 lock_print_info_summary(
 /*====================*/
-	FILE*	file)	/*!< in: file where to print */
+	FILE*	file,	/*!< in: file where to print */
+	ibool   nowait)	/*!< in: whether to wait for the kernel mutex */
 {
-	lock_mutex_enter_kernel();
+	/* If nowait is FALSE, wait on the kernel mutex;
+	otherwise return immediately if we fail to obtain
+	the mutex. */
+	if (!nowait) {
+		lock_mutex_enter_kernel();
+	} else if (mutex_enter_nowait(&kernel_mutex)) {
+		fputs("FAIL TO OBTAIN KERNEL MUTEX, "
+		      "SKIP LOCK INFO PRINTING\n", file);
+		return(FALSE);
+	}
 
 	if (lock_deadlock_found) {
 		fputs("------------------------\n"
@@ -4356,6 +4429,7 @@ lock_print_info_summary(
 		"Total number of lock structs in row lock hash table %lu\n",
 		(ulong) lock_get_n_rec_locks());
 #endif /* PRINT_NUM_OF_LOCK_STRUCTS */
+	return(TRUE);
 }
 
 /*********************************************************************//**
@@ -4753,6 +4827,13 @@ loop:
 	     || lock->trx->conc_state == TRX_PREPARED
 	     || lock->trx->conc_state == TRX_COMMITTED_IN_MEMORY);
 
+# ifdef UNIV_SYNC_DEBUG
+	/* Only validate the record queues when this thread is not
+	holding a space->latch.  Deadlocks are possible due to
+	latching order violation when UNIV_DEBUG is defined while
+	UNIV_SYNC_DEBUG is not. */
+	if (!sync_thread_levels_contains(SYNC_FSP))
+# endif /* UNIV_SYNC_DEBUG */
 	for (i = nth_bit; i < lock_rec_get_n_bits(lock); i++) {
 
 		if (i == 1 || lock_rec_get_nth_bit(lock, i)) {
@@ -4918,7 +4999,7 @@ lock_rec_insert_check_and_lock(
 	}
 
 	trx = thr_get_trx(thr);
-	next_rec = page_rec_get_next((rec_t*) rec);
+	next_rec = page_rec_get_next_const(rec);
 	next_rec_heap_no = page_rec_get_heap_no(next_rec);
 
 	lock_mutex_enter_kernel();

=== modified file 'storage/innodb_plugin/log/log0log.c'
--- a/storage/innodb_plugin/log/log0log.c	2009-11-30 11:32:05 +0000
+++ b/storage/innodb_plugin/log/log0log.c	2010-04-01 11:59:25 +0000
@@ -1,23 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
 Copyright (c) 2009, Google Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
@@ -2013,7 +1996,7 @@ log_checkpoint(
 		return(TRUE);
 	}
 
-	ut_ad(log_sys->written_to_all_lsn >= oldest_lsn);
+	ut_ad(log_sys->flushed_to_disk_lsn >= oldest_lsn);
 
 	if (log_sys->n_pending_checkpoint_writes > 0) {
 		/* A checkpoint write is running */
@@ -3095,7 +3078,7 @@ loop:
 
 	if (srv_fast_shutdown < 2
 	   && (srv_error_monitor_active
-	      || srv_lock_timeout_and_monitor_active)) {
+	      || srv_lock_timeout_active || srv_monitor_active)) {
 
 		mutex_exit(&kernel_mutex);
 

=== modified file 'storage/innodb_plugin/log/log0recv.c'
--- a/storage/innodb_plugin/log/log0recv.c	2009-11-30 11:32:05 +0000
+++ b/storage/innodb_plugin/log/log0recv.c	2010-04-01 12:56:22 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -138,7 +138,9 @@ UNIV_INTERN ulint	recv_max_parsed_page_n
 /** This many frames must be left free in the buffer pool when we scan
 the log and store the scanned log records in the buffer pool: we will
 use these free frames to read in pages when we start applying the
-log records to the database. */
+log records to the database.
+This is the default value. If the actual size of the buffer pool is
+at least 10 MB, we'll set this value to 512. */
 UNIV_INTERN ulint	recv_n_pool_free_frames;
 
 /** The maximum lsn we see for a page during the recovery process. If this
@@ -239,6 +241,7 @@ recv_sys_mem_free(void)
 	}
 }
 
+#ifndef UNIV_HOTBACKUP
 /************************************************************
 Reset the state of the recovery system variables. */
 UNIV_INTERN
@@ -278,6 +281,7 @@ recv_sys_var_init(void)
 
 	recv_max_page_lsn = 0;
 }
+#endif /* !UNIV_HOTBACKUP */
 
 /************************************************************
 Inits the recovery system for a recovery operation. */
@@ -292,6 +296,12 @@ recv_sys_init(
 		return;
 	}
 
+	/* Initialize red-black tree for fast insertions into the
+	flush_list during recovery process.
+	As this initialization is done while holding the buffer pool
+	mutex we perform it before acquiring recv_sys->mutex. */
+	buf_flush_init_flush_rbt();
+
 	mutex_enter(&(recv_sys->mutex));
 
 #ifndef UNIV_HOTBACKUP
@@ -301,6 +311,12 @@ recv_sys_init(
 	recv_is_from_backup = TRUE;
 #endif /* !UNIV_HOTBACKUP */
 
+	/* Set appropriate value of recv_n_pool_free_frames. */
+	if (buf_pool_get_curr_size() >= (10 * 1024 * 1024)) {
+		/* Buffer pool of size at least 10 MB. */
+		recv_n_pool_free_frames = 512;
+	}
+
 	recv_sys->buf = ut_malloc(RECV_PARSING_BUF_SIZE);
 	recv_sys->len = 0;
 	recv_sys->recovered_offset = 0;
@@ -370,6 +386,9 @@ recv_sys_debug_free(void)
 	recv_sys->last_block_buf_start = NULL;
 
 	mutex_exit(&(recv_sys->mutex));
+
+	/* Free up the flush_rbt. */
+	buf_flush_free_flush_rbt();
 }
 # endif /* UNIV_LOG_DEBUG */
 
@@ -2050,15 +2069,6 @@ recv_parse_log_rec(
 	}
 #endif /* UNIV_LOG_LSN_DEBUG */
 
-	/* Check that page_no is sensible */
-
-	if (UNIV_UNLIKELY(*page_no > 0x8FFFFFFFUL)) {
-
-		recv_sys->found_corrupt_log = TRUE;
-
-		return(0);
-	}
-
 	new_ptr = recv_parse_or_apply_log_rec_body(*type, new_ptr, end_ptr,
 						   NULL, NULL);
 	if (UNIV_UNLIKELY(new_ptr == NULL)) {
@@ -2167,6 +2177,14 @@ recv_report_corrupt_log(
 		putc('\n', stderr);
 	}
 
+#ifndef UNIV_HOTBACKUP
+	if (!srv_force_recovery) {
+		fputs("InnoDB: Set innodb_force_recovery"
+		      " to ignore this error.\n", stderr);
+		ut_error;
+	}
+#endif /* !UNIV_HOTBACKUP */
+
 	fputs("InnoDB: WARNING: the log file may have been corrupt and it\n"
 	      "InnoDB: is possible that the log scan did not proceed\n"
 	      "InnoDB: far enough in recovery! Please run CHECK TABLE\n"
@@ -2556,7 +2574,7 @@ recv_scan_log_recs(
 
 	ut_ad(start_lsn % OS_FILE_LOG_BLOCK_SIZE == 0);
 	ut_ad(len % OS_FILE_LOG_BLOCK_SIZE == 0);
-	ut_ad(len > 0);
+	ut_ad(len >= OS_FILE_LOG_BLOCK_SIZE);
 	ut_a(store_to_hash <= TRUE);
 
 	finished = FALSE;
@@ -2681,6 +2699,16 @@ recv_scan_log_recs(
 
 				recv_sys->found_corrupt_log = TRUE;
 
+#ifndef UNIV_HOTBACKUP
+				if (!srv_force_recovery) {
+					fputs("InnoDB: Set"
+					      " innodb_force_recovery"
+					      " to ignore this error.\n",
+					      stderr);
+					ut_error;
+				}
+#endif /* !UNIV_HOTBACKUP */
+
 			} else if (!recv_sys->found_corrupt_log) {
 				more_data = recv_sys_add_to_parsing_buf(
 					log_block, scanned_lsn);
@@ -3210,8 +3238,6 @@ void
 recv_recovery_from_checkpoint_finish(void)
 /*======================================*/
 {
-	int		i;
-
 	/* Apply the hashed log records to the respective file pages */
 
 	if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
@@ -3259,9 +3285,16 @@ recv_recovery_from_checkpoint_finish(voi
 	The data dictionary latch should guarantee that there is at
 	most one data dictionary transaction active at a time. */
 	trx_rollback_or_clean_recovered(FALSE);
+}
 
-	/* Drop partially created indexes. */
-	row_merge_drop_temp_indexes();
+/********************************************************//**
+Initiates the rollback of active transactions. */
+UNIV_INTERN
+void
+recv_recovery_rollback_active(void)
+/*===============================*/
+{
+	int		i;
 
 #ifdef UNIV_SYNC_DEBUG
 	/* Wait for a while so that created threads have time to suspend
@@ -3271,6 +3304,11 @@ recv_recovery_from_checkpoint_finish(voi
 	/* Switch latching order checks on in sync0sync.c */
 	sync_order_checks_on = TRUE;
 #endif
+	/* Drop partially created indexes. */
+	row_merge_drop_temp_indexes();
+	/* Drop temporary tables. */
+	row_mysql_drop_temp_tables();
+
 	if (srv_force_recovery < SRV_FORCE_NO_TRX_UNDO) {
 		/* Rollback the uncommitted transactions which have no user
 		session */

=== modified file 'storage/innodb_plugin/mem/mem0dbg.c'
--- a/storage/innodb_plugin/mem/mem0dbg.c	2009-11-30 11:32:05 +0000
+++ b/storage/innodb_plugin/mem/mem0dbg.c	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -180,6 +180,10 @@ mem_close(void)
 {
 	mem_pool_free(mem_comm_pool);
 	mem_comm_pool = NULL;
+#ifdef UNIV_MEM_DEBUG
+	mutex_free(&mem_hash_mutex);
+	mem_hash_initialized = FALSE;
+#endif /* UNIV_MEM_DEBUG */
 }
 #endif /* !UNIV_HOTBACKUP */
 

=== modified file 'storage/innodb_plugin/mem/mem0mem.c'
--- a/storage/innodb_plugin/mem/mem0mem.c	2009-10-08 10:00:49 +0000
+++ b/storage/innodb_plugin/mem/mem0mem.c	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -383,6 +383,20 @@ mem_heap_create_block(
 	mem_block_set_free(block, MEM_BLOCK_HEADER_SIZE);
 	mem_block_set_start(block, MEM_BLOCK_HEADER_SIZE);
 
+	if (UNIV_UNLIKELY(heap == NULL)) {
+		/* This is the first block of the heap. The field
+		total_size should be initialized here */
+		block->total_size = len;
+	} else {
+		/* Not the first allocation for the heap. This block's
+		total_size field should be set to undefined. */
+		ut_d(block->total_size = ULINT_UNDEFINED);
+		UNIV_MEM_INVALID(&block->total_size,
+				 sizeof block->total_size);
+
+		heap->total_size += len;
+	}
+
 	ut_ad((ulint)MEM_BLOCK_HEADER_SIZE < len);
 
 	return(block);
@@ -471,6 +485,10 @@ mem_heap_block_free(
 
 	mem_pool_mutex_exit();
 #endif
+
+	ut_ad(heap->total_size >= block->len);
+	heap->total_size -= block->len;
+
 	type = heap->type;
 	len = block->len;
 	block->magic_n = MEM_FREED_BLOCK_MAGIC_N;

=== modified file 'storage/innodb_plugin/os/os0file.c'
--- a/storage/innodb_plugin/os/os0file.c	2009-11-30 12:04:09 +0000
+++ b/storage/innodb_plugin/os/os0file.c	2010-04-01 12:27:53 +0000
@@ -1,23 +1,6 @@
-/*****************************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
-FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
-
-You should have received a copy of the GNU General Public License along with
-this program; if not, write to the Free Software Foundation, Inc., 59 Temple
-Place, Suite 330, Boston, MA 02111-1307 USA
-
-*****************************************************************************/
 /***********************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
 Copyright (c) 2009, Percona Inc.
 
 Portions of this file contain modifications contributed and copyrighted
@@ -806,7 +789,15 @@ next_file:
 #ifdef HAVE_READDIR_R
 	ret = readdir_r(dir, (struct dirent*)dirent_buf, &ent);
 
-	if (ret != 0) {
+	if (ret != 0
+#ifdef UNIV_AIX
+	    /* On AIX, a non-zero 'ret' (return) value indicates a failed
+	    readdir_r() call only if 'ent' (result) is also non-NULL.
+	    A NULL 'ent' with a non-zero 'ret' indicates that the
+	    "end of the directory" has been reached. */
+	    && ent != NULL
+#endif
+	   ) {
 		fprintf(stderr,
 			"InnoDB: cannot read directory %s, error %lu\n",
 			dirname, (ulong)ret);
@@ -3923,6 +3914,9 @@ os_aio_simulated_handle(
 	ulint		n;
 	ulint		i;
 
+	/* Fix compiler warning */
+	*consecutive_ios = NULL;
+
 	segment = os_aio_get_array_and_local_segment(&array, global_segment);
 
 restart:

=== modified file 'storage/innodb_plugin/page/page0page.c'
--- a/storage/innodb_plugin/page/page0page.c	2009-10-09 14:13:15 +0000
+++ b/storage/innodb_plugin/page/page0page.c	2010-04-01 12:58:02 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -658,6 +658,14 @@ page_copy_rec_list_end(
 						index, mtr);
 	}
 
+	/* Update PAGE_MAX_TRX_ID on the uncompressed page.
+	Modifications will be redo logged and copied to the compressed
+	page in page_zip_compress() or page_zip_reorganize() below. */
+	if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) {
+		page_update_max_trx_id(new_block, NULL,
+				       page_get_max_trx_id(page), mtr);
+	}
+
 	if (UNIV_LIKELY_NULL(new_page_zip)) {
 		mtr_set_log_mode(mtr, log_mode);
 
@@ -696,15 +704,10 @@ page_copy_rec_list_end(
 		}
 	}
 
-	/* Update the lock table, MAX_TRX_ID, and possible hash index */
+	/* Update the lock table and possible hash index */
 
 	lock_move_rec_list_end(new_block, block, rec);
 
-	if (dict_index_is_sec_or_ibuf(index) && page_is_leaf(page)) {
-		page_update_max_trx_id(new_block, new_page_zip,
-				       page_get_max_trx_id(page), mtr);
-	}
-
 	btr_search_move_or_delete_hash_entries(new_block, block, index);
 
 	return(ret);
@@ -772,6 +775,16 @@ page_copy_rec_list_start(
 		mem_heap_free(heap);
 	}
 
+	/* Update PAGE_MAX_TRX_ID on the uncompressed page.
+	Modifications will be redo logged and copied to the compressed
+	page in page_zip_compress() or page_zip_reorganize() below. */
+	if (dict_index_is_sec_or_ibuf(index)
+	    && page_is_leaf(page_align(rec))) {
+		page_update_max_trx_id(new_block, NULL,
+				       page_get_max_trx_id(page_align(rec)),
+				       mtr);
+	}
+
 	if (UNIV_LIKELY_NULL(new_page_zip)) {
 		mtr_set_log_mode(mtr, log_mode);
 
@@ -809,14 +822,7 @@ page_copy_rec_list_start(
 		}
 	}
 
-	/* Update MAX_TRX_ID, the lock table, and possible hash index */
-
-	if (dict_index_is_sec_or_ibuf(index)
-	    && page_is_leaf(page_align(rec))) {
-		page_update_max_trx_id(new_block, new_page_zip,
-				       page_get_max_trx_id(page_align(rec)),
-				       mtr);
-	}
+	/* Update the lock table and possible hash index */
 
 	lock_move_rec_list_start(new_block, block, rec, ret);
 
@@ -2408,8 +2414,13 @@ page_validate(
 		}
 
 		offs = page_offset(rec_get_start(rec, offsets));
+		i = rec_offs_size(offsets);
+		if (UNIV_UNLIKELY(offs + i >= UNIV_PAGE_SIZE)) {
+			fputs("InnoDB: record offset out of bounds\n", stderr);
+			goto func_exit;
+		}
 
-		for (i = rec_offs_size(offsets); i--; ) {
+		while (i--) {
 			if (UNIV_UNLIKELY(buf[offs + i])) {
 				/* No other record may overlap this */
 
@@ -2517,8 +2528,13 @@ n_owned_zero:
 
 		count++;
 		offs = page_offset(rec_get_start(rec, offsets));
+		i = rec_offs_size(offsets);
+		if (UNIV_UNLIKELY(offs + i >= UNIV_PAGE_SIZE)) {
+			fputs("InnoDB: record offset out of bounds\n", stderr);
+			goto func_exit;
+		}
 
-		for (i = rec_offs_size(offsets); i--; ) {
+		while (i--) {
 
 			if (UNIV_UNLIKELY(buf[offs + i])) {
 				fputs("InnoDB: Record overlaps another"

=== modified file 'storage/innodb_plugin/plug.in'
--- a/storage/innodb_plugin/plug.in	2009-10-12 12:00:56 +0000
+++ b/storage/innodb_plugin/plug.in	2010-04-01 13:01:22 +0000
@@ -1,5 +1,5 @@
 #
-# Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+# Copyright (c) 2006, 2010, Innobase Oy. All Rights Reserved.
 # 
 # This program is free software; you can redistribute it and/or modify it under
 # the terms of the GNU General Public License as published by the Free Software

=== modified file 'storage/innodb_plugin/rem/rem0rec.c'
--- a/storage/innodb_plugin/rem/rem0rec.c	2009-05-27 09:45:59 +0000
+++ b/storage/innodb_plugin/rem/rem0rec.c	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1994, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1994, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -695,19 +695,9 @@ rec_get_nth_field_offs_old(
 	ulint	os;
 	ulint	next_os;
 
-	ut_ad(rec && len);
-	ut_ad(n < rec_get_n_fields_old(rec));
-
-	if (UNIV_UNLIKELY(n > REC_MAX_N_FIELDS)) {
-		fprintf(stderr, "Error: trying to access field %lu in rec\n",
-			(ulong) n);
-		ut_error;
-	}
-
-	if (UNIV_UNLIKELY(rec == NULL)) {
-		fputs("Error: rec is NULL pointer\n", stderr);
-		ut_error;
-	}
+	ut_ad(len);
+	ut_a(rec);
+	ut_a(n < rec_get_n_fields_old(rec));
 
 	if (rec_get_1byte_offs_flag(rec)) {
 		os = rec_1_get_field_start_offs(rec, n);

=== modified file 'storage/innodb_plugin/row/row0ins.c'
--- a/storage/innodb_plugin/row/row0ins.c	2009-11-03 10:23:02 +0000
+++ b/storage/innodb_plugin/row/row0ins.c	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -1991,7 +1991,7 @@ row_ins_index_entry_low(
 
 	btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
 				    mode | BTR_INSERT | ignore_sec_unique,
-				    &cursor, 0, &mtr);
+				    &cursor, 0, __FILE__, __LINE__, &mtr);
 
 	if (cursor.flag == BTR_CUR_INSERT_TO_IBUF) {
 		/* The insertion was made to the insert buffer already during
@@ -2049,7 +2049,8 @@ row_ins_index_entry_low(
 			btr_cur_search_to_nth_level(index, 0, entry,
 						    PAGE_CUR_LE,
 						    mode | BTR_INSERT,
-						    &cursor, 0, &mtr);
+						    &cursor, 0,
+						    __FILE__, __LINE__, &mtr);
 		}
 	}
 
@@ -2104,7 +2105,8 @@ function_exit:
 		mtr_start(&mtr);
 
 		btr_cur_search_to_nth_level(index, 0, entry, PAGE_CUR_LE,
-					    BTR_MODIFY_TREE, &cursor, 0, &mtr);
+					    BTR_MODIFY_TREE, &cursor, 0,
+					    __FILE__, __LINE__, &mtr);
 		rec = btr_cur_get_rec(&cursor);
 		offsets = rec_get_offsets(rec, index, NULL,
 					  ULINT_UNDEFINED, &heap);

=== modified file 'storage/innodb_plugin/row/row0merge.c'
--- a/storage/innodb_plugin/row/row0merge.c	2009-11-30 12:24:54 +0000
+++ b/storage/innodb_plugin/row/row0merge.c	2010-04-01 12:01:10 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2005, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2005, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -424,14 +424,13 @@ row_merge_dup_report(
 	row_merge_dup_t*	dup,	/*!< in/out: for reporting duplicates */
 	const dfield_t*		entry)	/*!< in: duplicate index entry */
 {
-	mrec_buf_t 		buf;
+	mrec_buf_t* 		buf;
 	const dtuple_t*		tuple;
 	dtuple_t		tuple_store;
 	const rec_t*		rec;
 	const dict_index_t*	index	= dup->index;
 	ulint			n_fields= dict_index_get_n_fields(index);
-	mem_heap_t*		heap	= NULL;
-	ulint			offsets_[REC_OFFS_NORMAL_SIZE];
+	mem_heap_t*		heap;
 	ulint*			offsets;
 	ulint			n_ext;
 
@@ -441,22 +440,22 @@ row_merge_dup_report(
 		return;
 	}
 
-	rec_offs_init(offsets_);
-
 	/* Convert the tuple to a record and then to MySQL format. */
+	heap = mem_heap_create((1 + REC_OFFS_HEADER_SIZE + n_fields)
+			       * sizeof *offsets
+			       + sizeof *buf);
+
+	buf = mem_heap_alloc(heap, sizeof *buf);
 
 	tuple = dtuple_from_fields(&tuple_store, entry, n_fields);
 	n_ext = dict_index_is_clust(index) ? dtuple_get_n_ext(tuple) : 0;
 
-	rec = rec_convert_dtuple_to_rec(buf, index, tuple, n_ext);
-	offsets = rec_get_offsets(rec, index, offsets_, ULINT_UNDEFINED,
-				  &heap);
+	rec = rec_convert_dtuple_to_rec(*buf, index, tuple, n_ext);
+	offsets = rec_get_offsets(rec, index, NULL, ULINT_UNDEFINED, &heap);
 
 	innobase_rec_to_mysql(dup->table, rec, index, offsets);
 
-	if (UNIV_LIKELY_NULL(heap)) {
-		mem_heap_free(heap);
-	}
+	mem_heap_free(heap);
 }
 
 /*************************************************************//**
@@ -627,22 +626,26 @@ row_merge_buf_write(
 }
 
 /******************************************************//**
-Create a memory heap and allocate space for row_merge_rec_offsets().
+Create a memory heap and allocate space for row_merge_rec_offsets()
+and mrec_buf_t[3].
 @return	memory heap */
 static
 mem_heap_t*
 row_merge_heap_create(
 /*==================*/
 	const dict_index_t*	index,		/*!< in: record descriptor */
+	mrec_buf_t**		buf,		/*!< out: 3 buffers */
 	ulint**			offsets1,	/*!< out: offsets */
 	ulint**			offsets2)	/*!< out: offsets */
 {
 	ulint		i	= 1 + REC_OFFS_HEADER_SIZE
 		+ dict_index_get_n_fields(index);
-	mem_heap_t*	heap	= mem_heap_create(2 * i * sizeof *offsets1);
+	mem_heap_t*	heap	= mem_heap_create(2 * i * sizeof **offsets1
+						  + 3 * sizeof **buf);
 
-	*offsets1 = mem_heap_alloc(heap, i * sizeof *offsets1);
-	*offsets2 = mem_heap_alloc(heap, i * sizeof *offsets2);
+	*buf = mem_heap_alloc(heap, 3 * sizeof **buf);
+	*offsets1 = mem_heap_alloc(heap, i * sizeof **offsets1);
+	*offsets2 = mem_heap_alloc(heap, i * sizeof **offsets2);
 
 	(*offsets1)[0] = (*offsets2)[0] = i;
 	(*offsets1)[1] = (*offsets2)[1] = dict_index_get_n_fields(index);
@@ -1394,7 +1397,8 @@ row_merge_blocks(
 {
 	mem_heap_t*	heap;	/*!< memory heap for offsets0, offsets1 */
 
-	mrec_buf_t	buf[3];	/*!< buffer for handling split mrec in block[] */
+	mrec_buf_t*	buf;	/*!< buffer for handling
+				split mrec in block[] */
 	const byte*	b0;	/*!< pointer to block[0] */
 	const byte*	b1;	/*!< pointer to block[1] */
 	byte*		b2;	/*!< pointer to block[2] */
@@ -1414,7 +1418,7 @@ row_merge_blocks(
 	}
 #endif /* UNIV_DEBUG */
 
-	heap = row_merge_heap_create(index, &offsets0, &offsets1);
+	heap = row_merge_heap_create(index, &buf, &offsets0, &offsets1);
 
 	/* Write a record and read the next record.  Split the output
 	file in two halves, which can be merged on the following pass. */
@@ -1500,7 +1504,7 @@ row_merge_blocks_copy(
 {
 	mem_heap_t*	heap;	/*!< memory heap for offsets0, offsets1 */
 
-	mrec_buf_t	buf[3];	/*!< buffer for handling
+	mrec_buf_t*	buf;	/*!< buffer for handling
 				split mrec in block[] */
 	const byte*	b0;	/*!< pointer to block[0] */
 	byte*		b2;	/*!< pointer to block[2] */
@@ -1518,7 +1522,7 @@ row_merge_blocks_copy(
 	}
 #endif /* UNIV_DEBUG */
 
-	heap = row_merge_heap_create(index, &offsets0, &offsets1);
+	heap = row_merge_heap_create(index, &buf, &offsets0, &offsets1);
 
 	/* Write a record and read the next record.  Split the output
 	file in two halves, which can be merged on the following pass. */
@@ -1760,7 +1764,6 @@ row_merge_insert_index_tuples(
 	int			fd,	/*!< in: file descriptor */
 	row_merge_block_t*	block)	/*!< in/out: file buffer */
 {
-	mrec_buf_t		buf;
 	const byte*		b;
 	que_thr_t*		thr;
 	ins_node_t*		node;
@@ -1779,7 +1782,7 @@ row_merge_insert_index_tuples(
 
 	trx->op_info = "inserting index entries";
 
-	graph_heap = mem_heap_create(500);
+	graph_heap = mem_heap_create(500 + sizeof(mrec_buf_t));
 	node = ins_node_create(INS_DIRECT, table, graph_heap);
 
 	thr = pars_complete_graph_for_exec(node, trx, graph_heap);
@@ -1801,12 +1804,14 @@ row_merge_insert_index_tuples(
 	if (!row_merge_read(fd, foffs, block)) {
 		error = DB_CORRUPTION;
 	} else {
+		mrec_buf_t*	buf = mem_heap_alloc(graph_heap, sizeof *buf);
+
 		for (;;) {
 			const mrec_t*	mrec;
 			dtuple_t*	dtuple;
 			ulint		n_ext;
 
-			b = row_merge_read_rec(block, &buf, b, index,
+			b = row_merge_read_rec(block, buf, b, index,
 					       fd, &foffs, &mrec, offsets);
 			if (UNIV_UNLIKELY(!b)) {
 				/* End of list, or I/O error */
@@ -1977,14 +1982,12 @@ row_merge_drop_index(
 		/* Drop the field definitions of the index. */
 		"DELETE FROM SYS_FIELDS WHERE INDEX_ID = :indexid;\n"
 		/* Drop the index definition and the B-tree. */
-		"DELETE FROM SYS_INDEXES WHERE ID = :indexid\n"
-		"		AND TABLE_ID = :tableid;\n"
+		"DELETE FROM SYS_INDEXES WHERE ID = :indexid;\n"
 		"END;\n";
 
 	ut_ad(index && table && trx);
 
 	pars_info_add_dulint_literal(info, "indexid", index->id);
-	pars_info_add_dulint_literal(info, "tableid", table->id);
 
 	trx_start_if_not_started(trx);
 	trx->op_info = "dropping index";
@@ -2033,47 +2036,79 @@ row_merge_drop_temp_indexes(void)
 /*=============================*/
 {
 	trx_t*		trx;
-	ulint		err;
-
-	/* We use the private SQL parser of Innobase to generate the
-	query graphs needed in deleting the dictionary data from system
-	tables in Innobase. Deleting a row from SYS_INDEXES table also
-	frees the file segments of the B-tree associated with the index. */
-	static const char drop_temp_indexes[] =
-		"PROCEDURE DROP_TEMP_INDEXES_PROC () IS\n"
-		"indexid CHAR;\n"
-		"DECLARE CURSOR c IS SELECT ID FROM SYS_INDEXES\n"
-		"WHERE SUBSTR(NAME,0,1)='" TEMP_INDEX_PREFIX_STR "';\n"
-		"BEGIN\n"
-		"\tOPEN c;\n"
-		"\tWHILE 1=1 LOOP\n"
-		"\t\tFETCH c INTO indexid;\n"
-		"\t\tIF (SQL % NOTFOUND) THEN\n"
-		"\t\t\tEXIT;\n"
-		"\t\tEND IF;\n"
-		"\t\tDELETE FROM SYS_FIELDS WHERE INDEX_ID = indexid;\n"
-		"\t\tDELETE FROM SYS_INDEXES WHERE ID = indexid;\n"
-		"\tEND LOOP;\n"
-		"\tCLOSE c;\n"
-		"\tCOMMIT WORK;\n"
-		"END;\n";
+	btr_pcur_t	pcur;
+	mtr_t		mtr;
 
+	/* Load the table definitions that contain partially defined
+	indexes, so that the data dictionary information can be checked
+	when accessing the tablename.ibd files. */
 	trx = trx_allocate_for_background();
 	trx->op_info = "dropping partially created indexes";
 	row_mysql_lock_data_dictionary(trx);
 
-	/* Incomplete transactions may be holding some locks on the
-	data dictionary tables.  However, they should never have been
-	able to lock the records corresponding to the partially
-	created indexes that we are attempting to delete, because the
-	table was locked when the indexes were being created.  We will
-	drop the partially created indexes before the rollback of
-	incomplete transactions is initiated.  Thus, this should not
-	interfere with the incomplete transactions. */
-	trx->isolation_level = TRX_ISO_READ_UNCOMMITTED;
-	err = que_eval_sql(NULL, drop_temp_indexes, FALSE, trx);
-	ut_a(err == DB_SUCCESS);
+	mtr_start(&mtr);
+
+	btr_pcur_open_at_index_side(
+		TRUE,
+		dict_table_get_first_index(dict_sys->sys_indexes),
+		BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
+
+	for (;;) {
+		const rec_t*	rec;
+		const byte*	field;
+		ulint		len;
+		dulint		table_id;
+		dict_table_t*	table;
 
+		btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+
+		if (!btr_pcur_is_on_user_rec(&pcur)) {
+			break;
+		}
+
+		rec = btr_pcur_get_rec(&pcur);
+		field = rec_get_nth_field_old(rec, DICT_SYS_INDEXES_NAME_FIELD,
+					      &len);
+		if (len == UNIV_SQL_NULL || len == 0
+		    || mach_read_from_1(field) != (ulint) TEMP_INDEX_PREFIX) {
+			continue;
+		}
+
+		/* This is a temporary index. */
+
+		field = rec_get_nth_field_old(rec, 0/*TABLE_ID*/, &len);
+		if (len != 8) {
+			/* Corrupted TABLE_ID */
+			continue;
+		}
+
+		table_id = mach_read_from_8(field);
+
+		btr_pcur_store_position(&pcur, &mtr);
+		btr_pcur_commit_specify_mtr(&pcur, &mtr);
+
+		table = dict_load_table_on_id(table_id);
+
+		if (table) {
+			dict_index_t*	index;
+
+			for (index = dict_table_get_first_index(table);
+			     index; index = dict_table_get_next_index(index)) {
+
+				if (*index->name == TEMP_INDEX_PREFIX) {
+					row_merge_drop_index(index, table, trx);
+					trx_commit_for_mysql(trx);
+				}
+			}
+		}
+
+		mtr_start(&mtr);
+		btr_pcur_restore_position(BTR_SEARCH_LEAF,
+					  &pcur, &mtr);
+	}
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
 	row_mysql_unlock_data_dictionary(trx);
 	trx_free_for_background(trx);
 }

=== modified file 'storage/innodb_plugin/row/row0mysql.c'
--- a/storage/innodb_plugin/row/row0mysql.c	2009-11-30 13:13:34 +0000
+++ b/storage/innodb_plugin/row/row0mysql.c	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2000, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2000, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -485,7 +485,7 @@ next_column:
 /****************************************************************//**
 Handles user errors and lock waits detected by the database engine.
 @return TRUE if it was a lock wait and we should continue running the
-query thread */
+query thread and in that case the thr is ALREADY in the running state. */
 UNIV_INTERN
 ibool
 row_mysql_handle_errors(
@@ -3255,19 +3255,13 @@ check_next_foreign:
 			   "END;\n"
 			   , FALSE, trx);
 
-	if (err != DB_SUCCESS) {
-		ut_a(err == DB_OUT_OF_FILE_SPACE);
-
-		err = DB_MUST_GET_MORE_FILE_SPACE;
-
-		row_mysql_handle_errors(&err, trx, NULL, NULL);
-
-		ut_error;
-	} else {
-		ibool		is_path;
+	switch (err) {
+		ibool		is_temp;
 		const char*	name_or_path;
 		mem_heap_t*	heap;
 
+	case DB_SUCCESS:
+
 		heap = mem_heap_create(200);
 
 		/* Clone the name, in case it has been allocated
@@ -3277,12 +3271,13 @@ check_next_foreign:
 		space_id = table->space;
 
 		if (table->dir_path_of_temp_table != NULL) {
-			is_path = TRUE;
 			name_or_path = mem_heap_strdup(
 				heap, table->dir_path_of_temp_table);
+			is_temp = TRUE;
 		} else {
-			is_path = FALSE;
 			name_or_path = name;
+			is_temp = (table->flags >> DICT_TF2_SHIFT)
+				& DICT_TF2_TEMPORARY;
 		}
 
 		dict_table_remove_from_cache(table);
@@ -3302,8 +3297,8 @@ check_next_foreign:
 		if (err == DB_SUCCESS && space_id > 0) {
 			if (!fil_space_for_table_exists_in_mem(space_id,
 							       name_or_path,
-							       is_path,
-							       FALSE, TRUE)) {
+							       is_temp, FALSE,
+							       !is_temp)) {
 				err = DB_SUCCESS;
 
 				fprintf(stderr,
@@ -3332,7 +3327,27 @@ check_next_foreign:
 		}
 
 		mem_heap_free(heap);
+		break;
+
+	case DB_TOO_MANY_CONCURRENT_TRXS:
+		/* Cannot even find a free slot for the
+		undo log. We can directly exit here
+		and return the DB_TOO_MANY_CONCURRENT_TRXS
+		error. */
+		break;
+
+	case DB_OUT_OF_FILE_SPACE:
+		err = DB_MUST_GET_MORE_FILE_SPACE;
+
+		row_mysql_handle_errors(&err, trx, NULL, NULL);
+
+		/* Fall through to raise error */
+
+	default:
+		/* No other possible error returns */
+		ut_error;
 	}
+
 funct_exit:
 
 	if (locked_dictionary) {
@@ -3348,6 +3363,90 @@ funct_exit:
 	return((int) err);
 }
 
+/*********************************************************************//**
+Drop all temporary tables during crash recovery. */
+UNIV_INTERN
+void
+row_mysql_drop_temp_tables(void)
+/*============================*/
+{
+	trx_t*		trx;
+	btr_pcur_t	pcur;
+	mtr_t		mtr;
+	mem_heap_t*	heap;
+
+	trx = trx_allocate_for_background();
+	trx->op_info = "dropping temporary tables";
+	row_mysql_lock_data_dictionary(trx);
+
+	heap = mem_heap_create(200);
+
+	mtr_start(&mtr);
+
+	btr_pcur_open_at_index_side(
+		TRUE,
+		dict_table_get_first_index(dict_sys->sys_tables),
+		BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
+
+	for (;;) {
+		const rec_t*	rec;
+		const byte*	field;
+		ulint		len;
+		const char*	table_name;
+		dict_table_t*	table;
+
+		btr_pcur_move_to_next_user_rec(&pcur, &mtr);
+
+		if (!btr_pcur_is_on_user_rec(&pcur)) {
+			break;
+		}
+
+		rec = btr_pcur_get_rec(&pcur);
+		field = rec_get_nth_field_old(rec, 4/*N_COLS*/, &len);
+		if (len != 4 || !(mach_read_from_4(field) & 0x80000000UL)) {
+			continue;
+		}
+
+		/* Because this is not a ROW_FORMAT=REDUNDANT table,
+		the is_temp flag is valid.  Examine it. */
+
+		field = rec_get_nth_field_old(rec, 7/*MIX_LEN*/, &len);
+		if (len != 4
+		    || !(mach_read_from_4(field) & DICT_TF2_TEMPORARY)) {
+			continue;
+		}
+
+		/* This is a temporary table. */
+		field = rec_get_nth_field_old(rec, 0/*NAME*/, &len);
+		if (len == UNIV_SQL_NULL || len == 0) {
+			/* Corrupted SYS_TABLES.NAME */
+			continue;
+		}
+
+		table_name = mem_heap_strdupl(heap, (const char*) field, len);
+
+		btr_pcur_store_position(&pcur, &mtr);
+		btr_pcur_commit_specify_mtr(&pcur, &mtr);
+
+		table = dict_load_table(table_name);
+
+		if (table) {
+			row_drop_table_for_mysql(table_name, trx, FALSE);
+			trx_commit_for_mysql(trx);
+		}
+
+		mtr_start(&mtr);
+		btr_pcur_restore_position(BTR_SEARCH_LEAF,
+					  &pcur, &mtr);
+	}
+
+	btr_pcur_close(&pcur);
+	mtr_commit(&mtr);
+	mem_heap_free(heap);
+	row_mysql_unlock_data_dictionary(trx);
+	trx_free_for_background(trx);
+}
+
 /*******************************************************************//**
 Drop all foreign keys in a database, see Bug#18942.
 Called at the end of row_drop_database_for_mysql().
@@ -3899,14 +3998,15 @@ Checks that the index contains entries i
 constraint is not broken, and calculates the number of index entries
 in the read view of the current transaction.
 @return	TRUE if ok */
-static
+UNIV_INTERN
 ibool
-row_scan_and_check_index(
-/*=====================*/
-	row_prebuilt_t*	prebuilt,	/*!< in: prebuilt struct in MySQL */
-	dict_index_t*	index,		/*!< in: index */
-	ulint*		n_rows)		/*!< out: number of entries seen in the
-					current consistent read */
+row_check_index_for_mysql(
+/*======================*/
+	row_prebuilt_t*		prebuilt,	/*!< in: prebuilt struct
+						in MySQL handle */
+	const dict_index_t*	index,		/*!< in: index */
+	ulint*			n_rows)		/*!< out: number of entries
+						seen in the consistent read */
 {
 	dtuple_t*	prev_entry	= NULL;
 	ulint		matched_fields;
@@ -3927,31 +4027,9 @@ row_scan_and_check_index(
 
 	*n_rows = 0;
 
-	if (!row_merge_is_index_usable(prebuilt->trx, index)) {
-		/* A newly created index may lack some delete-marked
-		records that may exist in the read view of
-		prebuilt->trx.  Thus, such indexes must not be
-		accessed by consistent read. */
-		return(is_ok);
-	}
-
 	buf = mem_alloc(UNIV_PAGE_SIZE);
 	heap = mem_heap_create(100);
 
-	/* Make a dummy template in prebuilt, which we will use
-	in scanning the index entries */
-
-	prebuilt->index = index;
-	/* row_merge_is_index_usable() was already checked above. */
-	prebuilt->index_usable = TRUE;
-	prebuilt->sql_stat_start = TRUE;
-	prebuilt->template_type = ROW_MYSQL_DUMMY_TEMPLATE;
-	prebuilt->n_template = 0;
-	prebuilt->need_to_access_clustered = FALSE;
-
-	dtuple_set_n_fields(prebuilt->search_tuple, 0);
-
-	prebuilt->select_lock_type = LOCK_NONE;
 	cnt = 1000;
 
 	ret = row_search_for_mysql(buf, PAGE_CUR_G, prebuilt, 0, 0);
@@ -4070,119 +4148,6 @@ not_ok:
 }
 
 /*********************************************************************//**
-Checks a table for corruption.
-@return	DB_ERROR or DB_SUCCESS */
-UNIV_INTERN
-ulint
-row_check_table_for_mysql(
-/*======================*/
-	row_prebuilt_t*	prebuilt)	/*!< in: prebuilt struct in MySQL
-					handle */
-{
-	dict_table_t*	table		= prebuilt->table;
-	dict_index_t*	index;
-	ulint		n_rows;
-	ulint		n_rows_in_table	= ULINT_UNDEFINED;
-	ulint		ret		= DB_SUCCESS;
-	ulint		old_isolation_level;
-
-	if (table->ibd_file_missing) {
-		ut_print_timestamp(stderr);
-		fprintf(stderr, "  InnoDB: Error:\n"
-			"InnoDB: MySQL is trying to use a table handle"
-			" but the .ibd file for\n"
-			"InnoDB: table %s does not exist.\n"
-			"InnoDB: Have you deleted the .ibd file"
-			" from the database directory under\n"
-			"InnoDB: the MySQL datadir, or have you"
-			" used DISCARD TABLESPACE?\n"
-			"InnoDB: Look from\n"
-			"InnoDB: " REFMAN "innodb-troubleshooting.html\n"
-			"InnoDB: how you can resolve the problem.\n",
-			table->name);
-		return(DB_ERROR);
-	}
-
-	prebuilt->trx->op_info = "checking table";
-
-	old_isolation_level = prebuilt->trx->isolation_level;
-
-	/* We must run the index record counts at an isolation level
-	>= READ COMMITTED, because a dirty read can see a wrong number
-	of records in some index; to play safe, we use always
-	REPEATABLE READ here */
-
-	prebuilt->trx->isolation_level = TRX_ISO_REPEATABLE_READ;
-
-	/* Enlarge the fatal lock wait timeout during CHECK TABLE. */
-	mutex_enter(&kernel_mutex);
-	srv_fatal_semaphore_wait_threshold += 7200; /* 2 hours */
-	mutex_exit(&kernel_mutex);
-
-	index = dict_table_get_first_index(table);
-
-	while (index != NULL) {
-		/* fputs("Validating index ", stderr);
-		ut_print_name(stderr, trx, FALSE, index->name);
-		putc('\n', stderr); */
-
-		if (!btr_validate_index(index, prebuilt->trx)) {
-			ret = DB_ERROR;
-		} else {
-			if (!row_scan_and_check_index(prebuilt,index, &n_rows)){
-				ret = DB_ERROR;
-			}
-
-			if (trx_is_interrupted(prebuilt->trx)) {
-				ret = DB_INTERRUPTED;
-				break;
-			}
-
-			/* fprintf(stderr, "%lu entries in index %s\n", n_rows,
-			index->name); */
-
-			if (index == dict_table_get_first_index(table)) {
-				n_rows_in_table = n_rows;
-			} else if (n_rows != n_rows_in_table) {
-
-				ret = DB_ERROR;
-
-				fputs("Error: ", stderr);
-				dict_index_name_print(stderr,
-						      prebuilt->trx, index);
-				fprintf(stderr,
-					" contains %lu entries,"
-					" should be %lu\n",
-					(ulong) n_rows,
-					(ulong) n_rows_in_table);
-			}
-		}
-
-		index = dict_table_get_next_index(index);
-	}
-
-	/* Restore the original isolation level */
-	prebuilt->trx->isolation_level = old_isolation_level;
-
-	/* We validate also the whole adaptive hash index for all tables
-	at every CHECK TABLE */
-
-	if (!btr_search_validate()) {
-
-		ret = DB_ERROR;
-	}
-
-	/* Restore the fatal lock wait timeout after CHECK TABLE. */
-	mutex_enter(&kernel_mutex);
-	srv_fatal_semaphore_wait_threshold -= 7200; /* 2 hours */
-	mutex_exit(&kernel_mutex);
-
-	prebuilt->trx->op_info = "";
-
-	return(ret);
-}
-
-/*********************************************************************//**
 Determines if a table is a magic monitor table.
 @return	TRUE if monitor table */
 UNIV_INTERN

=== modified file 'storage/innodb_plugin/row/row0row.c'
--- a/storage/innodb_plugin/row/row0row.c	2009-07-30 12:42:56 +0000
+++ b/storage/innodb_plugin/row/row0row.c	2010-04-01 13:01:22 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -915,6 +915,10 @@ row_raw_format(
 
 		ret = row_raw_format_int(data, data_len, prtype,
 					 buf, buf_size, &format_in_hex);
+		if (format_in_hex) {
+
+			goto format_in_hex;
+		}
 		break;
 	case DATA_CHAR:
 	case DATA_VARCHAR:
@@ -923,14 +927,15 @@ row_raw_format(
 
 		ret = row_raw_format_str(data, data_len, prtype,
 					 buf, buf_size, &format_in_hex);
+		if (format_in_hex) {
+
+			goto format_in_hex;
+		}
+
 		break;
 	/* XXX support more data types */
 	default:
-
-		format_in_hex = TRUE;
-	}
-
-	if (format_in_hex) {
+	format_in_hex:
 
 		if (UNIV_LIKELY(buf_size > 2)) {
 

=== modified file 'storage/innodb_plugin/row/row0sel.c'
--- a/storage/innodb_plugin/row/row0sel.c	2009-07-30 12:42:56 +0000
+++ b/storage/innodb_plugin/row/row0sel.c	2010-04-01 12:00:18 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
@@ -132,7 +132,8 @@ index record.
 NOTE: the comparison is NOT done as a binary comparison, but character
 fields are compared with collation!
 @return TRUE if the secondary record is equal to the corresponding
-fields in the clustered record, when compared with collation */
+fields in the clustered record, when compared with collation;
+FALSE if not equal or if the clustered record has been marked for deletion */
 static
 ibool
 row_sel_sec_rec_is_for_clust_rec(
@@ -431,10 +432,6 @@ row_sel_fetch_columns(
 				data = rec_get_nth_field(rec, offsets,
 							 field_no, &len);
 
-				if (len == UNIV_SQL_NULL) {
-					len = UNIV_SQL_NULL;
-				}
-
 				needs_copy = column->copy_val;
 			}
 
@@ -2170,36 +2167,6 @@ row_fetch_print(
 	return((void*)42);
 }
 
-/****************************************************************//**
-Callback function for fetch that stores an unsigned 4 byte integer to the
-location pointed. The column's type must be DATA_INT, DATA_UNSIGNED, length
-= 4.
-@return	always returns NULL */
-UNIV_INTERN
-void*
-row_fetch_store_uint4(
-/*==================*/
-	void*	row,		/*!< in:  sel_node_t* */
-	void*	user_arg)	/*!< in:  data pointer */
-{
-	sel_node_t*	node = row;
-	ib_uint32_t*	val = user_arg;
-	ulint		tmp;
-
-	dfield_t*	dfield = que_node_get_val(node->select_list);
-	const dtype_t*	type = dfield_get_type(dfield);
-	ulint		len = dfield_get_len(dfield);
-
-	ut_a(dtype_get_mtype(type) == DATA_INT);
-	ut_a(dtype_get_prtype(type) & DATA_UNSIGNED);
-	ut_a(len == 4);
-
-	tmp = mach_read_from_4(dfield_get_data(dfield));
-	*val = (ib_uint32_t) tmp;
-
-	return(NULL);
-}
-
 /***********************************************************//**
 Prints a row in a select result.
 @return	query thread to run next or NULL */
@@ -2981,6 +2948,7 @@ row_sel_get_clust_rec_for_mysql(
 
 		if (clust_rec
 		    && (old_vers
+			|| trx->isolation_level <= TRX_ISO_READ_UNCOMMITTED
 			|| rec_get_deleted_flag(rec, dict_table_is_comp(
 							sec_index->table)))
 		    && !row_sel_sec_rec_is_for_clust_rec(
@@ -3202,14 +3170,17 @@ row_sel_try_search_shortcut_for_mysql(
 	ut_ad(dict_index_is_clust(index));
 	ut_ad(!prebuilt->templ_contains_blob);
 
+#ifndef UNIV_SEARCH_DEBUG
 	btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE,
 				   BTR_SEARCH_LEAF, pcur,
-#ifndef UNIV_SEARCH_DEBUG
 				   RW_S_LATCH,
-#else
+				   mtr);
+#else /* UNIV_SEARCH_DEBUG */
+	btr_pcur_open_with_no_init(index, search_tuple, PAGE_CUR_GE,
+				   BTR_SEARCH_LEAF, pcur,
 				   0,
-#endif
 				   mtr);
+#endif /* UNIV_SEARCH_DEBUG */
 	rec = btr_pcur_get_rec(pcur);
 
 	if (!page_rec_is_user_rec(rec)) {
@@ -4616,6 +4587,7 @@ row_search_autoinc_read_column(
 	dict_index_t*	index,		/*!< in: index to read from */
 	const rec_t*	rec,		/*!< in: current rec */
 	ulint		col_no,		/*!< in: column number */
+	ulint		mtype,		/*!< in: column main type */
 	ibool		unsigned_type)	/*!< in: signed or unsigned flag */
 {
 	ulint		len;
@@ -4632,10 +4604,26 @@ row_search_autoinc_read_column(
 	data = rec_get_nth_field(rec, offsets, col_no, &len);
 
 	ut_a(len != UNIV_SQL_NULL);
-	ut_a(len <= sizeof value);
 
-	/* we assume AUTOINC value cannot be negative */
-	value = mach_read_int_type(data, len, unsigned_type);
+	switch (mtype) {
+	case DATA_INT:
+		ut_a(len <= sizeof value);
+		value = mach_read_int_type(data, len, unsigned_type);
+		break;
+
+	case DATA_FLOAT:
+		ut_a(len == sizeof(float));
+		value = mach_float_read(data);
+		break;
+
+	case DATA_DOUBLE:
+		ut_a(len == sizeof(double));
+		value = mach_double_read(data);
+		break;
+
+	default:
+		ut_error;
+	}
 
 	if (UNIV_LIKELY_NULL(heap)) {
 		mem_heap_free(heap);
@@ -4721,7 +4709,8 @@ row_search_max_autoinc(
 					dfield->col->prtype & DATA_UNSIGNED);
 
 				*value = row_search_autoinc_read_column(
-					index, rec, i, unsigned_type);
+					index, rec, i,
+					dfield->col->mtype, unsigned_type);
 			}
 		}
 

=== modified file 'storage/innodb_plugin/row/row0umod.c'
--- a/storage/innodb_plugin/row/row0umod.c	2009-05-27 09:45:59 +0000
+++ b/storage/innodb_plugin/row/row0umod.c	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1997, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1997, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -144,13 +144,17 @@ row_undo_mod_clust_low(
 
 /***********************************************************//**
 Removes a clustered index record after undo if possible.
+This is attempted when the record was inserted by updating a
+delete-marked record and there no longer exist transactions
+that would see the delete-marked record.  In other words, we
+roll back the insert by purging the record.
 @return	DB_SUCCESS, DB_FAIL, or error code: we may run out of file space */
 static
 ulint
 row_undo_mod_remove_clust_low(
 /*==========================*/
 	undo_node_t*	node,	/*!< in: row undo node */
-	que_thr_t*	thr __attribute__((unused)), /*!< in: query thread */
+	que_thr_t*	thr,	/*!< in: query thread */
 	mtr_t*		mtr,	/*!< in: mtr */
 	ulint		mode)	/*!< in: BTR_MODIFY_LEAF or BTR_MODIFY_TREE */
 {
@@ -159,6 +163,7 @@ row_undo_mod_remove_clust_low(
 	ulint		err;
 	ibool		success;
 
+	ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
 	pcur = &(node->pcur);
 	btr_cur = btr_pcur_get_btr_cur(pcur);
 
@@ -190,11 +195,13 @@ row_undo_mod_remove_clust_low(
 	} else {
 		ut_ad(mode == BTR_MODIFY_TREE);
 
-		/* Note that since this operation is analogous to purge,
-		we can free also inherited externally stored fields:
-		hence the RB_NONE in the call below */
+		/* This operation is analogous to purge: we can also free
+		inherited externally stored fields */
 
-		btr_cur_pessimistic_delete(&err, FALSE, btr_cur, RB_NONE, mtr);
+		btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
+					   thr_is_recv(thr)
+					   ? RB_RECOVERY_PURGE_REC
+					   : RB_NONE, mtr);
 
 		/* The delete operation may fail if we have little
 		file space left: TODO: easiest to crash the database
@@ -370,10 +377,11 @@ row_undo_mod_del_mark_or_remove_sec_low(
 		} else {
 			ut_ad(mode == BTR_MODIFY_TREE);
 
-			/* No need to distinguish RB_RECOVERY here, because we
-			are deleting a secondary index record: the distinction
-			between RB_NORMAL and RB_RECOVERY only matters when
-			deleting a record that contains externally stored
+			/* No need to distinguish RB_RECOVERY_PURGE here,
+			because we are deleting a secondary index record:
+			the distinction between RB_NORMAL and
+			RB_RECOVERY_PURGE only matters when deleting a
+			record that contains externally stored
 			columns. */
 			ut_ad(!dict_index_is_clust(index));
 			btr_cur_pessimistic_delete(&err, FALSE, btr_cur,
@@ -438,7 +446,7 @@ row_undo_mod_del_unmark_sec_and_undo_upd
 				BTR_MODIFY_TREE */
 	que_thr_t*	thr,	/*!< in: query thread */
 	dict_index_t*	index,	/*!< in: index */
-	dtuple_t*	entry)	/*!< in: index entry */
+	const dtuple_t*	entry)	/*!< in: index entry */
 {
 	mem_heap_t*	heap;
 	btr_pcur_t	pcur;
@@ -533,6 +541,7 @@ row_undo_mod_upd_del_sec(
 	dict_index_t*	index;
 	ulint		err	= DB_SUCCESS;
 
+	ut_ad(node->rec_type == TRX_UNDO_UPD_DEL_REC);
 	heap = mem_heap_create(1024);
 
 	while (node->index != NULL) {
@@ -550,7 +559,7 @@ row_undo_mod_upd_del_sec(
 			does not exist.  However, this situation may
 			only occur during the rollback of incomplete
 			transactions. */
-			ut_a(trx_is_recv(thr_get_trx(thr)));
+			ut_a(thr_is_recv(thr));
 		} else {
 			err = row_undo_mod_del_mark_or_remove_sec(
 				node, thr, index, entry);

=== modified file 'storage/innodb_plugin/row/row0upd.c'
--- a/storage/innodb_plugin/row/row0upd.c	2009-05-27 09:45:59 +0000
+++ b/storage/innodb_plugin/row/row0upd.c	2010-04-01 11:03:27 +0000
@@ -1344,9 +1344,6 @@ row_upd_copy_columns(
 		data = rec_get_nth_field(rec, offsets,
 					 column->field_nos[SYM_CLUST_FIELD_NO],
 					 &len);
-		if (len == UNIV_SQL_NULL) {
-			len = UNIV_SQL_NULL;
-		}
 		eval_node_copy_and_alloc_val(column, data, len);
 
 		column = UT_LIST_GET_NEXT(col_var_list, column);

=== modified file 'storage/innodb_plugin/srv/srv0srv.c'
--- a/storage/innodb_plugin/srv/srv0srv.c	2009-11-30 11:32:05 +0000
+++ b/storage/innodb_plugin/srv/srv0srv.c	2010-04-01 12:01:33 +0000
@@ -1,7 +1,8 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
 Copyright (c) 2008, 2009 Google Inc.
+Copyright (c) 2009, Percona Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
 Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -9,6 +10,13 @@ briefly in the InnoDB documentation. The
 incorporated with their permission, and subject to the conditions contained in
 the file COPYING.Google.
 
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
 Foundation; version 2 of the License.
@@ -22,32 +30,6 @@ this program; if not, write to the Free 
 Place, Suite 330, Boston, MA 02111-1307 USA
 
 *****************************************************************************/
-/***********************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2009, Percona Inc.
-
-Portions of this file contain modifications contributed and copyrighted
-by Percona Inc.. Those modifications are
-gratefully acknowledged and are described briefly in the InnoDB
-documentation. The contributions by Percona Inc. are incorporated with
-their permission, and subject to the conditions contained in the file
-COPYING.Percona.
-
-This program is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-***********************************************************************/
 
 /**************************************************//**
 @file srv/srv0srv.c
@@ -119,7 +101,8 @@ UNIV_INTERN ulint	srv_fatal_semaphore_wa
 in microseconds, in order to reduce the lagging of the purge thread. */
 UNIV_INTERN ulint	srv_dml_needed_delay = 0;
 
-UNIV_INTERN ibool	srv_lock_timeout_and_monitor_active = FALSE;
+UNIV_INTERN ibool	srv_lock_timeout_active = FALSE;
+UNIV_INTERN ibool	srv_monitor_active = FALSE;
 UNIV_INTERN ibool	srv_error_monitor_active = FALSE;
 
 UNIV_INTERN const char*	srv_main_thread_op_info = "";
@@ -188,7 +171,17 @@ UNIV_INTERN ulong	srv_flush_log_at_trx_c
 the checkpoints. */
 UNIV_INTERN char	srv_adaptive_flushing	= TRUE;
 
-/* The sort order table of the MySQL latin1_swedish_ci character set
+/** Maximum number of times allowed to conditionally acquire
+mutex before switching to blocking wait on the mutex */
+#define MAX_MUTEX_NOWAIT	20
+
+/** Check whether the number of failed nonblocking mutex
+acquisition attempts exceeds maximum allowed value. If so,
+srv_printf_innodb_monitor() will request mutex acquisition
+with mutex_enter(), which will wait until it gets the mutex. */
+#define MUTEX_NOWAIT(mutex_skipped)	((mutex_skipped) < MAX_MUTEX_NOWAIT)
+
+/** The sort order table of the MySQL latin1_swedish_ci character set
 collation */
 UNIV_INTERN const byte*	srv_latin1_ordering;
 
@@ -1683,12 +1676,15 @@ srv_refresh_innodb_monitor_stats(void)
 }
 
 /******************************************************************//**
-Outputs to a file the output of the InnoDB Monitor. */
+Outputs to a file the output of the InnoDB Monitor.
+@return FALSE if not all information printed
+due to failure to obtain necessary mutex */
 UNIV_INTERN
-void
+ibool
 srv_printf_innodb_monitor(
 /*======================*/
 	FILE*	file,		/*!< in: output stream */
+	ibool	nowait,		/*!< in: whether to wait for kernel mutex */
 	ulint*	trx_start,	/*!< out: file position of the start of
 				the list of active transactions */
 	ulint*	trx_end)	/*!< out: file position of the end of
@@ -1697,6 +1693,7 @@ srv_printf_innodb_monitor(
 	double	time_elapsed;
 	time_t	current_time;
 	ulint	n_reserved;
+	ibool	ret;
 
 	mutex_enter(&srv_innodb_monitor_mutex);
 
@@ -1720,9 +1717,9 @@ srv_printf_innodb_monitor(
 		"Per second averages calculated from the last %lu seconds\n",
 		(ulong)time_elapsed);
 
-	fputs("----------\n"
-		"BACKGROUND THREAD\n"
-		"----------\n", file);
+	fputs("-----------------\n"
+	      "BACKGROUND THREAD\n"
+	      "-----------------\n", file);
 	srv_print_master_thread_info(file);
 
 	fputs("----------\n"
@@ -1746,24 +1743,31 @@ srv_printf_innodb_monitor(
 
 	mutex_exit(&dict_foreign_err_mutex);
 
-	lock_print_info_summary(file);
-	if (trx_start) {
-		long	t = ftell(file);
-		if (t < 0) {
-			*trx_start = ULINT_UNDEFINED;
-		} else {
-			*trx_start = (ulint) t;
+	/* Call lock_print_info_all_transactions() to print all the
+	lock information only if lock_print_info_summary() succeeded;
+	it fails when it cannot obtain the necessary mutex. */
+	ret = lock_print_info_summary(file, nowait);
+
+	if (ret) {
+		if (trx_start) {
+			long	t = ftell(file);
+			if (t < 0) {
+				*trx_start = ULINT_UNDEFINED;
+			} else {
+				*trx_start = (ulint) t;
+			}
 		}
-	}
-	lock_print_info_all_transactions(file);
-	if (trx_end) {
-		long	t = ftell(file);
-		if (t < 0) {
-			*trx_end = ULINT_UNDEFINED;
-		} else {
-			*trx_end = (ulint) t;
+		lock_print_info_all_transactions(file);
+		if (trx_end) {
+			long	t = ftell(file);
+			if (t < 0) {
+				*trx_end = ULINT_UNDEFINED;
+			} else {
+				*trx_end = (ulint) t;
+			}
 		}
 	}
+
 	fputs("--------\n"
 	      "FILE I/O\n"
 	      "--------\n", file);
@@ -1861,6 +1865,8 @@ srv_printf_innodb_monitor(
 	      "============================\n", file);
 	mutex_exit(&srv_innodb_monitor_mutex);
 	fflush(file);
+
+	return(ret);
 }
 
 /******************************************************************//**
@@ -1948,26 +1954,23 @@ srv_export_innodb_status(void)
 }
 
 /*********************************************************************//**
-A thread which wakes up threads whose lock wait may have lasted too long.
-This also prints the info output by various InnoDB monitors.
+A thread which prints the info output by various InnoDB monitors.
 @return	a dummy parameter */
 UNIV_INTERN
 os_thread_ret_t
-srv_lock_timeout_and_monitor_thread(
-/*================================*/
+srv_monitor_thread(
+/*===============*/
 	void*	arg __attribute__((unused)))
 			/*!< in: a dummy parameter required by
 			os_thread_create */
 {
-	srv_slot_t*	slot;
 	double		time_elapsed;
 	time_t		current_time;
 	time_t		last_table_monitor_time;
 	time_t		last_tablespace_monitor_time;
 	time_t		last_monitor_time;
-	ibool		some_waits;
-	double		wait_time;
-	ulint		i;
+	ulint		mutex_skipped;
+	ibool		last_srv_print_monitor;
 
 #ifdef UNIV_DEBUG_THREAD_CREATION
 	fprintf(stderr, "Lock timeout thread starts, id %lu\n",
@@ -1978,13 +1981,15 @@ srv_lock_timeout_and_monitor_thread(
 	last_table_monitor_time = time(NULL);
 	last_tablespace_monitor_time = time(NULL);
 	last_monitor_time = time(NULL);
+	mutex_skipped = 0;
+	last_srv_print_monitor = srv_print_innodb_monitor;
 loop:
-	srv_lock_timeout_and_monitor_active = TRUE;
+	srv_monitor_active = TRUE;
 
-	/* When someone is waiting for a lock, we wake up every second
-	and check if a timeout has passed for a lock wait */
+	/* Wake up every 5 seconds to see if we need to print
+	monitor information. */
 
-	os_thread_sleep(1000000);
+	os_thread_sleep(5000000);
 
 	current_time = time(NULL);
 
@@ -1994,14 +1999,40 @@ loop:
 		last_monitor_time = time(NULL);
 
 		if (srv_print_innodb_monitor) {
-			srv_printf_innodb_monitor(stderr, NULL, NULL);
+			/* Reset mutex_skipped counter every time
+			srv_print_innodb_monitor changes. This is to
+			ensure we will not be blocked by kernel_mutex
+			for short duration information printing,
+			such as requested by sync_array_print_long_waits() */
+			if (!last_srv_print_monitor) {
+				mutex_skipped = 0;
+				last_srv_print_monitor = TRUE;
+			}
+
+			if (!srv_printf_innodb_monitor(stderr,
+						MUTEX_NOWAIT(mutex_skipped),
+						NULL, NULL)) {
+				mutex_skipped++;
+			} else {
+				/* Reset the counter */
+				mutex_skipped = 0;
+			}
+		} else {
+			last_srv_print_monitor = FALSE;
 		}
 
+
 		if (srv_innodb_status) {
 			mutex_enter(&srv_monitor_file_mutex);
 			rewind(srv_monitor_file);
-			srv_printf_innodb_monitor(srv_monitor_file, NULL,
-						  NULL);
+			if (!srv_printf_innodb_monitor(srv_monitor_file,
+						MUTEX_NOWAIT(mutex_skipped),
+						NULL, NULL)) {
+				mutex_skipped++;
+			} else {
+				mutex_skipped = 0;
+			}
+
 			os_file_set_eof(srv_monitor_file);
 			mutex_exit(&srv_monitor_file_mutex);
 		}
@@ -2054,6 +2085,56 @@ loop:
 		}
 	}
 
+	if (srv_shutdown_state >= SRV_SHUTDOWN_CLEANUP) {
+		goto exit_func;
+	}
+
+	if (srv_print_innodb_monitor
+	    || srv_print_innodb_lock_monitor
+	    || srv_print_innodb_tablespace_monitor
+	    || srv_print_innodb_table_monitor) {
+		goto loop;
+	}
+
+	srv_monitor_active = FALSE;
+
+	goto loop;
+
+exit_func:
+	srv_monitor_active = FALSE;
+
+	/* We count the number of threads in os_thread_exit(). A created
+	thread should always use that to exit and not use return() to exit. */
+
+	os_thread_exit(NULL);
+
+	OS_THREAD_DUMMY_RETURN;
+}
+
+/*********************************************************************//**
+A thread which wakes up threads whose lock wait may have lasted too long.
+@return	a dummy parameter */
+UNIV_INTERN
+os_thread_ret_t
+srv_lock_timeout_thread(
+/*====================*/
+	void*	arg __attribute__((unused)))
+			/*!< in: a dummy parameter required by
+			os_thread_create */
+{
+	srv_slot_t*	slot;
+	ibool		some_waits;
+	double		wait_time;
+	ulint		i;
+
+loop:
+	/* When someone is waiting for a lock, we wake up every second
+	and check if a timeout has passed for a lock wait */
+
+	os_thread_sleep(1000000);
+
+	srv_lock_timeout_active = TRUE;
+
 	mutex_enter(&kernel_mutex);
 
 	some_waits = FALSE;
@@ -2104,17 +2185,11 @@ loop:
 		goto exit_func;
 	}
 
-	if (some_waits || srv_print_innodb_monitor
-	    || srv_print_innodb_lock_monitor
-	    || srv_print_innodb_tablespace_monitor
-	    || srv_print_innodb_table_monitor) {
+	if (some_waits) {
 		goto loop;
 	}
 
-	/* No one was waiting for a lock and no monitor was active:
-	suspend this thread */
-
-	srv_lock_timeout_and_monitor_active = FALSE;
+	srv_lock_timeout_active = FALSE;
 
 #if 0
 	/* The following synchronisation is disabled, since
@@ -2124,7 +2199,7 @@ loop:
 	goto loop;
 
 exit_func:
-	srv_lock_timeout_and_monitor_active = FALSE;
+	srv_lock_timeout_active = FALSE;
 
 	/* We count the number of threads in os_thread_exit(). A created
 	thread should always use that to exit and not use return() to exit. */
@@ -2449,7 +2524,10 @@ loop:
 						BUF_FLUSH_LIST,
 						n_flush,
 						IB_ULONGLONG_MAX);
-				skip_sleep = TRUE;
+
+				if (n_flush == PCT_IO(100)) {
+					skip_sleep = TRUE;
+				}
 			}
 		}
 

=== modified file 'storage/innodb_plugin/srv/srv0start.c'
--- a/storage/innodb_plugin/srv/srv0start.c	2009-11-30 11:32:05 +0000
+++ b/storage/innodb_plugin/srv/srv0start.c	2010-04-01 11:59:25 +0000
@@ -1,7 +1,8 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
+Copyright (c) 2009, Percona Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
 Google, Inc. Those modifications are gratefully acknowledged and are described
@@ -9,6 +10,13 @@ briefly in the InnoDB documentation. The
 incorporated with their permission, and subject to the conditions contained in
 the file COPYING.Google.
 
+Portions of this file contain modifications contributed and copyrighted
+by Percona Inc.. Those modifications are
+gratefully acknowledged and are described briefly in the InnoDB
+documentation. The contributions by Percona Inc. are incorporated with
+their permission, and subject to the conditions contained in the file
+COPYING.Percona.
+
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
 Foundation; version 2 of the License.
@@ -22,32 +30,6 @@ this program; if not, write to the Free 
 Place, Suite 330, Boston, MA 02111-1307 USA
 
 *****************************************************************************/
-/***********************************************************************
-
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
-Copyright (c) 2009, Percona Inc.
-
-Portions of this file contain modifications contributed and copyrighted
-by Percona Inc.. Those modifications are
-gratefully acknowledged and are described briefly in the InnoDB
-documentation. The contributions by Percona Inc. are incorporated with
-their permission, and subject to the conditions contained in the file
-COPYING.Percona.
-
-This program is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; version 2 of the License.
-
-This program is distributed in the hope that it will be useful, but
-WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
-Public License for more details.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
-
-***********************************************************************/
 
 /********************************************************************//**
 @file srv/srv0start.c
@@ -105,6 +87,7 @@ Created 2/16/1996 Heikki Tuuri
 # include "btr0pcur.h"
 # include "thr0loc.h"
 # include "os0sync.h" /* for INNODB_RW_LOCKS_USE_ATOMICS */
+# include "zlib.h" /* for ZLIB_VERSION */
 
 /** Log sequence number immediately after startup */
 UNIV_INTERN ib_uint64_t	srv_start_lsn;
@@ -143,9 +126,9 @@ static mutex_t		ios_mutex;
 static ulint		ios;
 
 /** io_handler_thread parameters for thread identification */
-static ulint		n[SRV_MAX_N_IO_THREADS + 5];
+static ulint		n[SRV_MAX_N_IO_THREADS + 6];
 /** io_handler_thread identifiers */
-static os_thread_id_t	thread_ids[SRV_MAX_N_IO_THREADS + 5];
+static os_thread_id_t	thread_ids[SRV_MAX_N_IO_THREADS + 6];
 
 /** We use this mutex to test the return value of pthread_mutex_trylock
    on successful locking. HP-UX does NOT return 0, though Linux et al do. */
@@ -1074,7 +1057,11 @@ innobase_start_or_create_for_mysql(void)
 #ifdef UNIV_IBUF_DEBUG
 	fprintf(stderr,
 		"InnoDB: !!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!\n"
-		"InnoDB: Crash recovery will fail with UNIV_IBUF_DEBUG\n");
+# ifdef UNIV_IBUF_COUNT_DEBUG
+		"InnoDB: !!!!!!!! UNIV_IBUF_COUNT_DEBUG switched on !!!!!!!!!\n"
+		"InnoDB: Crash recovery will fail with UNIV_IBUF_COUNT_DEBUG\n"
+# endif
+		);
 #endif
 
 #ifdef UNIV_SYNC_DEBUG
@@ -1101,7 +1088,15 @@ innobase_start_or_create_for_mysql(void)
 			"InnoDB: The InnoDB memory heap is disabled\n");
 	}
 
-	fprintf(stderr, "InnoDB: %s\n", IB_ATOMICS_STARTUP_MSG);
+	fputs("InnoDB: " IB_ATOMICS_STARTUP_MSG
+	      "\nInnoDB: Compressed tables use zlib " ZLIB_VERSION
+#ifdef UNIV_ZIP_DEBUG
+	      " with validation"
+#endif /* UNIV_ZIP_DEBUG */
+#ifdef UNIV_ZIP_COPY
+	      " and extra copying"
+#endif /* UNIV_ZIP_COPY */
+	      "\n" , stderr);
 
 	/* Since InnoDB does not currently clean up all its internal data
 	structures in MySQL Embedded Server Library server_end(), we
@@ -1575,6 +1570,14 @@ innobase_start_or_create_for_mysql(void)
 		dict_boot();
 		trx_sys_init_at_db_start();
 
+		/* Initialize the fsp free limit global variable in the log
+		system */
+		fsp_header_get_free_limit();
+
+		/* recv_recovery_from_checkpoint_finish needs trx lists which
+		are initialized in trx_sys_init_at_db_start(). */
+
+		recv_recovery_from_checkpoint_finish();
 		if (srv_force_recovery < SRV_FORCE_NO_IBUF_MERGE) {
 			/* The following call is necessary for the insert
 			buffer to work with multiple tablespaces. We must
@@ -1590,26 +1593,14 @@ innobase_start_or_create_for_mysql(void)
 			every table in the InnoDB data dictionary that has
 			an .ibd file.
 
-			We also determine the maximum tablespace id used.
-
-			TODO: We may have incomplete transactions in the
-			data dictionary tables. Does that harm the scanning of
-			the data dictionary below? */
+			We also determine the maximum tablespace id used. */
 
 			dict_check_tablespaces_and_store_max_id(
 				recv_needed_recovery);
 		}
 
 		srv_startup_is_before_trx_rollback_phase = FALSE;
-
-		/* Initialize the fsp free limit global variable in the log
-		system */
-		fsp_header_get_free_limit();
-
-		/* recv_recovery_from_checkpoint_finish needs trx lists which
-		are initialized in trx_sys_init_at_db_start(). */
-
-		recv_recovery_from_checkpoint_finish();
+		recv_recovery_rollback_active();
 
 		/* It is possible that file_format tag has never
 		been set. In this case we initialize it to minimum
@@ -1658,15 +1649,18 @@ innobase_start_or_create_for_mysql(void)
 	/* fprintf(stderr, "Max allowed record size %lu\n",
 	page_get_free_space_of_empty() / 2); */
 
-	/* Create the thread which watches the timeouts for lock waits
-	and prints InnoDB monitor info */
-
-	os_thread_create(&srv_lock_timeout_and_monitor_thread, NULL,
+	/* Create the thread which watches the timeouts for lock waits */
+	os_thread_create(&srv_lock_timeout_thread, NULL,
 			 thread_ids + 2 + SRV_MAX_N_IO_THREADS);
 
 	/* Create the thread which warns of long semaphore waits */
 	os_thread_create(&srv_error_monitor_thread, NULL,
 			 thread_ids + 3 + SRV_MAX_N_IO_THREADS);
+
+	/* Create the thread which prints InnoDB monitor info */
+	os_thread_create(&srv_monitor_thread, NULL,
+			 thread_ids + 4 + SRV_MAX_N_IO_THREADS);
+
 	srv_is_being_started = FALSE;
 
 	if (trx_doublewrite == NULL) {

=== modified file 'storage/innodb_plugin/sync/sync0sync.c'
--- a/storage/innodb_plugin/sync/sync0sync.c	2009-11-30 11:32:05 +0000
+++ b/storage/innodb_plugin/sync/sync0sync.c	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1995, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1995, 2010, Innobase Oy. All Rights Reserved.
 Copyright (c) 2008, Google Inc.
 
 Portions of this file contain modifications contributed and copyrighted by
@@ -315,6 +315,15 @@ mutex_free(
 	ut_a(mutex_get_lock_word(mutex) == 0);
 	ut_a(mutex_get_waiters(mutex) == 0);
 
+#ifdef UNIV_MEM_DEBUG
+	if (mutex == &mem_hash_mutex) {
+		ut_ad(UT_LIST_GET_LEN(mutex_list) == 1);
+		ut_ad(UT_LIST_GET_FIRST(mutex_list) == &mem_hash_mutex);
+		UT_LIST_REMOVE(list, mutex_list, mutex);
+		goto func_exit;
+	}
+#endif /* UNIV_MEM_DEBUG */
+
 	if (mutex != &mutex_list_mutex
 #ifdef UNIV_SYNC_DEBUG
 	    && mutex != &sync_thread_mutex
@@ -336,7 +345,9 @@ mutex_free(
 	}
 
 	os_event_free(mutex->event);
-
+#ifdef UNIV_MEM_DEBUG
+func_exit:
+#endif /* UNIV_MEM_DEBUG */
 #if !defined(HAVE_ATOMIC_BUILTINS)
 	os_fast_mutex_free(&(mutex->os_fast_mutex));
 #endif
@@ -947,12 +958,62 @@ sync_thread_levels_contain(
 }
 
 /******************************************************************//**
+Checks if the level array for the current thread contains a
+mutex or rw-latch at the specified level.
+@return	a matching latch, or NULL if not found */
+UNIV_INTERN
+void*
+sync_thread_levels_contains(
+/*========================*/
+	ulint	level)			/*!< in: latching order level
+					(SYNC_DICT, ...)*/
+{
+	sync_level_t*	arr;
+	sync_thread_t*	thread_slot;
+	sync_level_t*	slot;
+	ulint		i;
+
+	if (!sync_order_checks_on) {
+
+		return(NULL);
+	}
+
+	mutex_enter(&sync_thread_mutex);
+
+	thread_slot = sync_thread_level_arrays_find_slot();
+
+	if (thread_slot == NULL) {
+
+		mutex_exit(&sync_thread_mutex);
+
+		return(NULL);
+	}
+
+	arr = thread_slot->levels;
+
+	for (i = 0; i < SYNC_THREAD_N_LEVELS; i++) {
+
+		slot = sync_thread_levels_get_nth(arr, i);
+
+		if (slot->latch != NULL && slot->level == level) {
+
+			mutex_exit(&sync_thread_mutex);
+			return(slot->latch);
+		}
+	}
+
+	mutex_exit(&sync_thread_mutex);
+
+	return(NULL);
+}
+
+/******************************************************************//**
 Checks that the level array for the current thread is empty.
-@return	TRUE if empty except the exceptions specified below */
+@return	a latch, or NULL if empty except the exceptions specified below */
 UNIV_INTERN
-ibool
-sync_thread_levels_empty_gen(
-/*=========================*/
+void*
+sync_thread_levels_nonempty_gen(
+/*============================*/
 	ibool	dict_mutex_allowed)	/*!< in: TRUE if dictionary mutex is
 					allowed to be owned by the thread,
 					also purge_is_running mutex is
@@ -965,7 +1026,7 @@ sync_thread_levels_empty_gen(
 
 	if (!sync_order_checks_on) {
 
-		return(TRUE);
+		return(NULL);
 	}
 
 	mutex_enter(&sync_thread_mutex);
@@ -976,7 +1037,7 @@ sync_thread_levels_empty_gen(
 
 		mutex_exit(&sync_thread_mutex);
 
-		return(TRUE);
+		return(NULL);
 	}
 
 	arr = thread_slot->levels;
@@ -993,13 +1054,13 @@ sync_thread_levels_empty_gen(
 			mutex_exit(&sync_thread_mutex);
 			ut_error;
 
-			return(FALSE);
+			return(slot->latch);
 		}
 	}
 
 	mutex_exit(&sync_thread_mutex);
 
-	return(TRUE);
+	return(NULL);
 }
 
 /******************************************************************//**
@@ -1370,6 +1431,12 @@ sync_close(void)
 	mutex = UT_LIST_GET_FIRST(mutex_list);
 
 	while (mutex) {
+#ifdef UNIV_MEM_DEBUG
+		if (mutex == &mem_hash_mutex) {
+			mutex = UT_LIST_GET_NEXT(list, mutex);
+			continue;
+		}
+#endif /* UNIV_MEM_DEBUG */
 		mutex_free(mutex);
 		mutex = UT_LIST_GET_FIRST(mutex_list);
 	}

=== modified file 'storage/innodb_plugin/trx/trx0i_s.c'
--- a/storage/innodb_plugin/trx/trx0i_s.c	2009-12-01 10:38:40 +0000
+++ b/storage/innodb_plugin/trx/trx0i_s.c	2010-04-01 13:01:22 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 2007, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 2007, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -28,11 +28,18 @@ table cache" for later retrieval.
 Created July 17, 2007 Vasil Dimov
 *******************************************************/
 
+/* Found during the build of 5.5.3 on Linux 2.4 and early 2.6 kernels:
+   The includes "univ.i" -> "my_global.h" cause a different path
+   to be taken further down with pthread functions and types,
+   so they must come first.
+   From the symptoms, this is related to bug#46587 in the MySQL bug DB.
+*/
+#include "univ.i"
+
 #include <mysql/plugin.h>
 
 #include "mysql_addons.h"
 
-#include "univ.i"
 #include "buf0buf.h"
 #include "dict0dict.h"
 #include "ha0storage.h"

=== modified file 'storage/innodb_plugin/trx/trx0rec.c'
--- a/storage/innodb_plugin/trx/trx0rec.c	2009-10-08 10:00:49 +0000
+++ b/storage/innodb_plugin/trx/trx0rec.c	2010-04-01 13:01:22 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -977,6 +977,7 @@ trx_undo_update_rec_get_update(
 			fprintf(stderr, "\n"
 				"InnoDB: n_fields = %lu, i = %lu, ptr %p\n",
 				(ulong) n_fields, (ulong) i, ptr);
+			*upd = NULL;
 			return(NULL);
 		}
 

=== modified file 'storage/innodb_plugin/trx/trx0rseg.c'
--- a/storage/innodb_plugin/trx/trx0rseg.c	2009-11-30 11:32:05 +0000
+++ b/storage/innodb_plugin/trx/trx0rseg.c	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -286,39 +286,3 @@ trx_rseg_list_and_array_init(
 		}
 	}
 }
-
-/****************************************************************//**
-Creates a new rollback segment to the database.
-@return	the created segment object, NULL if fail */
-UNIV_INTERN
-trx_rseg_t*
-trx_rseg_create(
-/*============*/
-	ulint	space,		/*!< in: space id */
-	ulint	max_size,	/*!< in: max size in pages */
-	ulint*	id,		/*!< out: rseg id */
-	mtr_t*	mtr)		/*!< in: mtr */
-{
-	ulint		flags;
-	ulint		zip_size;
-	ulint		page_no;
-	trx_rseg_t*	rseg;
-
-	mtr_x_lock(fil_space_get_latch(space, &flags), mtr);
-	zip_size = dict_table_flags_to_zip_size(flags);
-	mutex_enter(&kernel_mutex);
-
-	page_no = trx_rseg_header_create(space, zip_size, max_size, id, mtr);
-
-	if (page_no == FIL_NULL) {
-
-		mutex_exit(&kernel_mutex);
-		return(NULL);
-	}
-
-	rseg = trx_rseg_mem_create(*id, space, zip_size, page_no, mtr);
-
-	mutex_exit(&kernel_mutex);
-
-	return(rseg);
-}

=== modified file 'storage/innodb_plugin/trx/trx0sys.c'
--- a/storage/innodb_plugin/trx/trx0sys.c	2009-11-30 11:32:05 +0000
+++ b/storage/innodb_plugin/trx/trx0sys.c	2010-04-01 13:01:22 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -584,8 +584,8 @@ trx_sys_doublewrite_init_or_restore_page
 						" recover the database"
 						" with the my.cnf\n"
 						"InnoDB: option:\n"
-						"InnoDB: set-variable="
-						"innodb_force_recovery=6\n");
+						"InnoDB:"
+						" innodb_force_recovery=6\n");
 					exit(1);
 				}
 
@@ -1535,6 +1535,7 @@ trx_sys_file_format_id_to_name(
 
 #endif /* !UNIV_HOTBACKUP */
 
+#ifndef UNIV_HOTBACKUP
 /*********************************************************************
 Shutdown/Close the transaction system. */
 UNIV_INTERN
@@ -1611,3 +1612,4 @@ trx_sys_close(void)
 	trx_sys = NULL;
 	mutex_exit(&kernel_mutex);
 }
+#endif /* !UNIV_HOTBACKUP */

=== modified file 'storage/innodb_plugin/trx/trx0trx.c'
--- a/storage/innodb_plugin/trx/trx0trx.c	2009-12-01 10:38:40 +0000
+++ b/storage/innodb_plugin/trx/trx0trx.c	2010-04-01 11:59:25 +0000
@@ -1,6 +1,6 @@
 /*****************************************************************************
 
-Copyright (c) 1996, 2009, Innobase Oy. All Rights Reserved.
+Copyright (c) 1996, 2010, Innobase Oy. All Rights Reserved.
 
 This program is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free Software
@@ -425,6 +425,7 @@ trx_lists_init_at_db_start(void)
 	trx_undo_t*	undo;
 	trx_t*		trx;
 
+	ut_ad(mutex_own(&kernel_mutex));
 	UT_LIST_INIT(trx_sys->trx_list);
 
 	/* Look from the rollback segments if there exist undo logs for

=== added file 'storage/innodb_plugin/ut/ut0rbt.c'
--- a/storage/innodb_plugin/ut/ut0rbt.c	1970-01-01 00:00:00 +0000
+++ b/storage/innodb_plugin/ut/ut0rbt.c	2010-04-01 12:56:22 +0000
@@ -0,0 +1,1249 @@
+/*****************************************************************************
+
+Copyright (c) 2006, 2009, Innobase Oy. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc., 59 Temple
+Place, Suite 330, Boston, MA 02111-1307 USA
+
+*****************************************************************************/
+
+/*******************************************************************//**
+@file ut/ut0rbt.c
+Red-Black tree implementation
+
+Created 2007-03-20 Sunny Bains
+***********************************************************************/
+
+#include "ut0rbt.h"
+
+/************************************************************************
+Definition of a red-black tree
+==============================
+
+A red-black tree is a binary search tree which has the following
+red-black properties:
+
+   1. Every node is either red or black.
+   2. Every leaf (NULL - in our case tree->nil) is black.
+   3. If a node is red, then both its children are black.
+   4. Every simple path from a node to a descendant leaf contains the
+      same number of black nodes.
+
+   from (3) above, the implication is that on any path from the root
+   to a leaf, red nodes must not be adjacent.
+
+   However, any number of black nodes may appear in a sequence. */
+
+#if	defined(IB_RBT_TESTING)
+#warning "Testing enabled!"
+#endif
+
+#define ROOT(t)		((t)->root->left)	/* the real root node */
+#define	SIZEOF_NODE(t)	((sizeof(ib_rbt_node_t) + (t)->sizeof_value) - 1)
+
+/****************************************************************//**
+Print a sub-tree in pre-order: the node first, then left, then right. */
+static
+void
+rbt_print_subtree(
+/*==============*/
+	const ib_rbt_t*		tree,	/*!< in: tree that owns the node */
+	const ib_rbt_node_t*	node,	/*!< in: root of the sub-tree */
+	ib_rbt_print_node	print)	/*!< in: callback invoked per node */
+{
+	if (node == tree->nil) {
+		return;		/* The sentinel ends the recursion. */
+	}
+	print(node);
+	rbt_print_subtree(tree, node->left, print);
+	rbt_print_subtree(tree, node->right, print);
+}
+
+/****************************************************************//**
+Verify that the keys are in strictly ascending order.
+@return	TRUE if OK. FALSE if not ordered */
+static
+ibool
+rbt_check_ordering(
+/*===============*/
+	const ib_rbt_t*		tree)	/*!< in: tree to verify */
+{
+	const ib_rbt_node_t*	last_seen = NULL;
+	const ib_rbt_node_t*	cursor = rbt_first(tree);
+
+	/* Walk the nodes in order, comparing each with the one before. */
+	while (cursor != NULL) {
+		if (last_seen != NULL
+		    && tree->compare(last_seen->value, cursor->value) >= 0) {
+			return(FALSE);
+		}
+		last_seen = cursor;
+		cursor = rbt_next(tree, last_seen);
+	}
+
+	return(TRUE);
+}
+
+/****************************************************************//**
+Verify that every path from the given node down to the leaves crosses
+the same number of black nodes, and that no red node has a red child.
+@return	0 on failure else black height of the subtree */
+static
+ibool
+rbt_count_black_nodes(
+/*==================*/
+	const ib_rbt_t*		tree,	/*!< in: tree to verify */
+	const ib_rbt_node_t*	node)	/*!< in: start of sub-tree */
+{
+	ulint	left_height;
+	ulint	right_height;
+
+	if (node == tree->nil) {
+		/* The sentinel leaf counts as a single black node. */
+		return(1);
+	}
+
+	left_height = rbt_count_black_nodes(tree, node->left);
+	right_height = rbt_count_black_nodes(tree, node->right);
+
+	/* A broken sub-tree, or sub-trees of unequal black height,
+	invalidates everything above it. */
+	if (left_height == 0
+	    || right_height == 0
+	    || left_height != right_height) {
+
+		return(0);
+	}
+
+	if (node->color == IB_RBT_RED) {
+		/* A red node must have two black children and adds
+		nothing to the black height. */
+		if (node->left->color != IB_RBT_BLACK
+		    || node->right->color != IB_RBT_BLACK) {
+
+			return(0);
+		}
+
+		return(left_height);
+	}
+
+	/* Anything that is neither red nor black is corrupt; a black
+	node adds one to the black height of its sub-trees. */
+	return(node->color != IB_RBT_BLACK ? 0 : right_height + 1);
+}
+
+/****************************************************************//**
+Turn the node's right child's left sub-tree into node's right sub-tree.
+This will also make node's right child its parent. */
+static
+void
+rbt_rotate_left(
+/*============*/
+	const ib_rbt_node_t*	nil,	/*!< in: nil node of the tree */
+	ib_rbt_node_t*		node)	/*!< in: node to rotate */
+{
+	ib_rbt_node_t*	right = node->right;
+
+	node->right = right->left;
+
+	if (right->left != nil) {
+		right->left->parent = node;
+	}
+
+	/* Right's new parent was node's parent. */
+	right->parent = node->parent;
+
+	/* The real root is the left child of the fake root (tree->root),
+	so the root needs no special treatment here. */
+	if (node == node->parent->left) {
+		/* Node was on the left of its parent. */
+		node->parent->left = right;
+	} else {
+		/* Node must have been on the right. */
+		node->parent->right = right;
+	}
+
+	/* Finally, put node on right's left. */
+	right->left = node;
+	node->parent = right;
+}
+
+/****************************************************************//**
+Turn the node's left child's right sub-tree into node's left sub-tree.
+This also makes node's left child its parent. */
+static
+void
+rbt_rotate_right(
+/*=============*/
+	const ib_rbt_node_t*	nil,	/*!< in: nil node of tree */
+	ib_rbt_node_t*		node)	/*!< in: node to rotate */
+{
+	ib_rbt_node_t*	left = node->left;
+
+	node->left = left->right;
+
+	if (left->right != nil) {
+		left->right->parent = node;
+	}
+
+	/* Left's new parent was node's parent. */
+	left->parent = node->parent;
+
+	/* The real root is the left child of the fake root (tree->root),
+	so the root needs no special treatment here. */
+	if (node == node->parent->right) {
+	    /* Node was on the right of its parent. */
+            node->parent->right = left;
+	} else {
+	    /* Node must have been on the left. */
+            node->parent->left = left;
+	}
+
+	/* Finally, put node on left's right. */
+	left->right = node;
+	node->parent = left;
+}
+
+/****************************************************************//**
+Link a node below the position recorded in parent. No rebalancing is done.
+@return inserted node */
+static
+ib_rbt_node_t*
+rbt_tree_add_child(
+/*===============*/
+	const ib_rbt_t*	tree,		/*!< in: rbt tree */
+	ib_rbt_bound_t*	parent,		/*!< in: node's parent */
+	ib_rbt_node_t*	node)		/*!< in: node to add */
+{
+	/* Cast away the const. */
+	ib_rbt_node_t*	attach_to = (ib_rbt_node_t*) parent->last;
+
+	if (attach_to != tree->root && parent->result >= 0) {
+		/* FIXME: We don't handle duplicates (yet)! */
+		ut_a(parent->result != 0);
+		attach_to->right = node;
+	} else {
+		/* Smaller keys, and anything under the fake root, go left. */
+		attach_to->left = node;
+	}
+
+	node->parent = attach_to;
+
+	return(node);
+}
+
+/****************************************************************//**
+Generic binary tree insert: find the leaf position for key and link
+node there. The tree is not rebalanced.
+@return inserted node */
+static
+ib_rbt_node_t*
+rbt_tree_insert(
+/*============*/
+	ib_rbt_t*	tree,		/*!< in: rb tree */
+	const void*	key,		/*!< in: key for ordering */
+	ib_rbt_node_t*	node)		/*!< in: node hold the insert value */
+{
+	ib_rbt_bound_t	where;
+	ib_rbt_node_t*	current;
+
+	/* If the tree is empty the loop below does not run and the node
+	is hung off the fake root. */
+	where.result = 0;
+	where.last = tree->root;
+
+	/* Regular binary search: descend until we fall off the tree,
+	remembering the last node visited and how key compared to it. */
+	for (current = ROOT(tree); current != tree->nil; /* No op */) {
+
+		where.last = current;
+		where.result = tree->compare(key, current->value);
+
+		current = (where.result < 0) ? current->left : current->right;
+	}
+
+	ut_a(current == tree->nil);
+
+	rbt_tree_add_child(tree, &where, node);
+
+	return(node);
+}
+
+/****************************************************************//**
+Restore the red-black properties after a node was linked into the tree. */
+static
+void
+rbt_balance_tree(
+/*=============*/
+	const ib_rbt_t*	tree,		/*!< in: tree to balance */
+	ib_rbt_node_t*	node)		/*!< in: node that was inserted */
+{
+	const ib_rbt_node_t*	nil = tree->nil;
+	ib_rbt_node_t*		parent = node->parent;
+
+	/* A newly inserted node always starts out red. */
+	node->color = IB_RBT_RED;
+
+	while (node != ROOT(tree) && parent->color == IB_RBT_RED) {
+		ib_rbt_node_t*	grand_parent = parent->parent;
+
+		if (parent == grand_parent->left) {
+			ib_rbt_node_t*	uncle = grand_parent->right;
+
+			if (uncle->color == IB_RBT_RED) {
+
+				/* Case 1 - the uncle is red: recolor. */
+				uncle->color = IB_RBT_BLACK;
+				parent->color = IB_RBT_BLACK;
+				grand_parent->color = IB_RBT_RED;
+
+				/* Continue the fix-up from the grand parent. */
+				node = grand_parent;
+
+			} else {
+
+				if (node == parent->right) {
+					/* The uncle is not red and node is
+					a right child, case 2 - move node
+					up and rotate. */
+					node = parent;
+					rbt_rotate_left(nil, node);
+				}
+
+				grand_parent = node->parent->parent;
+
+				/* Case 3 - recolor, then rotate the grand parent. */
+				node->parent->color = IB_RBT_BLACK;
+				grand_parent->color = IB_RBT_RED;
+
+				rbt_rotate_right(nil, grand_parent);
+			}
+
+		} else {
+			ib_rbt_node_t*	uncle = grand_parent->left;
+
+			if (uncle->color == IB_RBT_RED) {
+
+				/* Case 1 - the uncle is red: recolor. */
+				uncle->color = IB_RBT_BLACK;
+				parent->color = IB_RBT_BLACK;
+				grand_parent->color = IB_RBT_RED;
+
+				/* Continue the fix-up from the grand parent. */
+				node = grand_parent;
+
+			} else {
+
+				if (node == parent->left) {
+					/* The uncle is not red and node is a
+					left child, case 2 - move node up and
+					rotate. */
+					node = parent;
+					rbt_rotate_right(nil, node);
+				}
+
+				grand_parent = node->parent->parent;
+
+				/* Case 3 - recolor, then rotate the grand parent. */
+				node->parent->color = IB_RBT_BLACK;
+				grand_parent->color = IB_RBT_RED;
+
+				rbt_rotate_left(nil, grand_parent);
+			}
+		}
+
+		parent = node->parent;
+	}
+
+	/* Color the root black. */
+	ROOT(tree)->color = IB_RBT_BLACK;
+}
+
+/****************************************************************//**
+Find the given node's successor.
+@return	successor node or NULL if no successor */
+static
+ib_rbt_node_t*
+rbt_find_successor(
+/*===============*/
+	const ib_rbt_t*		tree,	/*!< in: rb tree */
+	const ib_rbt_node_t*	current)/*!< in: this is declared const
+					because it can be called via
+					rbt_next() */
+{
+	ib_rbt_node_t*	child;
+	ib_rbt_node_t*	ancestor;
+
+	if (current->right != tree->nil) {
+		/* The successor is the left most node of the right
+		sub-tree. */
+		child = current->right;
+		while (child->left != tree->nil) {
+			child = child->left;
+		}
+
+		return(child);
+	}
+
+	/* No right sub-tree: climb until we step up out of a left
+	sub-tree, the node we arrive at is the successor. Arriving at
+	the fake root means that there is no successor. */
+	child = (ib_rbt_node_t*) current;	/* Cast away the const. */
+	ancestor = current->parent;
+
+	while (ancestor != tree->root && child == ancestor->right) {
+		child = ancestor;
+		ancestor = child->parent;
+	}
+
+	return(ancestor == tree->root ? NULL : ancestor);
+}
+
+/****************************************************************//**
+Find the given node's predecessor.
+@return	predecessor node or NULL if no predecessor */
+static
+ib_rbt_node_t*
+rbt_find_predecessor(
+/*=================*/
+	const ib_rbt_t*		tree,		/*!< in: rb tree */
+	const ib_rbt_node_t*	current)	/*!< in: this is declared const
+						because it can be called via
+						rbt_prev() */
+{
+	ib_rbt_node_t*	child;
+	ib_rbt_node_t*	ancestor;
+
+	if (current->left != tree->nil) {
+		/* The predecessor is the right most node of the left
+		sub-tree. */
+		child = current->left;
+		while (child->right != tree->nil) {
+			child = child->right;
+		}
+
+		return(child);
+	}
+
+	/* No left sub-tree: climb until we step up out of a right
+	sub-tree, the node we arrive at is the predecessor. Arriving at
+	the fake root means that there is no predecessor. */
+	child = (ib_rbt_node_t*) current;	/* Cast away the const. */
+	ancestor = current->parent;
+
+	while (ancestor != tree->root && child == ancestor->left) {
+		child = ancestor;
+		ancestor = child->parent;
+	}
+
+	return(ancestor == tree->root ? NULL : ancestor);
+}
+
+/****************************************************************//**
+Make the parent of eject refer to node instead, and node to that parent.
+After this eject is an orphan. */
+static
+void
+rbt_eject_node(
+/*===========*/
+	ib_rbt_node_t*	eject,		/*!< in: node to eject */
+	ib_rbt_node_t*	node)		/*!< in: node to replace with */
+{
+	ib_rbt_node_t*	up = eject->parent;
+
+	/* Re-point whichever child link of the parent referred to eject. */
+	if (up->left == eject) {
+		up->left = node;
+	} else if (up->right == eject) {
+		up->right = node;
+	} else {
+		ut_a(0);
+	}
+	/* eject itself is not modified, it just becomes unreachable. */
+	node->parent = up;
+}
+
+/****************************************************************//**
+Put node in the tree position held by replace and swap their colors. */
+static
+void
+rbt_replace_node(
+/*=============*/
+	ib_rbt_node_t*	replace,	/*!< in: node to replace */
+	ib_rbt_node_t*	node)		/*!< in: node to replace with */
+{
+	ib_rbt_color_t	color = node->color;
+
+	/* node takes over both sub-trees of replace. */
+	node->left = replace->left;
+	node->right = replace->right;
+
+	/* The adopted children must point back at node. */
+	node->left->parent = node;
+	node->right->parent = node;
+
+	/* Make the parent of replace point to node, and node to it. */
+	rbt_eject_node(replace, node);
+
+	/* Swap the colors; replace ends up with node's old color. */
+	node->color = replace->color;
+	replace->color = color;
+}
+
+/****************************************************************//**
+Detach node from the tree replacing it with one of its children.
+@return	the child that filled the vacated slot, where rebalancing starts */
+static
+ib_rbt_node_t*
+rbt_detach_node(
+/*============*/
+	const ib_rbt_t*	tree,		/*!< in: rb tree */
+	ib_rbt_node_t*	node)		/*!< in: node to detach */
+{
+	ib_rbt_node_t*		child;
+	const ib_rbt_node_t*	nil = tree->nil;
+
+	if (node->left != nil && node->right != nil) {
+		/* Case where the node to be deleted has two children. */
+		ib_rbt_node_t*	successor = rbt_find_successor(tree, node);
+
+		ut_a(successor != nil);
+		ut_a(successor->parent != nil);
+		ut_a(successor->left == nil);
+
+		child = successor->right;
+
+		/* Remove the successor node and replace with its child. */
+		rbt_eject_node(successor, child);
+
+		/* Replace the node to delete with its successor node. */
+		rbt_replace_node(node, successor);
+	} else {
+		ut_a(node->left == nil || node->right == nil);
+
+		child = (node->left != nil) ? node->left : node->right;
+
+		/* Replace the node to delete with one of its children. */
+		rbt_eject_node(node, child);
+	}
+
+	/* Reset the node links. */
+	node->parent = node->right = node->left = tree->nil;
+
+	return(child);
+}
+
+/****************************************************************//**
+Rebalance using the right sibling, after a deletion on parent's left side.
+@return	node to rebalance if more rebalancing required else NULL */
+static
+ib_rbt_node_t*
+rbt_balance_right(
+/*==============*/
+	const ib_rbt_node_t*	nil,	/*!< in: rb tree nil node */
+	ib_rbt_node_t*		parent,	/*!< in: parent node */
+	ib_rbt_node_t*		sibling)/*!< in: sibling node */
+{
+	ib_rbt_node_t*		node = NULL;
+
+	ut_a(sibling != nil);
+
+	/* A red sibling: recolor, rotate and pick up the new sibling. */
+	if (sibling->color == IB_RBT_RED) {
+
+		parent->color = IB_RBT_RED;
+		sibling->color = IB_RBT_BLACK;
+
+		rbt_rotate_left(nil, parent);
+
+		sibling = parent->right;
+
+		ut_a(sibling != nil);
+	}
+
+	/* Sibling has two black children: recolor, continue at parent. */
+	if (sibling->left->color == IB_RBT_BLACK
+	    && sibling->right->color == IB_RBT_BLACK) {
+
+		node = parent; /* Parent needs to be rebalanced too. */
+		sibling->color = IB_RBT_RED;
+
+	} else {
+		if (sibling->right->color == IB_RBT_BLACK) {
+
+			ut_a(sibling->left->color == IB_RBT_RED);
+
+			sibling->color = IB_RBT_RED;
+			sibling->left->color = IB_RBT_BLACK;
+
+			rbt_rotate_right(nil, sibling);
+
+			sibling = parent->right;
+			ut_a(sibling != nil);
+		}
+
+		sibling->color = parent->color;
+		sibling->right->color = IB_RBT_BLACK;
+
+		parent->color = IB_RBT_BLACK;
+
+		rbt_rotate_left(nil, parent);
+	}
+
+	return(node);
+}
+
+/****************************************************************//**
+Rebalance using the left sibling, after a deletion on parent's right side.
+@return	node to rebalance if more rebalancing required else NULL */
+static
+ib_rbt_node_t*
+rbt_balance_left(
+/*=============*/
+	const ib_rbt_node_t*	nil,	/*!< in: rb tree nil node */
+	ib_rbt_node_t*		parent,	/*!< in: parent node */
+	ib_rbt_node_t*		sibling)/*!< in: sibling node */
+{
+	ib_rbt_node_t*	node = NULL;
+
+	ut_a(sibling != nil);
+
+	/* A red sibling: recolor, rotate and pick up the new sibling. */
+	if (sibling->color == IB_RBT_RED) {
+
+		parent->color = IB_RBT_RED;
+		sibling->color = IB_RBT_BLACK;
+
+		rbt_rotate_right(nil, parent);
+		sibling = parent->left;
+
+		ut_a(sibling != nil);
+	}
+
+	/* Sibling has two black children: recolor, continue at parent. */
+	if (sibling->right->color == IB_RBT_BLACK
+	    && sibling->left->color == IB_RBT_BLACK) {
+
+		node = parent; /* Parent needs to be rebalanced too. */
+		sibling->color = IB_RBT_RED;
+
+	} else {
+		if (sibling->left->color == IB_RBT_BLACK) {
+
+			ut_a(sibling->right->color == IB_RBT_RED);
+
+			sibling->color = IB_RBT_RED;
+			sibling->right->color = IB_RBT_BLACK;
+
+			rbt_rotate_left(nil, sibling);
+
+			sibling = parent->left;
+
+			ut_a(sibling != nil);
+		}
+
+		sibling->color = parent->color;
+		sibling->left->color = IB_RBT_BLACK;
+
+		parent->color = IB_RBT_BLACK;
+
+		rbt_rotate_right(nil, parent);
+	}
+
+	return(node);
+}
+
+/****************************************************************//**
+Unlink the node and rebalance the tree if necessary; node is not freed. */
+static
+void
+rbt_remove_node_and_rebalance(
+/*==========================*/
+	ib_rbt_t*	tree,		/*!< in: rb tree */
+	ib_rbt_node_t*	node)		/*!< in: node to remove */
+{
+	/* Detach node and get the node that will be used
+	as rebalance start. */
+	ib_rbt_node_t*	child = rbt_detach_node(tree, node);
+
+	if (node->color == IB_RBT_BLACK) {
+		ib_rbt_node_t*	last = child;
+
+		ROOT(tree)->color = IB_RBT_RED;
+
+		while (child && child->color == IB_RBT_BLACK) {
+			ib_rbt_node_t*	parent = child->parent;
+
+			/* Did the deletion cause an imbalance in the
+			parent's left sub-tree. */
+			if (parent->left == child) {
+
+				child = rbt_balance_right(
+					tree->nil, parent, parent->right);
+
+			} else if (parent->right == child) {
+
+				child = rbt_balance_left(
+					tree->nil, parent, parent->left);
+
+			} else {
+				ut_error;
+			}
+
+			if (child) {
+				last = child;
+			}
+		}
+
+		ut_a(last);
+
+		last->color = IB_RBT_BLACK;
+		ROOT(tree)->color = IB_RBT_BLACK;
+	}
+
+	/* Note that we have removed a node from the tree. */
+	--tree->n_nodes;
+}
+
+/****************************************************************//**
+Free a node together with everything below it, children first. */
+static
+void
+rbt_free_node(
+/*==========*/
+	ib_rbt_node_t*	node,		/*!< in: node to free */
+	ib_rbt_node_t*	nil)		/*!< in: rb tree nil node */
+{
+	if (node == nil) {
+		return;
+	}
+	rbt_free_node(node->left, nil);
+	rbt_free_node(node->right, nil);
+	ut_free(node);
+}
+
+/****************************************************************//**
+Free all the nodes, the fake root, the nil sentinel and the tree itself. */
+UNIV_INTERN
+void
+rbt_free(
+/*=====*/
+	ib_rbt_t*	tree)		/*!< in: rb tree to free */
+{
+	rbt_free_node(tree->root, tree->nil);
+	ut_free(tree->nil);
+	ut_free(tree);
+}
+
+/****************************************************************//**
+Create an instance of a red black tree.
+@return	an empty rb tree */
+UNIV_INTERN
+ib_rbt_t*
+rbt_create(
+/*=======*/
+	size_t		sizeof_value,	/*!< in: sizeof data item */
+	ib_rbt_compare	compare)	/*!< in: fn to compare items */
+{
+	ib_rbt_t*	tree;
+	ib_rbt_node_t*	nil;
+	ib_rbt_node_t*	fake_root;
+
+	tree = (ib_rbt_t*) ut_malloc(sizeof(*tree));
+	memset(tree, 0, sizeof(*tree));
+
+	tree->compare = compare;
+	tree->sizeof_value = sizeof_value;
+
+	/* The sentinel (NIL) node is black and links only to itself. */
+	nil = (ib_rbt_node_t*) ut_malloc(sizeof(*nil));
+	memset(nil, 0, sizeof(*nil));
+	nil->color = IB_RBT_BLACK;
+	nil->parent = nil->left = nil->right = nil;
+	tree->nil = nil;
+
+	/* The "fake" root is black too, the real root node will be
+	its left child. */
+	fake_root = (ib_rbt_node_t*) ut_malloc(sizeof(*fake_root));
+	memset(fake_root, 0, sizeof(*fake_root));
+	fake_root->color = IB_RBT_BLACK;
+	fake_root->parent = fake_root->left = fake_root->right = nil;
+	tree->root = fake_root;
+
+	return(tree);
+}
+
+/****************************************************************//**
+Copy a value into a new node and insert the node by key.
+@return	inserted node */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_insert(
+/*=======*/
+	ib_rbt_t*	tree,		/*!< in: rb tree */
+	const void*	key,		/*!< in: key for ordering */
+	const void*	value)		/*!< in: value of key, this value
+					is copied to the node */
+{
+	ib_rbt_node_t*	fresh;
+
+	/* Allocate a node that can carry a copy of the value. */
+	fresh = (ib_rbt_node_t*) ut_malloc(SIZEOF_NODE(tree));
+
+	memcpy(fresh->value, value, tree->sizeof_value);
+	fresh->left = fresh->right = fresh->parent = tree->nil;
+
+	/* Plain binary tree insert, then restore the rb properties. */
+	rbt_tree_insert(tree, key, fresh);
+	rbt_balance_tree(tree, fresh);
+
+	tree->n_nodes++;
+
+	return(fresh);
+}
+
+/****************************************************************//**
+Add a new node below a known position, useful for data that is pre-sorted.
+@return	appended node */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_add_node(
+/*=========*/
+	ib_rbt_t*	tree,		/*!< in: rb tree */
+	ib_rbt_bound_t*	parent,		/*!< in: bounds */
+	const void*	value)		/*!< in: this value is copied
+					to the node */
+{
+	ib_rbt_node_t*	fresh;
+
+	/* Allocate a node that can carry a copy of the value. */
+	fresh = (ib_rbt_node_t*) ut_malloc(SIZEOF_NODE(tree));
+
+	memcpy(fresh->value, value, tree->sizeof_value);
+	fresh->left = fresh->right = fresh->parent = tree->nil;
+
+	/* No last node: hang the new node off the fake root. */
+	if (!parent->last) {
+		parent->last = tree->root;
+	}
+
+	/* No key is compared here: the caller's bounds are trusted to
+	name the right place for the value. */
+	rbt_tree_add_child(tree, parent, fresh);
+	rbt_balance_tree(tree, fresh);
+
+	tree->n_nodes++;
+
+#ifdef IB_RBT_TESTING
+	ut_a(rbt_validate(tree));
+#endif
+	return(fresh);
+}
+
+/****************************************************************//**
+Find the node whose value compares equal to key.
+@return	NULL if not found else the node where key was found */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_lookup(
+/*=======*/
+	const ib_rbt_t*	tree,		/*!< in: rb tree */
+	const void*	key)		/*!< in: key to use for search */
+{
+	const ib_rbt_node_t*	probe;
+
+	/* Descend from the real root until the key matches or we run
+	into the sentinel. */
+	for (probe = ROOT(tree); probe != tree->nil; /* No op */) {
+		int	cmp = tree->compare(key, probe->value);
+
+		if (cmp == 0) {
+			return(probe);
+		}
+
+		probe = (cmp < 0) ? probe->left : probe->right;
+	}
+
+	/* Fell off the tree: there is no such key. */
+	return(NULL);
+}
+
+/****************************************************************//**
+Look up key and, if present, remove its node from the tree and free it.
+@return	TRUE if success FALSE if not found */
+UNIV_INTERN
+ibool
+rbt_delete(
+/*=======*/
+	ib_rbt_t*	tree,		/*!< in: rb tree */
+	const void*	key)		/*!< in: key to delete */
+{
+	ib_rbt_node_t*	victim;
+
+	victim = (ib_rbt_node_t*) rbt_lookup(tree, key);
+
+	if (victim == NULL) {
+		return(FALSE);
+	}
+
+	rbt_remove_node_and_rebalance(tree, victim);
+	ut_free(victim);
+	return(TRUE);
+}
+
+/****************************************************************//**
+Remove a node from the rb tree, the node is not free'd, that is the
+caller's responsibility.
+@return	deleted node but without the const */
+UNIV_INTERN
+ib_rbt_node_t*
+rbt_remove_node(
+/*============*/
+	ib_rbt_t*		tree,		/*!< in: rb tree */
+	const ib_rbt_node_t*	const_node)	/*!< in: node to delete, this
+						is a fudge and declared const
+						because the caller can access
+						only const nodes */
+{
+	/* Cast away the const. */
+	ib_rbt_node_t*	node = (ib_rbt_node_t*) const_node;
+
+	rbt_remove_node_and_rebalance(tree, node);
+
+	/* Handing the node back lets the caller write:
+		ut_free(rbt_remove_node(tree, node)); */
+	return(node);
+}
+
+/****************************************************************//**
+Find the node that has the lowest key that is >= key.
+@return	node satisfying the lower bound constraint or NULL */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_lower_bound(
+/*============*/
+	const ib_rbt_t*	tree,		/*!< in: rb tree */
+	const void*	key)		/*!< in: key to search */
+{
+	ib_rbt_node_t*	best = NULL;
+	ib_rbt_node_t*	walk = ROOT(tree);
+
+	while (walk != tree->nil) {
+		int	cmp = tree->compare(key, walk->value);
+
+		if (cmp == 0) {
+			/* Exact match, it cannot get any tighter. */
+			return(walk);
+		}
+
+		if (cmp < 0) {
+			/* Candidate; a tighter one can only be to the left. */
+			best = walk;
+			walk = walk->left;
+		} else {
+			/* walk is below the key, of no use. */
+			walk = walk->right;
+		}
+	}
+
+	return(best);
+}
+
+/****************************************************************//**
+Find the node that has the greatest key that is <= key.
+@return	node satisfying the upper bound constraint or NULL */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_upper_bound(
+/*============*/
+	const ib_rbt_t*	tree,		/*!< in: rb tree */
+	const void*	key)		/*!< in: key to search */
+{
+	ib_rbt_node_t*	best = NULL;
+	ib_rbt_node_t*	walk = ROOT(tree);
+
+	while (walk != tree->nil) {
+		int	cmp = tree->compare(key, walk->value);
+
+		if (cmp == 0) {
+			/* Exact match, it cannot get any tighter. */
+			return(walk);
+		}
+
+		if (cmp > 0) {
+			/* Candidate; a tighter one can only be to the right. */
+			best = walk;
+			walk = walk->right;
+		} else {
+			/* walk is above the key, of no use. */
+			walk = walk->left;
+		}
+	}
+
+	return(best);
+}
+
+/****************************************************************//**
+Search the tree for key, recording in parent the last node visited and
+the result of comparing key against that node.
+@return	value of result */
+UNIV_INTERN
+int
+rbt_search(
+/*=======*/
+	const ib_rbt_t*	tree,		/*!< in: rb tree */
+	ib_rbt_bound_t*	parent,		/*!< out: search bounds */
+	const void*	key)		/*!< in: key to search */
+{
+	ib_rbt_node_t*	walk;
+
+	/* For an empty tree there is no last node and the key is
+	reported as greater. */
+	parent->result = 1;
+	parent->last = NULL;
+
+	for (walk = ROOT(tree); walk != tree->nil; /* No op */) {
+
+		parent->last = walk;
+		parent->result = tree->compare(key, walk->value);
+
+		if (parent->result == 0) {
+			break;
+		}
+
+		walk = (parent->result > 0) ? walk->right : walk->left;
+	}
+
+	return(parent->result);
+}
+
+/****************************************************************//**
+Search the tree for key with the supplied comparison function instead
+of the tree's own, recording in parent the last node visited and the
+result of comparing key against that node.
+@return	value of result */
+UNIV_INTERN
+int
+rbt_search_cmp(
+/*===========*/
+	const ib_rbt_t*	tree,		/*!< in: rb tree */
+	ib_rbt_bound_t*	parent,		/*!< out: search bounds */
+	const void*	key,		/*!< in: key to search */
+	ib_rbt_compare	compare)	/*!< in: fn to compare items */
+{
+	ib_rbt_node_t*	walk;
+
+	/* For an empty tree there is no last node and the key is
+	reported as greater. */
+	parent->result = 1;
+	parent->last = NULL;
+
+	for (walk = ROOT(tree); walk != tree->nil; /* No op */) {
+
+		parent->last = walk;
+		parent->result = compare(key, walk->value);
+
+		if (parent->result == 0) {
+			break;
+		}
+
+		walk = (parent->result > 0) ? walk->right : walk->left;
+	}
+
+	return(parent->result);
+}
+
+/****************************************************************//**
+Return the left most node in the tree.
+@return	the leftmost node or NULL */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_first(
+/*======*/
+	const ib_rbt_t*	tree)		/*!< in: rb tree */
+{
+	ib_rbt_node_t*	walk;
+	ib_rbt_node_t*	leftmost = NULL;
+
+	/* Keep going left, remembering the last real node seen. */
+	for (walk = ROOT(tree); walk != tree->nil; walk = walk->left) {
+		leftmost = walk;
+	}
+
+	return(leftmost);
+}
+
+/****************************************************************//**
+Return the right most node in the tree.
+@return	the rightmost node or NULL */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_last(
+/*=====*/
+	const ib_rbt_t*	tree)		/*!< in: rb tree */
+{
+	ib_rbt_node_t*	walk;
+	ib_rbt_node_t*	rightmost = NULL;
+
+	/* Keep going right, remembering the last real node seen. */
+	for (walk = ROOT(tree); walk != tree->nil; walk = walk->right) {
+		rightmost = walk;
+	}
+
+	return(rightmost);
+}
+
+/****************************************************************//**
+Step forward from current. A NULL current is passed through as NULL;
+otherwise the answer comes from rbt_find_successor().
+@return	node next from current, or NULL if current is NULL */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_next(
+/*=====*/
+	const ib_rbt_t*		tree,	/*!< in: rb tree */
+	const ib_rbt_node_t*	current)/*!< in: current node */
+{
+	if (!current) {
+		return(NULL);
+	}
+
+	return(rbt_find_successor(tree, current));
+}
+
+/****************************************************************//**
+Step backward from current. A NULL current is passed through as NULL;
+otherwise the answer comes from rbt_find_predecessor().
+@return	node prev from current, or NULL if current is NULL */
+UNIV_INTERN
+const ib_rbt_node_t*
+rbt_prev(
+/*=====*/
+	const ib_rbt_t*		tree,	/*!< in: rb tree */
+	const ib_rbt_node_t*	current)/*!< in: current node */
+{
+	if (!current) {
+		return(NULL);
+	}
+
+	return(rbt_find_predecessor(tree, current));
+}
+
+/****************************************************************//**
+Empty the tree: hand everything below the root to rbt_free_node(), then
+point both links of tree->root at nil and zero the node count. */
+UNIV_INTERN
+void
+rbt_clear(
+/*======*/
+	ib_rbt_t*	tree)		/*!< in: rb tree */
+{
+	/* Must run before the root links are overwritten below. */
+	rbt_free_node(ROOT(tree), tree->nil);
+
+	tree->root->right = tree->nil;
+	tree->root->left = tree->nil;
+	tree->n_nodes = 0;
+}
+
+/****************************************************************//**
+Merge the values from src into dst. src is walked from its first node
+with rbt_next(); a value is added to dst only when rbt_search() on dst
+returns non-zero for it. src itself is left unchanged. Nothing is done
+when src is empty or when dst and src are the same tree.
+@return	no. of recs merged */
+UNIV_INTERN
+ulint
+rbt_merge_uniq(
+/*===========*/
+	ib_rbt_t*	dst,		/*!< in: dst rb tree */
+	const ib_rbt_t*	src)		/*!< in: src rb tree */
+{
+	ib_rbt_bound_t		bound;
+	const ib_rbt_node_t*	node;
+	ulint			n_added = 0;
+
+	if (rbt_empty(src) || dst == src) {
+		return(0);
+	}
+
+	node = rbt_first(src);
+
+	while (node) {
+		/* Skip values for which the search in dst returns 0. */
+		if (rbt_search(dst, &bound, node->value) != 0) {
+			rbt_add_node(dst, &bound, node->value);
+			n_added++;
+		}
+
+		node = rbt_next(src, node);
+	}
+
+	return(n_added);
+}
+
+/****************************************************************//**
+Merge the nodes from src into dst by moving them. Each src node for which
+rbt_search() on dst returns non-zero is removed from src and linked into
+dst (the node itself is reused, its value is preserved). As a side effect
+the duplicates will be left untouched in the src. Nothing is done when
+src is empty or when dst and src are the same tree.
+@return	no. of recs merged, computed as the growth of rbt_size(dst) */
+UNIV_INTERN
+ulint
+rbt_merge_uniq_destructive(
+/*=======================*/
+	ib_rbt_t*	dst,		/*!< in: dst rb tree */
+	ib_rbt_t*	src)		/*!< in: src rb tree */
+{
+	ib_rbt_bound_t	parent;
+	ib_rbt_node_t*	src_node;
+	/* Size of dst before the merge; the return value is relative to it. */
+	ulint		old_size = rbt_size(dst);
+
+	if (rbt_empty(src) || dst == src) {
+		return(0);
+	}
+
+	/* The loop advances src_node itself, hence the empty step clause. */
+	for (src_node = (ib_rbt_node_t*) rbt_first(src); src_node; /* */) {
+		ib_rbt_node_t*	prev = src_node;
+
+		/* Fetch the successor first: prev may be unlinked from
+		src below. */
+		src_node = (ib_rbt_node_t*)rbt_next(src, prev);
+
+		/* Skip duplicates. */
+		if (rbt_search(dst, &parent, prev->value) != 0) {
+
+			/* Remove and reset the node but preserve
+			the node (data) value. */
+			rbt_remove_node_and_rebalance(src, prev);
+
+			/* The nil should be taken from the dst tree. */
+			prev->parent = prev->left = prev->right = dst->nil;
+			/* Link prev into dst using the bounds filled in by
+			the search above, then rebalance dst. */
+			rbt_tree_add_child(dst, &parent, prev);
+			rbt_balance_tree(dst, prev);
+
+			/* The node count of dst is updated here explicitly. */
+			++dst->n_nodes;
+		}
+	}
+
+#if	defined(IB_RBT_TESTING)
+	/* Test builds only: assert that both trees still validate. */
+	ut_a(rbt_validate(dst));
+	ut_a(rbt_validate(src));
+#endif
+	return(rbt_size(dst) - old_size);
+}
+
+/****************************************************************//**
+Validate the tree. The black node count reported by
+rbt_count_black_nodes() for the root must be greater than zero, and
+rbt_check_ordering() must accept the tree.
+@return	TRUE if OK FALSE otherwise */
+UNIV_INTERN
+ibool
+rbt_validate(
+/*=========*/
+	const ib_rbt_t*	tree)		/*!< in: RB tree to validate */
+{
+	/* The ordering check is not run when the count check fails. */
+	if (!(rbt_count_black_nodes(tree, ROOT(tree)) > 0)) {
+		return(FALSE);
+	}
+
+	return(rbt_check_ordering(tree));
+}
+
+/****************************************************************//**
+Print the whole tree by passing its root node and the print callback to
+rbt_print_subtree(). */
+UNIV_INTERN
+void
+rbt_print(
+/*======*/
+	const ib_rbt_t*		tree,	/*!< in: tree to traverse */
+	ib_rbt_print_node	print)	/*!< in: print function */
+{
+	ib_rbt_node_t*	start = ROOT(tree);
+
+	rbt_print_subtree(tree, start, print);
+}


Attachment: [text/bzr-bundle] bzr/svoj@sun.com-20100401151005-c6re90vdvutln15d.bundle
Thread
bzr commit into mysql-5.1-bugteam branch (svoj:3439)Sergey Vojtovich1 Apr