List: Commits « Previous Message | Next Message »
From: antony  Date: July 19 2007 1:10am
Subject: bk commit into 5.1 tree (acurtis:1.2556)
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of antony. When antony does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-07-18 18:10:42-07:00, acurtis@stripped +60 -0
  import from innobase snapshot
  some fixups for build and test results.

  libmysqld/Makefile.am@stripped, 2007-07-18 18:10:20-07:00, acurtis@stripped +2 -1
    post import fixes
      ha_innodb.cc no longer has relative paths in include directives.

  mysql-test/include/innodb_trx_weight.inc@stripped, 2007-07-18 18:10:27-07:00, acurtis@stripped +51 -0
    New BitKeeper file ``mysql-test/include/innodb_trx_weight.inc''

  mysql-test/include/innodb_trx_weight.inc@stripped, 2007-07-18 18:10:27-07:00, acurtis@stripped +0 -0

  mysql-test/include/mix1.inc@stripped, 2007-07-18 18:10:21-07:00, acurtis@stripped +1 -0
    post import fixes
      remove bug29807.frm file

  mysql-test/r/innodb.result@stripped, 2007-07-18 18:10:21-07:00, acurtis@stripped +39 -37
    import from innobase snapshot
    post import fixes
      fix some changes in results

  mysql-test/r/innodb_trx_weight.result@stripped, 2007-07-18 18:10:27-07:00, acurtis@stripped +1 -0
    New BitKeeper file ``mysql-test/r/innodb_trx_weight.result''

  mysql-test/r/innodb_trx_weight.result@stripped, 2007-07-18 18:10:27-07:00, acurtis@stripped +0 -0

  mysql-test/t/innodb.test@stripped, 2007-07-18 18:10:21-07:00, acurtis@stripped +50 -44
    import from innobase snapshot
    post import fixes
      fixup innobase code to use correct error codes

  mysql-test/t/innodb_trx_weight.test@stripped, 2007-07-18 18:10:26-07:00, acurtis@stripped +102 -0
    New BitKeeper file ``mysql-test/t/innodb_trx_weight.test''

  mysql-test/t/innodb_trx_weight.test@stripped, 2007-07-18 18:10:26-07:00, acurtis@stripped +0 -0

  storage/innobase/btr/btr0btr.c@stripped, 2007-07-18 18:10:21-07:00, acurtis@stripped +5 -2
    import from innobase snapshot

  storage/innobase/buf/buf0buf.c@stripped, 2007-07-18 18:10:21-07:00, acurtis@stripped +10 -2
    import from innobase snapshot

  storage/innobase/buf/buf0lru.c@stripped, 2007-07-18 18:10:21-07:00, acurtis@stripped +13 -0
    import from innobase snapshot

  storage/innobase/data/data0data.c@stripped, 2007-07-18 18:10:21-07:00, acurtis@stripped +2 -0
    import from innobase snapshot

  storage/innobase/data/data0type.c@stripped, 2007-07-18 18:10:21-07:00, acurtis@stripped +2 -1
    import from innobase snapshot

  storage/innobase/dict/dict0crea.c@stripped, 2007-07-18 18:10:21-07:00, acurtis@stripped +2 -1
    import from innobase snapshot

  storage/innobase/dict/dict0dict.c@stripped, 2007-07-18 18:10:21-07:00, acurtis@stripped +38 -85
    import from innobase snapshot

  storage/innobase/dict/dict0mem.c@stripped, 2007-07-18 18:10:22-07:00, acurtis@stripped +5 -0
    import from innobase snapshot

  storage/innobase/fsp/fsp0fsp.c@stripped, 2007-07-18 18:10:22-07:00, acurtis@stripped +10 -6
    import from innobase snapshot

  storage/innobase/handler/ha_innodb.cc@stripped, 2007-07-18 18:10:22-07:00, acurtis@stripped +832 -634
    import from innobase snapshot
    post import fixups
      some symbol renames

  storage/innobase/handler/ha_innodb.h@stripped, 2007-07-18 18:10:22-07:00, acurtis@stripped +10 -25
    import from innobase snapshot

  storage/innobase/ibuf/ibuf0ibuf.c@stripped, 2007-07-18 18:10:22-07:00, acurtis@stripped +54 -32
    import from innobase snapshot

  storage/innobase/include/buf0buf.ic@stripped, 2007-07-18 18:10:22-07:00, acurtis@stripped +1 -1
    import from innobase snapshot

  storage/innobase/include/db0err.h@stripped, 2007-07-18 18:10:22-07:00, acurtis@stripped +5 -0
    import from innobase snapshot

  storage/innobase/include/dict0dict.h@stripped, 2007-07-18 18:10:22-07:00, acurtis@stripped +25 -25
    import from innobase snapshot

  storage/innobase/include/dict0dict.ic@stripped, 2007-07-18 18:10:23-07:00, acurtis@stripped +24 -0
    import from innobase snapshot

  storage/innobase/include/dict0mem.h@stripped, 2007-07-18 18:10:23-07:00, acurtis@stripped +12 -4
    import from innobase snapshot

  storage/innobase/include/fsp0fsp.h@stripped, 2007-07-18 18:10:23-07:00, acurtis@stripped +1 -1
    import from innobase snapshot

  storage/innobase/include/ha_prototypes.h@stripped, 2007-07-18 18:10:23-07:00, acurtis@stripped +29 -0
    import from innobase snapshot

  storage/innobase/include/lock0lock.h@stripped, 2007-07-18 18:10:23-07:00, acurtis@stripped +1 -1
    import from innobase snapshot

  storage/innobase/include/mem0mem.ic@stripped, 2007-07-18 18:10:23-07:00, acurtis@stripped +6 -0
    import from innobase snapshot

  storage/innobase/include/os0file.h@stripped, 2007-07-18 18:10:23-07:00, acurtis@stripped +2 -1
    import from innobase snapshot

  storage/innobase/include/page0page.h@stripped, 2007-07-18 18:10:23-07:00, acurtis@stripped +9 -0
    import from innobase snapshot

  storage/innobase/include/rem0rec.ic@stripped, 2007-07-18 18:10:23-07:00, acurtis@stripped +4 -2
    import from innobase snapshot

  storage/innobase/include/row0mysql.h@stripped, 2007-07-18 18:10:23-07:00, acurtis@stripped +1 -0
    import from innobase snapshot

  storage/innobase/include/row0sel.h@stripped, 2007-07-18 18:10:24-07:00, acurtis@stripped +10 -0
    import from innobase snapshot

  storage/innobase/include/trx0trx.h@stripped, 2007-07-18 18:10:24-07:00, acurtis@stripped +33 -14
    import from innobase snapshot

  storage/innobase/include/trx0undo.h@stripped, 2007-07-18 18:10:24-07:00, acurtis@stripped +8 -5
    import from innobase snapshot

  storage/innobase/include/univ.i@stripped, 2007-07-18 18:10:24-07:00, acurtis@stripped +16 -0
    import from innobase snapshot

  storage/innobase/include/ut0ut.h@stripped, 2007-07-18 18:10:24-07:00, acurtis@stripped +5 -0
    import from innobase snapshot

  storage/innobase/lock/lock0lock.c@stripped, 2007-07-18 18:10:24-07:00, acurtis@stripped +7 -10
    import from innobase snapshot

  storage/innobase/log/log0log.c@stripped, 2007-07-18 18:10:24-07:00, acurtis@stripped +16 -18
    import from innobase snapshot

  storage/innobase/log/log0recv.c@stripped, 2007-07-18 18:10:24-07:00, acurtis@stripped +147 -100
    import from innobase snapshot

  storage/innobase/mem/mem0mem.c@stripped, 2007-07-18 18:10:24-07:00, acurtis@stripped +1 -0
    import from innobase snapshot

  storage/innobase/mem/mem0pool.c@stripped, 2007-07-18 18:10:25-07:00, acurtis@stripped +6 -0
    import from innobase snapshot

  storage/innobase/os/os0file.c@stripped, 2007-07-18 18:10:25-07:00, acurtis@stripped +17 -2
    import from innobase snapshot

  storage/innobase/page/page0page.c@stripped, 2007-07-18 18:10:25-07:00, acurtis@stripped +12 -0
    import from innobase snapshot

  storage/innobase/rem/rem0rec.c@stripped, 2007-07-18 18:10:25-07:00, acurtis@stripped +32 -14
    import from innobase snapshot

  storage/innobase/row/row0ins.c@stripped, 2007-07-18 18:10:25-07:00, acurtis@stripped +20 -26
    import from innobase snapshot

  storage/innobase/row/row0mysql.c@stripped, 2007-07-18 18:10:25-07:00, acurtis@stripped +6 -1
    import from innobase snapshot

  storage/innobase/row/row0row.c@stripped, 2007-07-18 18:10:25-07:00, acurtis@stripped +10 -13
    import from innobase snapshot

  storage/innobase/row/row0sel.c@stripped, 2007-07-18 18:10:25-07:00, acurtis@stripped +192 -0
    import from innobase snapshot

  storage/innobase/srv/srv0srv.c@stripped, 2007-07-18 18:10:25-07:00, acurtis@stripped +41 -9
    import from innobase snapshot

  storage/innobase/srv/srv0start.c@stripped, 2007-07-18 18:10:25-07:00, acurtis@stripped +6 -0
    import from innobase snapshot

  storage/innobase/sync/sync0arr.c@stripped, 2007-07-18 18:10:26-07:00, acurtis@stripped +3 -1
    import from innobase snapshot

  storage/innobase/sync/sync0rw.c@stripped, 2007-07-18 18:10:26-07:00, acurtis@stripped +18 -0
    import from innobase snapshot

  storage/innobase/sync/sync0sync.c@stripped, 2007-07-18 18:10:26-07:00, acurtis@stripped +1 -0
    import from innobase snapshot

  storage/innobase/trx/trx0rec.c@stripped, 2007-07-18 18:10:26-07:00, acurtis@stripped +11 -8
    import from innobase snapshot

  storage/innobase/trx/trx0sys.c@stripped, 2007-07-18 18:10:26-07:00, acurtis@stripped +10 -1
    import from innobase snapshot

  storage/innobase/trx/trx0trx.c@stripped, 2007-07-18 18:10:26-07:00, acurtis@stripped +68 -6
    import from innobase snapshot

  storage/innobase/trx/trx0undo.c@stripped, 2007-07-18 18:10:26-07:00, acurtis@stripped +62 -39
    import from innobase snapshot

  storage/innobase/ut/ut0mem.c@stripped, 2007-07-18 18:10:26-07:00, acurtis@stripped +2 -0
    import from innobase snapshot

  storage/innobase/ut/ut0ut.c@stripped, 2007-07-18 18:10:26-07:00, acurtis@stripped +1 -0
    import from innobase snapshot

diff -Nrup a/libmysqld/Makefile.am b/libmysqld/Makefile.am
--- a/libmysqld/Makefile.am	2007-05-07 06:39:33 -07:00
+++ b/libmysqld/Makefile.am	2007-07-18 18:10:20 -07:00
@@ -129,7 +129,8 @@ ha_heap.o:ha_heap.cc
 		$(CXXCOMPILE) $(LM_CFLAGS) -c $<
 
 ha_innodb.o:ha_innodb.cc
-		$(CXXCOMPILE) $(LM_CFLAGS) -c $<
+		$(CXXCOMPILE) -I$(top_srcdir)/storage/innobase/include \
+				$(LM_CFLAGS) -c $<
 
 ha_myisam.o:ha_myisam.cc
 		$(CXXCOMPILE) $(LM_CFLAGS) -c $<
diff -Nrup a/mysql-test/include/innodb_trx_weight.inc b/mysql-test/include/innodb_trx_weight.inc
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/mysql-test/include/innodb_trx_weight.inc	2007-07-18 18:10:27 -07:00
@@ -0,0 +1,51 @@
+-- connect (con1,localhost,root,,)
+-- connect (con2,localhost,root,,)
+
+-- connection con1
+SET autocommit=0;
+SELECT * FROM t1 FOR UPDATE;
+-- if ($con1_extra_sql_present) {
+	-- eval $con1_extra_sql
+-- }
+
+-- connection con2
+SET autocommit=0;
+SELECT * FROM t2 FOR UPDATE;
+-- if ($con2_extra_sql_present) {
+	-- eval $con2_extra_sql
+-- }
+
+-- if ($con1_should_be_rolledback) {
+	-- connection con1
+	-- send
+	INSERT INTO t2 VALUES (0);
+
+	-- connection con2
+	INSERT INTO t1 VALUES (0);
+	ROLLBACK;
+
+	-- connection con1
+	-- error ER_LOCK_DEADLOCK
+	-- reap
+-- }
+# else
+-- if (!$con1_should_be_rolledback) {
+	-- connection con2
+	-- send
+	INSERT INTO t1 VALUES (0);
+
+	-- connection con1
+	INSERT INTO t2 VALUES (0);
+	ROLLBACK;
+
+	-- connection con2
+	-- error ER_LOCK_DEADLOCK
+	-- reap
+-- }
+
+-- connection default
+
+DELETE FROM t5_nontrans;
+
+-- disconnect con1
+-- disconnect con2
diff -Nrup a/mysql-test/include/mix1.inc b/mysql-test/include/mix1.inc
--- a/mysql-test/include/mix1.inc	2007-07-16 03:09:43 -07:00
+++ b/mysql-test/include/mix1.inc	2007-07-18 18:10:21 -07:00
@@ -683,6 +683,7 @@ select * from bug29807;
 drop table t1;
 --error 1051
 drop table bug29807;
+--remove_file $MYSQLTEST_VARDIR/master-data/test/bug29807.frm
 create table bug29807 (a int);
 drop table bug29807;
 
diff -Nrup a/mysql-test/r/innodb.result b/mysql-test/r/innodb.result
--- a/mysql-test/r/innodb.result	2007-07-02 10:32:58 -07:00
+++ b/mysql-test/r/innodb.result	2007-07-18 18:10:21 -07:00
@@ -501,7 +501,7 @@ ERROR 23000: Duplicate entry 'test2' for
 select * from t1;
 id	ggid	email	passwd
 1	this will work		
-3	test2		this will work
+4	test2		this will work
 select * from t1 where id=1;
 id	ggid	email	passwd
 1	this will work		
@@ -1086,39 +1086,6 @@ n	d
 1	30
 2	20
 drop table t1,t2;
-CREATE TABLE `t1` (
-`a` int(11) NOT NULL auto_increment,
-`b` int(11) default NULL,
-PRIMARY KEY  (`a`)
-) ENGINE=MyISAM DEFAULT CHARSET=latin1 ;
-CREATE TABLE `t2` (
-`a` int(11) NOT NULL auto_increment,
-`b` int(11) default NULL,
-PRIMARY KEY  (`a`)
-) ENGINE=INNODB DEFAULT CHARSET=latin1 ;
-insert into t1 values (1,1),(2,2);
-insert into t2 values (1,1),(4,4);
-reset master;
-UPDATE t2,t1 SET t2.a=t1.a+2;
-ERROR 23000: Duplicate entry '3' for key 'PRIMARY'
-select * from t2 /* must be (3,1), (4,4) */;
-a	b
-1	1
-4	4
-show master status /* there must no UPDATE in binlog */;
-File	Position	Binlog_Do_DB	Binlog_Ignore_DB
-master-bin.000001	106		
-delete from t1;
-delete from t2;
-insert into t1 values (1,2),(3,4),(4,4);
-insert into t2 values (1,2),(3,4),(4,4);
-reset master;
-UPDATE t2,t1  SET t2.a=t2.b where t2.a=t1.a;
-ERROR 23000: Duplicate entry '4' for key 'PRIMARY'
-show master status /* there must be no UPDATE query event */;
-File	Position	Binlog_Do_DB	Binlog_Ignore_DB
-master-bin.000001	106		
-drop table t1, t2;
 create table t1 (a int, b int) engine=innodb;
 insert into t1 values(20,null);
 select t2.b, ifnull(t2.b,"this is null") from t1 as t2 left join t1 as t3 on
@@ -1665,6 +1632,30 @@ t2	CREATE TABLE `t2` (
   CONSTRAINT `t2_ibfk_2` FOREIGN KEY (`b`) REFERENCES `t1` (`id`)
 ) ENGINE=InnoDB DEFAULT CHARSET=latin1
 drop table t2, t1;
+flush status;
+show status like "binlog_cache_use";
+Variable_name	Value
+Binlog_cache_use	0
+show status like "binlog_cache_disk_use";
+Variable_name	Value
+Binlog_cache_disk_use	0
+create table t1 (a int) engine=innodb;
+show status like "binlog_cache_use";
+Variable_name	Value
+Binlog_cache_use	0
+show status like "binlog_cache_disk_use";
+Variable_name	Value
+Binlog_cache_disk_use	0
+begin;
+delete from t1;
+commit;
+show status like "binlog_cache_use";
+Variable_name	Value
+Binlog_cache_use	0
+show status like "binlog_cache_disk_use";
+Variable_name	Value
+Binlog_cache_disk_use	0
+drop table t1;
 create table t1 (c char(10), index (c,c)) engine=innodb;
 ERROR 42S21: Duplicate column name 'c'
 create table t1 (c1 char(10), c2 char(10), index (c1,c2,c1)) engine=innodb;
@@ -1784,13 +1775,13 @@ Variable_name	Value
 Innodb_page_size	16384
 show status like "Innodb_rows_deleted";
 Variable_name	Value
-Innodb_rows_deleted	72
+Innodb_rows_deleted	2070
 show status like "Innodb_rows_inserted";
 Variable_name	Value
-Innodb_rows_inserted	1088
+Innodb_rows_inserted	3083
 show status like "Innodb_rows_updated";
 Variable_name	Value
-Innodb_rows_updated	888
+Innodb_rows_updated	886
 show status like "Innodb_row_lock_waits";
 Variable_name	Value
 Innodb_row_lock_waits	0
@@ -3211,3 +3202,14 @@ t1	CREATE TABLE `t1` (
   CONSTRAINT `t1_t2` FOREIGN KEY (`id`) REFERENCES `t2` (`id`)
 ) ENGINE=InnoDB AUTO_INCREMENT=349 DEFAULT CHARSET=latin1
 DROP TABLE t1,t2;
+CREATE TABLE t1 (
+c01 CHAR(255), c02 CHAR(255), c03 CHAR(255), c04 CHAR(255),
+c05 CHAR(255), c06 CHAR(255), c07 CHAR(255), c08 CHAR(255),
+c09 CHAR(255), c10 CHAR(255), c11 CHAR(255), c12 CHAR(255),
+c13 CHAR(255), c14 CHAR(255), c15 CHAR(255), c16 CHAR(255),
+c17 CHAR(255), c18 CHAR(255), c19 CHAR(255), c20 CHAR(255),
+c21 CHAR(255), c22 CHAR(255), c23 CHAR(255), c24 CHAR(255),
+c25 CHAR(255), c26 CHAR(255), c27 CHAR(255), c28 CHAR(255),
+c29 CHAR(255), c30 CHAR(255), c31 CHAR(255), c32 CHAR(255)
+) ENGINE = InnoDB;
+ERROR 42000: Row size too large. The maximum row size for the used table type, not counting BLOBs, is 8126. You have to change some columns to TEXT or BLOBs
diff -Nrup a/mysql-test/r/innodb_trx_weight.result b/mysql-test/r/innodb_trx_weight.result
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/mysql-test/r/innodb_trx_weight.result	2007-07-18 18:10:27 -07:00
@@ -0,0 +1 @@
+SET storage_engine=InnoDB;
diff -Nrup a/mysql-test/t/innodb.test b/mysql-test/t/innodb.test
--- a/mysql-test/t/innodb.test	2007-07-02 04:41:46 -07:00
+++ b/mysql-test/t/innodb.test	2007-07-18 18:10:21 -07:00
@@ -13,11 +13,6 @@
 
 -- source include/not_embedded.inc
 -- source include/have_innodb.inc
--- source include/have_log_bin.inc
-
-# Disabling it temporarily for statement-based logging since some
-# tests are not safe while binlog is on.
--- source include/have_binlog_format_mixed_or_row.inc
 
 #
 # Small basic test with ignore
@@ -760,45 +755,6 @@ select * from t2;
 drop table t1,t2;
 
 #
-# Bug#27716  	multi-update did partially and has not binlogged
-#
-
-CREATE TABLE `t1` (
-  `a` int(11) NOT NULL auto_increment,
-  `b` int(11) default NULL,
-  PRIMARY KEY  (`a`)
-) ENGINE=MyISAM DEFAULT CHARSET=latin1 ;
-
-CREATE TABLE `t2` (
-  `a` int(11) NOT NULL auto_increment,
-  `b` int(11) default NULL,
-  PRIMARY KEY  (`a`)
-) ENGINE=INNODB DEFAULT CHARSET=latin1 ;
-
-# A. testing multi_update::send_eof() execution branch
-insert into t1 values (1,1),(2,2);
-insert into t2 values (1,1),(4,4);
-reset master;
---error ER_DUP_ENTRY
-UPDATE t2,t1 SET t2.a=t1.a+2;
-# check
-select * from t2 /* must be (3,1), (4,4) */;
-show master status /* there must no UPDATE in binlog */;
-
-# B. testing multi_update::send_error() execution branch
-delete from t1;
-delete from t2;
-insert into t1 values (1,2),(3,4),(4,4);
-insert into t2 values (1,2),(3,4),(4,4);
-reset master;
---error ER_DUP_ENTRY
-UPDATE t2,t1  SET t2.a=t2.b where t2.a=t1.a;
-show master status /* there must be no UPDATE query event */;
-
-# cleanup bug#27716
-drop table t1, t2;
-
-#
 # Testing of IFNULL
 #
 create table t1 (a int, b int) engine=innodb;
@@ -1192,6 +1148,41 @@ drop table t2, t1;
 
 
 #
+# Let us test binlog_cache_use and binlog_cache_disk_use status vars.
+# Actually this test has nothing to do with innodb per se, it just requires
+# transactional table. 
+#
+flush status;
+show status like "binlog_cache_use";
+show status like "binlog_cache_disk_use";
+
+create table t1 (a int) engine=innodb;
+
+# Now we are going to create transaction which is long enough so its 
+# transaction binlog will be flushed to disk...
+let $1=2000;
+disable_query_log;
+begin;
+while ($1)
+{
+ eval insert into t1 values( $1 );
+ dec $1;
+}
+commit;
+enable_query_log;
+show status like "binlog_cache_use";
+show status like "binlog_cache_disk_use";
+
+# Transaction which should not be flushed to disk and so should not
+# increase binlog_cache_disk_use.
+begin;
+delete from t1;
+commit;
+show status like "binlog_cache_use";
+show status like "binlog_cache_disk_use";
+drop table t1;
+
+#
 # Bug #6126: Duplicate columns in keys gives misleading error message
 #
 --error 1060
@@ -2348,6 +2339,21 @@ ALTER TABLE t1 ADD CONSTRAINT t1_t2 FORE
 SHOW CREATE TABLE t1;
 
 DROP TABLE t1,t2;
+
+#
+# Bug #21101 (Prints wrong error message if max row size is too large)
+#
+--error 1118
+CREATE TABLE t1 (
+	c01 CHAR(255), c02 CHAR(255), c03 CHAR(255), c04 CHAR(255),
+	c05 CHAR(255), c06 CHAR(255), c07 CHAR(255), c08 CHAR(255),
+	c09 CHAR(255), c10 CHAR(255), c11 CHAR(255), c12 CHAR(255),
+	c13 CHAR(255), c14 CHAR(255), c15 CHAR(255), c16 CHAR(255),
+	c17 CHAR(255), c18 CHAR(255), c19 CHAR(255), c20 CHAR(255),
+	c21 CHAR(255), c22 CHAR(255), c23 CHAR(255), c24 CHAR(255),
+	c25 CHAR(255), c26 CHAR(255), c27 CHAR(255), c28 CHAR(255),
+	c29 CHAR(255), c30 CHAR(255), c31 CHAR(255), c32 CHAR(255)
+	) ENGINE = InnoDB;
 
 #######################################################################
 #                                                                     #
diff -Nrup a/mysql-test/t/innodb_trx_weight.test b/mysql-test/t/innodb_trx_weight.test
--- /dev/null	Wed Dec 31 16:00:00 196900
+++ b/mysql-test/t/innodb_trx_weight.test	2007-07-18 18:10:26 -07:00
@@ -0,0 +1,102 @@
+#
+# Ensure that the number of locks (SELECT FOR UPDATE for example) is
+# added to the number of altered rows when choosing the smallest
+# transaction to kill as a victim when a deadlock is detected.
+# Also, transactions that had edited non-transactional tables should
+# be heavier than ones that had not.
+#
+
+-- source include/have_innodb.inc
+
+SET storage_engine=InnoDB;
+
+# we do not really care about what gets output; we are only
+# interested in getting the deadlock resolved according to our
+# expectations
+-- disable_query_log
+-- disable_result_log
+
+-- disable_warnings
+DROP TABLE IF EXISTS t1, t2, t3, t4, t5_nontrans;
+-- enable_warnings
+
+# we will create a simple deadlock with t1, t2 and two connections
+CREATE TABLE t1 (a INT);
+CREATE TABLE t2 (a INT);
+
+# auxiliary table with a bulk of rows which will be locked by a
+# transaction to increase its weight
+CREATE TABLE t3 (a INT);
+
+# auxiliary empty table which will be inserted by a
+# transaction to increase its weight
+CREATE TABLE t4 (a INT);
+
+# auxiliary non-transactional table which will be edited by a
+# transaction to tremendously increase its weight
+CREATE TABLE t5_nontrans (a INT) ENGINE=MyISAM;
+
+INSERT INTO t1 VALUES (1);
+INSERT INTO t2 VALUES (1);
+# insert a lot of rows in t3
+INSERT INTO t3 VALUES (1);
+INSERT INTO t3 SELECT * FROM t3;
+INSERT INTO t3 SELECT * FROM t3;
+INSERT INTO t3 SELECT * FROM t3;
+INSERT INTO t3 SELECT * FROM t3;
+INSERT INTO t3 SELECT * FROM t3;
+INSERT INTO t3 SELECT * FROM t3;
+INSERT INTO t3 SELECT * FROM t3;
+INSERT INTO t3 SELECT * FROM t3;
+INSERT INTO t3 SELECT * FROM t3;
+INSERT INTO t3 SELECT * FROM t3;
+INSERT INTO t3 SELECT * FROM t3;
+
+# test locking weight
+
+-- let $con1_extra_sql =
+-- let $con1_extra_sql_present = 0
+-- let $con2_extra_sql = SELECT * FROM t3 FOR UPDATE
+-- let $con2_extra_sql_present = 1
+-- let $con1_should_be_rolledback = 1
+-- source include/innodb_trx_weight.inc
+
+-- let $con1_extra_sql = INSERT INTO t4 VALUES (1), (1)
+-- let $con1_extra_sql_present = 1
+-- let $con2_extra_sql = SELECT * FROM t3 FOR UPDATE
+-- let $con2_extra_sql_present = 1
+-- let $con1_should_be_rolledback = 1
+-- source include/innodb_trx_weight.inc
+
+-- let $con1_extra_sql = INSERT INTO t4 VALUES (1), (1), (1), (1), (1), (1)
+-- let $con1_extra_sql_present = 1
+-- let $con2_extra_sql = SELECT * FROM t3 FOR UPDATE
+-- let $con2_extra_sql_present = 1
+-- let $con1_should_be_rolledback = 0
+-- source include/innodb_trx_weight.inc
+
+# test weight when non-transactional tables are edited
+
+-- let $con1_extra_sql = INSERT INTO t4 VALUES (1), (1), (1)
+-- let $con1_extra_sql_present = 1
+-- let $con2_extra_sql =
+-- let $con2_extra_sql_present = 0
+-- let $con1_should_be_rolledback = 0
+-- source include/innodb_trx_weight.inc
+
+-- let $con1_extra_sql = INSERT INTO t4 VALUES (1), (1), (1)
+-- let $con1_extra_sql_present = 1
+-- let $con2_extra_sql = INSERT INTO t5_nontrans VALUES (1)
+-- let $con2_extra_sql_present = 1
+-- let $con1_should_be_rolledback = 1
+-- source include/innodb_trx_weight.inc
+
+-- let $con1_extra_sql = INSERT INTO t4 VALUES (1), (1), (1)
+-- let $con1_extra_sql = $con1_extra_sql; INSERT INTO t5_nontrans VALUES (1)
+-- let $con1_extra_sql_present = 1
+-- let $con2_extra_sql = INSERT INTO t5_nontrans VALUES (1)
+-- let $con2_extra_sql_present = 1
+-- let $con1_should_be_rolledback = 0
+-- source include/innodb_trx_weight.inc
+
+DROP TABLE t1, t2, t3, t4, t5_nontrans;
diff -Nrup a/storage/innobase/btr/btr0btr.c b/storage/innobase/btr/btr0btr.c
--- a/storage/innobase/btr/btr0btr.c	2006-11-08 20:00:52 -08:00
+++ b/storage/innobase/btr/btr0btr.c	2007-07-18 18:10:21 -07:00
@@ -2606,8 +2606,11 @@ btr_index_rec_validate(
 
 		rec_get_nth_field(rec, offsets, i, &len);
 
-		/* Note that prefix indexes are not fixed size even when
-		their type is CHAR. */
+		/* Note that if fixed_size != 0, it equals the
+		length of a fixed-size column in the clustered index.
+		A prefix index of the column is of fixed, but different
+		length.  When fixed_size == 0, prefix_len is the maximum
+		length of the prefix index column. */
 
 		if ((dict_index_get_nth_field(index, i)->prefix_len == 0
 		     && len != UNIV_SQL_NULL && fixed_size
diff -Nrup a/storage/innobase/buf/buf0buf.c b/storage/innobase/buf/buf0buf.c
--- a/storage/innobase/buf/buf0buf.c	2007-03-22 14:59:23 -07:00
+++ b/storage/innobase/buf/buf0buf.c	2007-07-18 18:10:21 -07:00
@@ -903,8 +903,7 @@ buf_block_make_young(
 	/* Note that we read freed_page_clock's without holding any mutex:
 	this is allowed since the result is used only in heuristics */
 
-	if (buf_pool->freed_page_clock >= block->freed_page_clock
-				+ 1 + (buf_pool->curr_size / 4)) {
+	if (buf_block_peek_if_too_old(block)) {
 
 		mutex_enter(&buf_pool->mutex);
 		/* There has been freeing activity in the LRU list:
@@ -1647,6 +1646,15 @@ buf_page_init(
 	block->index		= NULL;
 
 	block->lock_hash_val	= lock_rec_hash(space, offset);
+
+#ifdef UNIV_DEBUG_VALGRIND
+	if (!space) {
+		/* Silence valid Valgrind warnings about uninitialized
+		data being written to data files.  There are some unused
+		bytes on some pages that InnoDB does not initialize. */
+		UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
+	}
+#endif /* UNIV_DEBUG_VALGRIND */
 
 	/* Insert into the hash table of file pages */
 
diff -Nrup a/storage/innobase/buf/buf0lru.c b/storage/innobase/buf/buf0lru.c
--- a/storage/innobase/buf/buf0lru.c	2007-03-22 14:59:24 -07:00
+++ b/storage/innobase/buf/buf0lru.c	2007-07-18 18:10:21 -07:00
@@ -244,7 +244,15 @@ buf_LRU_search_and_free_block(
 			frame at all */
 
 			if (block->frame) {
+				/* The page was declared uninitialized
+				by buf_LRU_block_remove_hashed_page().
+				We need to flag the contents of the
+				page valid (which it still is) in
+				order to avoid bogus Valgrind
+				warnings. */
+				UNIV_MEM_VALID(block->frame, UNIV_PAGE_SIZE);
 				btr_search_drop_page_hash_index(block->frame);
+				UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE);
 			}
 
 			ut_a(block->buf_fix_count == 0);
@@ -449,6 +457,7 @@ loop:
 		mutex_enter(&block->mutex);
 
 		block->state = BUF_BLOCK_READY_FOR_USE;
+		UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
 
 		mutex_exit(&block->mutex);
 
@@ -864,6 +873,7 @@ buf_LRU_block_free_non_file_page(
 
 	block->state = BUF_BLOCK_NOT_USED;
 
+	UNIV_MEM_ALLOC(block->frame, UNIV_PAGE_SIZE);
 #ifdef UNIV_DEBUG
 	/* Wipe contents of page to reveal possible stale pointers to it */
 	memset(block->frame, '\0', UNIV_PAGE_SIZE);
@@ -871,6 +881,8 @@ buf_LRU_block_free_non_file_page(
 	UT_LIST_ADD_FIRST(free, buf_pool->free, block);
 	block->in_free_list = TRUE;
 
+	UNIV_MEM_FREE(block->frame, UNIV_PAGE_SIZE);
+
 	if (srv_use_awe && block->frame) {
 		/* Add to the list of mapped pages */
 
@@ -939,6 +951,7 @@ buf_LRU_block_remove_hashed_page(
 		    buf_page_address_fold(block->space, block->offset),
 		    block);
 
+	UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE);
 	block->state = BUF_BLOCK_REMOVE_HASH;
 }
 
diff -Nrup a/storage/innobase/data/data0data.c b/storage/innobase/data/data0data.c
--- a/storage/innobase/data/data0data.c	2006-09-21 00:38:39 -07:00
+++ b/storage/innobase/data/data0data.c	2007-07-18 18:10:21 -07:00
@@ -18,6 +18,8 @@ Created 5/30/1994 Heikki Tuuri
 #include "dict0dict.h"
 #include "btr0cur.h"
 
+#include <ctype.h>
+
 #ifdef UNIV_DEBUG
 byte	data_error;	/* data pointers of tuple fields are initialized
 			to point here for error checking */
diff -Nrup a/storage/innobase/data/data0type.c b/storage/innobase/data/data0type.c
--- a/storage/innobase/data/data0type.c	2006-09-21 00:38:39 -07:00
+++ b/storage/innobase/data/data0type.c	2007-07-18 18:10:21 -07:00
@@ -190,7 +190,8 @@ dtype_validate(
 	dtype_t*	type)	/* in: type struct to validate */
 {
 	ut_a(type);
-	ut_a((type->mtype >= DATA_VARCHAR) && (type->mtype <= DATA_MYSQL));
+	ut_a(type->mtype >= DATA_VARCHAR);
+	ut_a(type->mtype <= DATA_MYSQL);
 
 	if (type->mtype == DATA_SYS) {
 		ut_a((type->prtype & DATA_MYSQL_TYPE_MASK) < DATA_N_SYS_COLS);
diff -Nrup a/storage/innobase/dict/dict0crea.c b/storage/innobase/dict/dict0crea.c
--- a/storage/innobase/dict/dict0crea.c	2007-03-22 14:59:24 -07:00
+++ b/storage/innobase/dict/dict0crea.c	2007-07-18 18:10:21 -07:00
@@ -1195,7 +1195,8 @@ dict_create_or_check_foreign_constraint_
 		fprintf(stderr, "InnoDB: error %lu in creation\n",
 			(ulong) error);
 
-		ut_a(error == DB_OUT_OF_FILE_SPACE);
+		ut_a(error == DB_OUT_OF_FILE_SPACE
+		     || error == DB_TOO_MANY_CONCURRENT_TRXS);
 
 		fprintf(stderr,
 			"InnoDB: creation failed\n"
diff -Nrup a/storage/innobase/dict/dict0dict.c b/storage/innobase/dict/dict0dict.c
--- a/storage/innobase/dict/dict0dict.c	2007-03-28 22:01:10 -07:00
+++ b/storage/innobase/dict/dict0dict.c	2007-07-18 18:10:21 -07:00
@@ -30,6 +30,8 @@ Created 1/8/1996 Heikki Tuuri
 # include "m_ctype.h" /* my_isspace() */
 #endif /* !UNIV_HOTBACKUP */
 
+#include <ctype.h>
+
 dict_sys_t*	dict_sys	= NULL;	/* the dictionary system */
 
 rw_lock_t	dict_operation_lock;	/* table create, drop, etc. reserve
@@ -416,6 +418,18 @@ dict_table_get_col_name(
 	return(s);
 }
 
+
+/************************************************************************
+Acquire the autoinc lock.*/
+
+void
+dict_table_autoinc_lock(
+/*====================*/
+	dict_table_t*	table)
+{
+	mutex_enter(&table->autoinc_mutex);
+}
+
 /************************************************************************
 Initializes the autoinc counter. It is not an error to initialize an already
 initialized counter. */
@@ -426,54 +440,8 @@ dict_table_autoinc_initialize(
 	dict_table_t*	table,	/* in: table */
 	ib_longlong	value)	/* in: next value to assign to a row */
 {
-	mutex_enter(&(table->autoinc_mutex));
-
 	table->autoinc_inited = TRUE;
 	table->autoinc = value;
-
-	mutex_exit(&(table->autoinc_mutex));
-}
-
-/************************************************************************
-Gets the next autoinc value (== autoinc counter value), 0 if not yet
-initialized. If initialized, increments the counter by 1. */
-
-ib_longlong
-dict_table_autoinc_get(
-/*===================*/
-				/* out: value for a new row, or 0 */
-	dict_table_t*	table)	/* in: table */
-{
-	ib_longlong	value;
-
-	mutex_enter(&(table->autoinc_mutex));
-
-	if (!table->autoinc_inited) {
-
-		value = 0;
-	} else {
-		value = table->autoinc;
-		table->autoinc = table->autoinc + 1;
-	}
-
-	mutex_exit(&(table->autoinc_mutex));
-
-	return(value);
-}
-
-/************************************************************************
-Decrements the autoinc counter value by 1. */
-
-void
-dict_table_autoinc_decrement(
-/*=========================*/
-	dict_table_t*	table)	/* in: table */
-{
-	mutex_enter(&(table->autoinc_mutex));
-
-	table->autoinc = table->autoinc - 1;
-
-	mutex_exit(&(table->autoinc_mutex));
 }
 
 /************************************************************************
@@ -488,8 +456,6 @@ dict_table_autoinc_read(
 {
 	ib_longlong	value;
 
-	mutex_enter(&(table->autoinc_mutex));
-
 	if (!table->autoinc_inited) {
 
 		value = 0;
@@ -497,35 +463,11 @@ dict_table_autoinc_read(
 		value = table->autoinc;
 	}
 
-	mutex_exit(&(table->autoinc_mutex));
-
 	return(value);
 }
 
 /************************************************************************
-Peeks the autoinc counter value, 0 if not yet initialized. Does not
-increment the counter. The read not protected by any mutex! */
-
-ib_longlong
-dict_table_autoinc_peek(
-/*====================*/
-				/* out: value of the counter */
-	dict_table_t*	table)	/* in: table */
-{
-	ib_longlong	value;
-
-	if (!table->autoinc_inited) {
-
-		value = 0;
-	} else {
-		value = table->autoinc;
-	}
-
-	return(value);
-}
-
-/************************************************************************
-Updates the autoinc counter if the value supplied is equal or bigger than the
+Updates the autoinc counter if the value supplied is greater than the
 current value. If not inited, does nothing. */
 
 void
@@ -535,15 +477,21 @@ dict_table_autoinc_update(
 	dict_table_t*	table,	/* in: table */
 	ib_longlong	value)	/* in: value which was assigned to a row */
 {
-	mutex_enter(&(table->autoinc_mutex));
+	if (table->autoinc_inited && value > table->autoinc) {
 
-	if (table->autoinc_inited) {
-		if (value >= table->autoinc) {
-			table->autoinc = value + 1;
-		}
+		table->autoinc = value;
 	}
+}
+
+/************************************************************************
+Release the autoinc lock.*/
 
-	mutex_exit(&(table->autoinc_mutex));
+void
+dict_table_autoinc_unlock(
+/*======================*/
+	dict_table_t*	table)	/* in: release autoinc lock for this table */
+{
+	mutex_exit(&table->autoinc_mutex);
 }
 
 /************************************************************************
@@ -1529,6 +1477,12 @@ dict_index_add_col(
 	if (field->fixed_len > DICT_MAX_INDEX_COL_LEN) {
 		field->fixed_len = 0;
 	}
+#if DICT_MAX_INDEX_COL_LEN != 768
+	/* The comparison limit above must be constant.  If it were
+	changed, the disk format of some fixed-length columns would
+	change, which would be a disaster. */
+# error "DICT_MAX_INDEX_COL_LEN != 768"
+#endif
 
 	if (!(col->prtype & DATA_NOT_NULL)) {
 		index->n_nullable++;
@@ -1585,9 +1539,6 @@ dict_index_copy_types(
 		ifield = dict_index_get_nth_field(index, i);
 		dfield_type = dfield_get_type(dtuple_get_nth_field(tuple, i));
 		dict_col_copy_type(dict_field_get_col(ifield), dfield_type);
-		if (UNIV_UNLIKELY(ifield->prefix_len)) {
-			dfield_type->len = ifield->prefix_len;
-		}
 	}
 }
 
@@ -3361,7 +3312,8 @@ dict_create_foreign_constraints(
 	ulint			err;
 	mem_heap_t*		heap;
 
-	ut_a(trx && trx->mysql_thd);
+	ut_a(trx);
+	ut_a(trx->mysql_thd);
 
 	str = dict_strip_comments(sql_string);
 	heap = mem_heap_create(10000);
@@ -3403,7 +3355,8 @@ dict_foreign_parse_drop_constraints(
 	FILE*			ef	= dict_foreign_err_file;
 	struct charset_info_st*	cs;
 
-	ut_a(trx && trx->mysql_thd);
+	ut_a(trx);
+	ut_a(trx->mysql_thd);
 
 	cs = innobase_get_charset(trx->mysql_thd);
 
@@ -3712,7 +3665,7 @@ dict_index_calc_min_rec_len(
 		}
 
 		/* round the NULL flags up to full bytes */
-		sum += (nullable + 7) / 8;
+		sum += UT_BITS_IN_BYTES(nullable);
 
 		return(sum);
 	}
diff -Nrup a/storage/innobase/dict/dict0mem.c b/storage/innobase/dict/dict0mem.c
--- a/storage/innobase/dict/dict0mem.c	2007-02-23 03:13:50 -08:00
+++ b/storage/innobase/dict/dict0mem.c	2007-07-18 18:10:22 -07:00
@@ -90,6 +90,11 @@ dict_mem_table_create(
 	mutex_create(&table->autoinc_mutex, SYNC_DICT_AUTOINC_MUTEX);
 
 	table->autoinc_inited = FALSE;
+
+	/* The actual increment value will be set by MySQL, we simply
+	default to 1 here.*/
+	table->autoinc_increment = 1;
+
 #ifdef UNIV_DEBUG
 	table->magic_n = DICT_TABLE_MAGIC_N;
 #endif /* UNIV_DEBUG */
diff -Nrup a/storage/innobase/fsp/fsp0fsp.c b/storage/innobase/fsp/fsp0fsp.c
--- a/storage/innobase/fsp/fsp0fsp.c	2007-03-22 14:59:24 -07:00
+++ b/storage/innobase/fsp/fsp0fsp.c	2007-07-18 18:10:22 -07:00
@@ -205,10 +205,9 @@ the extent are free and which contain ol
 					space */
 #define	XDES_FSEG		4	/* extent belongs to a segment */
 
-/* File extent data structure size in bytes. The "+ 7 ) / 8" part in the
-definition rounds the number of bytes upward. */
+/* File extent data structure size in bytes. */
 #define	XDES_SIZE							\
-	(XDES_BITMAP + (FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE + 7) / 8)
+	(XDES_BITMAP + UT_BITS_IN_BYTES(FSP_EXTENT_SIZE * XDES_BITS_PER_PAGE))
 
 /* Offset of the descriptor array on a descriptor page */
 #define	XDES_ARR_OFFSET		(FSP_HEADER_OFFSET + FSP_HEADER_SIZE)
@@ -2830,7 +2829,7 @@ will be able to insert new data to the d
 tablespace. Only free extents are taken into account and we also subtract
 the safety margin required by the above function fsp_reserve_free_extents. */
 
-ulint
+ullint
 fsp_get_available_space_in_free_extents(
 /*====================================*/
 			/* out: available space in kB */
@@ -2896,7 +2895,8 @@ fsp_get_available_space_in_free_extents(
 		return(0);
 	}
 
-	return(((n_free - reserve) * FSP_EXTENT_SIZE)
+	return((ullint)(n_free - reserve)
+	       * FSP_EXTENT_SIZE
 	       * (UNIV_PAGE_SIZE / 1024));
 }
 
@@ -3649,7 +3649,11 @@ fsp_validate(
 	n_full_frag_pages = FSP_EXTENT_SIZE
 		* flst_get_len(header + FSP_FULL_FRAG, &mtr);
 
-	ut_a(free_limit <= size || (space != 0 && size < FSP_EXTENT_SIZE));
+	if (UNIV_UNLIKELY(free_limit > size)) {
+
+		ut_a(space != 0);
+		ut_a(size < FSP_EXTENT_SIZE);
+	}
 
 	flst_validate(header + FSP_FREE, &mtr);
 	flst_validate(header + FSP_FREE_FRAG, &mtr);
diff -Nrup a/storage/innobase/handler/ha_innodb.cc b/storage/innobase/handler/ha_innodb.cc
--- a/storage/innobase/handler/ha_innodb.cc	2007-07-09 01:54:22 -07:00
+++ b/storage/innobase/handler/ha_innodb.cc	2007-07-18 18:10:22 -07:00
@@ -15,11 +15,10 @@
 
 /* This file defines the InnoDB handler: the interface between MySQL and InnoDB
 NOTE: You can only use noninlined InnoDB functions in this file, because we
-have disables the InnoDB inlining in this file. */
+have disabled the InnoDB inlining in this file. */
 
 /* TODO list for the InnoDB handler in 5.0:
-  - Remove the flag trx->active_trans and look at the InnoDB
-    trx struct state field
+  - Remove the flag trx->active_trans and look at trx->conc_state
   - fix savepoint functions to use savepoint storage area
   - Find out what kind of problems the OS X case-insensitivity causes to
     table and database names; should we 'normalize' the names like we do
@@ -31,8 +30,7 @@ have disables the InnoDB inlining in thi
 #endif
 
 #include <mysql_priv.h>
-
-#ifdef WITH_INNOBASE_STORAGE_ENGINE
+#include <mysqld_error.h>
 
 #include <m_ctype.h>
 #include <hash.h>
@@ -40,15 +38,38 @@ have disables the InnoDB inlining in thi
 #include <mysys_err.h>
 #include <my_sys.h>
 #include "ha_innodb.h"
+#include <mysql/plugin.h>
 
-pthread_mutex_t innobase_share_mutex,	/* to protect innobase_open_files */
-		prepare_commit_mutex;	/* to force correct commit order in
-					binlog */
-ulong commit_threads= 0;
-pthread_mutex_t commit_threads_m;
-pthread_cond_t commit_cond;
-pthread_mutex_t commit_cond_m;
-bool innodb_inited= 0;
+#ifdef MYSQL_SERVER
+/* Define some macros until these functions are declared in <mysql/plugin.h>.
+Once these functions are defined by MySQL, we may consider
+removing -DMYSQL_SERVER from ../Makefile.am as well. */
+#define thd_charset(thd) (thd)->charset()
+#define thd_get_xid(thd,xid_) ((*xid_) = (thd)->transaction.xid_state.xid)
+#define thd_memdup(thd,str,len) (thd)->memdup(str, len)
+#define thd_killed(thd) (thd)->killed
+#define thd_slave_thread(thd) (thd)->slave_thread
+#define thd_query(thd) (&(thd)->query)
+#define thd_non_transactional_update(thd) ((thd)->no_trans_update.all)
+#define mysql_bin_log_file_name() mysql_bin_log.get_log_fname()
+#define mysql_bin_log_file_pos() mysql_bin_log.get_log_file()->pos_in_file
+#define mysql_tmpfile() fileno(tmpfile())/* BUGGY: leaks memory, Bug #3998 */
+#define mysql_query_cache_invalidate4(a,b,c,d) query_cache.invalidate(a,b,c,d)
+#else /* MYSQL_SERVER */
+/* This is needed because of Bug #3596.  Let us hope that pthread_mutex_t
+is defined the same in both builds: the MySQL server and the InnoDB plugin. */
+extern pthread_mutex_t LOCK_thread_count;
+#endif /* MYSQL_SERVER */
+
+/** to protect innobase_open_files */
+static pthread_mutex_t innobase_share_mutex;
+/** to force correct commit order in binlog */
+static pthread_mutex_t prepare_commit_mutex;
+static ulong commit_threads = 0;
+static pthread_mutex_t commit_threads_m;
+static pthread_cond_t commit_cond;
+static pthread_mutex_t commit_cond_m;
+static bool innodb_inited = 0;
 
 /*
   This needs to exist until the query cache callback is removed
@@ -66,81 +87,67 @@ typedef uchar mysql_byte;
 
 /* Include necessary InnoDB headers */
 extern "C" {
-#include "../storage/innobase/include/univ.i"
-#include "../storage/innobase/include/os0file.h"
-#include "../storage/innobase/include/os0thread.h"
-#include "../storage/innobase/include/srv0start.h"
-#include "../storage/innobase/include/srv0srv.h"
-#include "../storage/innobase/include/trx0roll.h"
-#include "../storage/innobase/include/trx0trx.h"
-#include "../storage/innobase/include/trx0sys.h"
-#include "../storage/innobase/include/mtr0mtr.h"
-#include "../storage/innobase/include/row0ins.h"
-#include "../storage/innobase/include/row0mysql.h"
-#include "../storage/innobase/include/row0sel.h"
-#include "../storage/innobase/include/row0upd.h"
-#include "../storage/innobase/include/log0log.h"
-#include "../storage/innobase/include/lock0lock.h"
-#include "../storage/innobase/include/dict0crea.h"
-#include "../storage/innobase/include/btr0cur.h"
-#include "../storage/innobase/include/btr0btr.h"
-#include "../storage/innobase/include/fsp0fsp.h"
-#include "../storage/innobase/include/sync0sync.h"
-#include "../storage/innobase/include/fil0fil.h"
-#include "../storage/innobase/include/trx0xa.h"
-#include "../storage/innobase/include/thr0loc.h"
-#include "../storage/innobase/include/ha_prototypes.h"
+#include "univ.i"
+#include "os0file.h"
+#include "os0thread.h"
+#include "srv0start.h"
+#include "srv0srv.h"
+#include "trx0roll.h"
+#include "trx0trx.h"
+#include "trx0sys.h"
+#include "mtr0mtr.h"
+#include "row0ins.h"
+#include "row0mysql.h"
+#include "row0sel.h"
+#include "row0upd.h"
+#include "log0log.h"
+#include "lock0lock.h"
+#include "dict0crea.h"
+#include "btr0cur.h"
+#include "btr0btr.h"
+#include "fsp0fsp.h"
+#include "sync0sync.h"
+#include "fil0fil.h"
+#include "trx0xa.h"
+#include "thr0loc.h"
+#include "ha_prototypes.h"
 }
 
-
-/* The default values for the following, type long or longlong, start-up
-parameters are declared in mysqld.cc: */
-
-long innobase_mirrored_log_groups, innobase_log_files_in_group,
+static long innobase_mirrored_log_groups, innobase_log_files_in_group,
 	innobase_log_buffer_size, innobase_buffer_pool_awe_mem_mb,
 	innobase_additional_mem_pool_size, innobase_file_io_threads,
 	innobase_lock_wait_timeout, innobase_force_recovery,
 	innobase_open_files;
 
-long long innobase_buffer_pool_size, innobase_log_file_size;
+static long long innobase_buffer_pool_size, innobase_log_file_size;
 
 /* The default values for the following char* start-up parameters
 are determined in innobase_init below: */
 
-char*	innobase_data_home_dir			= NULL;
-char*	innobase_data_file_path			= NULL;
-char*	innobase_log_group_home_dir		= NULL;
-char*	innobase_log_arch_dir			= NULL;/* unused */
+static char*	innobase_data_home_dir			= NULL;
+static char*	innobase_data_file_path			= NULL;
+static char*	innobase_log_group_home_dir		= NULL;
 /* The following has a misleading name: starting from 4.0.5, this also
 affects Windows: */
-char*	innobase_unix_file_flush_method		= NULL;
+static char*	innobase_unix_file_flush_method		= NULL;
 
 /* Below we have boolean-valued start-up parameters, and their default
 values */
 
-static
-ulong	innobase_fast_shutdown			= 1;
+static ulong	innobase_fast_shutdown			= 1;
 #ifdef UNIV_LOG_ARCHIVE
-static
-my_bool innobase_log_archive			= FALSE;/* unused */
-#endif /* UNIG_LOG_ARCHIVE */
-static
-my_bool innobase_use_doublewrite		= TRUE;
-static
-my_bool innobase_use_checksums			= TRUE;
-static
-my_bool	innobase_file_per_table			= FALSE;
-static
-my_bool innobase_locks_unsafe_for_binlog	= FALSE;
-static
-my_bool innobase_rollback_on_timeout		= FALSE;
-static
-my_bool innobase_create_status_file		= FALSE;
-static
-my_bool innobase_stats_on_metadata		= TRUE;
+static my_bool	innobase_log_archive			= FALSE;
+static char*	innobase_log_arch_dir			= NULL;
+#endif /* UNIV_LOG_ARCHIVE */
+static my_bool	innobase_use_doublewrite		= TRUE;
+static my_bool	innobase_use_checksums			= TRUE;
+static my_bool	innobase_file_per_table			= FALSE;
+static my_bool	innobase_locks_unsafe_for_binlog	= FALSE;
+static my_bool	innobase_rollback_on_timeout		= FALSE;
+static my_bool	innobase_create_status_file		= FALSE;
+static my_bool innobase_stats_on_metadata		= TRUE;
 
-static
-char*	internal_innobase_data_file_path	= NULL;
+static char*	internal_innobase_data_file_path	= NULL;
 
 /* The following counter is used to convey information to InnoDB
 about server activity: in selects it is not sensible to call
@@ -427,6 +434,22 @@ static SHOW_VAR innodb_status_variables[
 /* General functions */
 
 /**********************************************************************
+Returns true if the thread is the replication thread on the slave
+server. Used in srv_conc_enter_innodb() to determine if the thread
+should be allowed to enter InnoDB - the replication thread is treated
+differently than other threads. Also used in
+srv_conc_force_exit_innodb(). */
+extern "C"
+ibool
+thd_is_replication_slave_thread(
+/*============================*/
+			/* out: true if thd is the replication thread */
+	void*	thd)	/* in: thread handle (THD*) */
+{
+	return((ibool) thd_slave_thread((THD*) thd));
+}
+
+/**********************************************************************
 Save some CPU by testing the value of srv_thread_concurrency in inline
 functions. */
 inline
@@ -482,14 +505,30 @@ innobase_release_stat_resources(
 	}
 }
 
+/**********************************************************************
+Returns true if the transaction this thread is processing has edited
+non-transactional tables. Used by the deadlock detector when deciding
+which transaction to roll back in case of a deadlock - we try to avoid
+rolling back transactions that have edited non-transactional tables. */
+extern "C"
+ibool
+thd_has_edited_nontrans_tables(
+/*===========================*/
+			/* out: true if non-transactional tables have
+			been edited */
+	void*	thd)	/* in: thread handle (THD*) */
+{
+	return((ibool) thd_non_transactional_update((THD*) thd));
+}
+
 /************************************************************************
 Obtain the InnoDB transaction of a MySQL thread. */
 inline
 trx_t*&
 thd_to_trx(
 /*=======*/
-				/* out: reference to transaction pointer */
-	THD*		thd)	/* in: MySQL thread */
+			/* out: reference to transaction pointer */
+	THD*	thd)	/* in: MySQL thread */
 {
 	return(*(trx_t**) thd_ha_data(thd, innodb_hton_ptr));
 }
@@ -502,11 +541,14 @@ static
 int
 innobase_release_temporary_latches(
 /*===============================*/
-         handlerton *hton,
-	 THD *thd)
+				/* out: 0 */
+	handlerton*	hton,	/* in: handlerton */
+	THD*		thd)	/* in: MySQL thread */
 {
 	trx_t*	trx;
 
+	DBUG_ASSERT(hton == innodb_hton_ptr);
+
 	if (!innodb_inited) {
 
 		return 0;
@@ -623,7 +665,7 @@ convert_error_code_to_mysql(
 
 	} else if (error == (int) DB_TABLE_NOT_FOUND) {
 
-		return(HA_ERR_NO_SUCH_TABLE);
+		return(HA_ERR_KEY_NOT_FOUND);
 
 	} else if (error == (int) DB_TOO_BIG_RECORD) {
 
@@ -645,6 +687,20 @@ convert_error_code_to_mysql(
  		}
 
     		return(HA_ERR_LOCK_TABLE_FULL);
+	} else if (error == DB_TOO_MANY_CONCURRENT_TRXS) {
+
+		/* Once MySQL adds the appropriate code to errmsg.txt then
+		we can get rid of this #ifdef. NOTE: The code checked by
+		the #ifdef is the suggested name for the error condition
+		and the actual error code name could very well be different.
+		This will require some monitoring, i.e. the status
+		of this request on our part.*/
+#ifdef ER_TOO_MANY_CONCURRENT_TRXS
+		return(ER_TOO_MANY_CONCURRENT_TRXS);
+#else
+		return(HA_ERR_RECORD_FILE_FULL);
+#endif
+
     	} else {
     		return(-1);			// Unknown error
     	}
@@ -693,19 +749,19 @@ innobase_mysql_print_thd(
 				   use the default max length */
 {
 	THD*	thd;
-        char	buffer[1024];
+	char	buffer[1024];
 
-        thd = (THD*) input_thd;
-        fputs(thd_security_context(thd, buffer, sizeof(buffer), 
+	thd = (THD*) input_thd;
+	fputs(thd_security_context(thd, buffer, sizeof(buffer), 
 				   max_query_len), f);
-        putc('\n', f);
+	putc('\n', f);
 }
 
 /**********************************************************************
 Get the variable length bounds of the given character set.
 
 NOTE that the exact prototype of this function has to be in
-/innobase/data/data0type.ic! */
+/innobase/include/data0type.ic! */
 extern "C"
 void
 innobase_get_cset_width(
@@ -744,7 +800,7 @@ innobase_convert_from_table_id(
 {
 	uint	errors;
 
-	strconvert(current_thd->charset(), from,
+	strconvert(thd_charset(current_thd), from,
 		   &my_charset_filename, to, (uint) len, &errors);
 }
 
@@ -763,7 +819,7 @@ innobase_convert_from_id(
 {
 	uint	errors;
 
-	strconvert(current_thd->charset(), from,
+	strconvert(thd_charset(current_thd), from,
 		   system_charset_info, to, (uint) len, &errors);
 }
 
@@ -826,7 +882,7 @@ innobase_get_charset(
 				/* out: connection character set */
 	void*	mysql_thd)	/* in: MySQL thread handle */
 {
-	return(((THD*) mysql_thd)->charset());
+	return(thd_charset((THD*) mysql_thd));
 }
 
 /*************************************************************************
@@ -837,41 +893,7 @@ innobase_mysql_tmpfile(void)
 /*========================*/
 			/* out: temporary file descriptor, or < 0 on error */
 {
-	char	filename[FN_REFLEN];
-	int	fd2 = -1;
-	File	fd = create_temp_file(filename, mysql_tmpdir, "ib",
-#ifdef __WIN__
-				O_BINARY | O_TRUNC | O_SEQUENTIAL |
-				O_TEMPORARY | O_SHORT_LIVED |
-#endif /* __WIN__ */
-				O_CREAT | O_EXCL | O_RDWR,
-				MYF(MY_WME));
-	if (fd >= 0) {
-#ifndef __WIN__
-		/* On Windows, open files cannot be removed, but files can be
-		created with the O_TEMPORARY flag to the same effect
-		("delete on close"). */
-		unlink(filename);
-#endif /* !__WIN__ */
-		/* Copy the file descriptor, so that the additional resources
-		allocated by create_temp_file() can be freed by invoking
-		my_close().
-
-		Because the file descriptor returned by this function
-		will be passed to fdopen(), it will be closed by invoking
-		fclose(), which in turn will invoke close() instead of
-		my_close(). */
-		fd2 = dup(fd);
-		if (fd2 < 0) {
-			DBUG_PRINT("error",("Got error %d on dup",fd2));
-			my_errno=errno;
-			my_error(EE_OUT_OF_FILERESOURCES,
-				 MYF(ME_BELL+ME_WAITTANG),
-				 filename, my_errno);
-		}
-		my_close(fd, MYF(MY_WME));
-	}
-	return(fd2);
+	return(mysql_tmpfile());
 }
 
 /*************************************************************************
@@ -903,7 +925,6 @@ trx_t*
 check_trx_exists(
 /*=============*/
 			/* out: InnoDB transaction handle */
-	handlerton*	hton,	/* in: handlerton for innodb */
 	THD*	thd)	/* in: user thread handle */
 {
 	trx_t*&	trx = thd_to_trx(thd);
@@ -915,19 +936,16 @@ check_trx_exists(
 		trx = trx_allocate_for_mysql();
 
 		trx->mysql_thd = thd;
-		trx->mysql_query_str = &(thd->query);
-		trx->active_trans = 0;
+		trx->mysql_query_str = thd_query(thd);
 
 		/* Update the info whether we should skip XA steps that eat
 		CPU time */
 		trx->support_xa = THDVAR(thd, support_xa);
-
-		thd_to_trx(thd) = trx;
 	} else {
 		if (trx->magic_n != TRX_MAGIC_N) {
 			mem_analyze_corruption(trx);
 
-			ut_a(0);
+			ut_error;
 		}
 	}
 
@@ -958,7 +976,6 @@ ha_innobase::ha_innobase(handlerton *hto
 		  HA_CAN_SQL_HANDLER |
 		  HA_PRIMARY_KEY_REQUIRED_FOR_POSITION |
 		  HA_PRIMARY_KEY_IN_READ_INDEX |
-                  HA_BINLOG_ROW_CAPABLE |
 		  HA_CAN_GEOMETRY | HA_PARTIAL_COLUMN_READ |
 		  HA_TABLE_SCAN_ON_INDEX),
   start_of_scan(0),
@@ -978,7 +995,7 @@ ha_innobase::update_thd(
 {
 	trx_t*		trx;
 
-	trx = check_trx_exists(ht, thd);
+	trx = check_trx_exists(thd);
 
 	if (prebuilt->trx != trx) {
 
@@ -1117,7 +1134,7 @@ innobase_query_caching_of_table_permitte
 
 	ut_a(full_name_len < 999);
 
-	trx = check_trx_exists(innodb_hton_ptr, thd);
+	trx = check_trx_exists(thd);
 
 	if (trx->isolation_level == TRX_ISO_SERIALIZABLE) {
 		/* In the SERIALIZABLE mode we add LOCK IN SHARE MODE to every
@@ -1222,10 +1239,10 @@ innobase_invalidate_query_cache(
 
 	/* Argument TRUE below means we are using transactions */
 #ifdef HAVE_QUERY_CACHE
-	query_cache.invalidate((THD*)(trx->mysql_thd),
-					(const char*)full_name,
-					(uint32)full_name_len,
-					TRUE);
+	mysql_query_cache_invalidate4((THD*) trx->mysql_thd,
+				      (const char*) full_name,
+				      (uint32) full_name_len,
+				      TRUE);
 #endif
 }
 
@@ -1305,7 +1322,20 @@ trx_is_interrupted(
 			/* out: TRUE if interrupted */
 	trx_t*	trx)	/* in: transaction */
 {
-	return(trx && trx->mysql_thd && ((THD*) trx->mysql_thd)->killed);
+	return(trx && trx->mysql_thd && thd_killed((THD*) trx->mysql_thd));
+}
+
+/******************************************************************
+Resets some fields of a prebuilt struct. The template is used in fast
+retrieval of just those column values MySQL needs in its processing. */
+static
+void
+reset_template(
+/*===========*/
+	row_prebuilt_t*	prebuilt)	/* in/out: prebuilt struct */
+{
+	prebuilt->keep_other_fields_on_keyread = 0;
+	prebuilt->read_just_key = 0;
 }
 
 /*********************************************************************
@@ -1343,7 +1373,7 @@ ha_innobase::init_table_handle_for_HANDL
 
 	if (prebuilt->trx->active_trans == 0) {
 
-		innobase_register_trx_and_stmt(ht, ha_thd());
+		innobase_register_trx_and_stmt(ht, user_thd);
 
 		prebuilt->trx->active_trans = 1;
 	}
@@ -1366,19 +1396,18 @@ ha_innobase::init_table_handle_for_HANDL
 	/* We want always to fetch all columns in the whole row? Or do
 	we???? */
 
-	prebuilt->read_just_key = FALSE;
-
 	prebuilt->used_in_HANDLER = TRUE;
-
-	prebuilt->keep_other_fields_on_keyread = FALSE;
+	reset_template(prebuilt);
 }
 
 /*************************************************************************
 Opens an InnoDB database. */
 static
 int
-innobase_init(void *p)
-/*===============*/
+innobase_init(
+/*==========*/
+			/* out: 0 on success, error code on failure */
+	void	*p)	/* in: InnoDB handlerton */
 {
 	static char	current_dir[3];		/* Set if using current lib */
 	int		err;
@@ -1387,9 +1416,9 @@ innobase_init(void *p)
 
 	DBUG_ENTER("innobase_init");
         handlerton *innobase_hton= (handlerton *)p;
-        innodb_hton_ptr= innobase_hton;
+        innodb_hton_ptr = innobase_hton;
 
-        innobase_hton->state= SHOW_OPTION_YES;
+        innobase_hton->state = SHOW_OPTION_YES;
         innobase_hton->db_type= DB_TYPE_INNODB;
         innobase_hton->savepoint_offset=sizeof(trx_named_savept_t);
         innobase_hton->close_connection=innobase_close_connection;
@@ -1569,10 +1598,7 @@ innobase_init(void *p)
 	changes the value so that it becomes the number of database pages. */
 
 	if (innobase_buffer_pool_awe_mem_mb == 0) {
-		/* Careful here: we first convert the signed long int to ulint
-		and only after that divide */
-
-		srv_pool_size = ((ulint) innobase_buffer_pool_size) / 1024;
+		srv_pool_size = (ulint)(innobase_buffer_pool_size / 1024);
 	} else {
 		srv_use_awe = TRUE;
 		srv_pool_size = (ulint)
@@ -1657,6 +1683,7 @@ innobase_init(void *p)
 
 	DBUG_RETURN(FALSE);
 error:
+	innobase_hton->state = SHOW_OPTION_DISABLED;
 	DBUG_RETURN(TRUE);
 }
 
@@ -1751,7 +1778,7 @@ innobase_start_trx_and_assign_read_view(
 
 	/* Create a new trx struct for thd, if it does not yet have one */
 
-	trx = check_trx_exists(hton, thd);
+	trx = check_trx_exists(thd);
 
 	/* This is just to play safe: release a possible FIFO ticket and
 	search latch. Since we will reserve the kernel mutex, we have to
@@ -1796,7 +1823,7 @@ innobase_commit(
 	DBUG_ENTER("innobase_commit");
 	DBUG_PRINT("trans", ("ending transaction"));
 
-	trx = check_trx_exists(hton, thd);
+	trx = check_trx_exists(thd);
 
 	/* Update the info whether we should skip XA steps that eat CPU time */
 	trx->support_xa = THDVAR(thd, support_xa);
@@ -1855,9 +1882,8 @@ retry:
 			}
 		}
 
-		trx->mysql_log_file_name = mysql_bin_log.get_log_fname();
-		trx->mysql_log_offset =
-			(ib_longlong)mysql_bin_log.get_log_file()->pos_in_file;
+		trx->mysql_log_file_name = mysql_bin_log_file_name();
+		trx->mysql_log_offset = (ib_longlong) mysql_bin_log_file_pos();
 
 		innobase_commit_low(trx);
 
@@ -1892,6 +1918,8 @@ retry:
 		trx_mark_sql_stat_end(trx);
 	}
 
+	trx->n_autoinc_rows = 0; /* Reset the number of AUTO-INC rows required */
+
 	if (trx->declared_to_be_inside_innodb) {
 		/* Release our possible ticket in the FIFO */
 
@@ -1987,7 +2015,6 @@ int
 innobase_commit_complete(
 /*=====================*/
 				/* out: 0 */
-        handlerton *hton, /* in: Innodb handlerton */ 
 	THD*	thd)		/* in: user thread */
 {
 	trx_t*	trx;
@@ -2012,8 +2039,8 @@ innobase_commit_complete(
 
 /*********************************************************************
 Rolls back a transaction or the latest SQL statement. */
-
-static int
+static
+int
 innobase_rollback(
 /*==============*/
 			/* out: 0 or error number */
@@ -2029,7 +2056,7 @@ innobase_rollback(
 	DBUG_ENTER("innobase_rollback");
 	DBUG_PRINT("trans", ("aborting transaction"));
 
-	trx = check_trx_exists(hton, thd);
+	trx = check_trx_exists(thd);
 
 	/* Update the info whether we should skip XA steps that eat CPU time */
 	trx->support_xa = THDVAR(thd, support_xa);
@@ -2095,8 +2122,8 @@ innobase_rollback_trx(
 
 /*********************************************************************
 Rolls back a transaction to a savepoint. */
-
-static int
+static
+int
 innobase_rollback_to_savepoint(
 /*===========================*/
 				/* out: 0 if success, HA_ERR_NO_SAVEPOINT if
@@ -2113,7 +2140,7 @@ innobase_rollback_to_savepoint(
 
 	DBUG_ENTER("innobase_rollback_to_savepoint");
 
-	trx = check_trx_exists(hton, thd);
+	trx = check_trx_exists(thd);
 
 	/* Release a possible FIFO ticket and search latch. Since we will
 	reserve the kernel mutex, we have to release the search system latch
@@ -2149,7 +2176,7 @@ innobase_release_savepoint(
 
 	DBUG_ENTER("innobase_release_savepoint");
 
-	trx = check_trx_exists(hton, thd);
+	trx = check_trx_exists(thd);
 
 	/* TODO: use provided savepoint data area to store savepoint data */
 
@@ -2181,10 +2208,12 @@ innobase_savepoint(
 	  (unless we are in sub-statement), so SQL layer ensures that
 	  this method is never called in such situation.
 	*/
+#ifdef MYSQL_SERVER /* plugins cannot access thd->in_sub_stmt */
 	DBUG_ASSERT(thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN) ||
 		thd->in_sub_stmt);
+#endif /* MYSQL_SERVER */
 
-	trx = check_trx_exists(hton, thd);
+	trx = check_trx_exists(thd);
 
 	/* Release a possible FIFO ticket and search latch. Since we will
 	reserve the kernel mutex, we have to release the search system latch
@@ -2217,6 +2246,8 @@ innobase_close_connection(
 {
 	trx_t*	trx;
 
+	DBUG_ENTER("innobase_close_connection");
+	DBUG_ASSERT(hton == innodb_hton_ptr);
 	trx = thd_to_trx(thd);
 
 	ut_a(trx);
@@ -2243,7 +2274,7 @@ innobase_close_connection(
 	thr_local_free(trx->mysql_thread_id);
 	trx_free_for_mysql(trx);
 
-	return(0);
+	DBUG_RETURN(0);
 }
 
 
@@ -2269,21 +2300,6 @@ ha_innobase::get_row_type() const
 	return(ROW_TYPE_NOT_USED);
 }
 
-
-
-/********************************************************************
-Get the table flags to use for the statement. */
-handler::Table_flags
-ha_innobase::table_flags() const
-{
-       /* Need to use tx_isolation here since table flags is (also)
-          called before prebuilt is inited. */
-        ulong const tx_isolation = thd_tx_isolation(current_thd);
-        if (tx_isolation <= ISO_READ_COMMITTED)
-                return int_table_flags;
-        return int_table_flags | HA_BINLOG_STMT_CAPABLE;
-}
-
 /********************************************************************
 Gives the file extension of an InnoDB single-table tablespace. */
 static const char* ha_innobase_exts[] = {
@@ -2523,8 +2539,15 @@ ha_innobase::close(void)
 /*====================*/
 				/* out: 0 */
 {
+	THD*	thd;
+
 	DBUG_ENTER("ha_innobase::close");
 
+	thd = current_thd;  // avoid calling current_thd twice, it may be slow
+	if (thd != NULL) {
+		innobase_release_temporary_latches(ht, thd);
+	}
+
 	row_prebuilt_free(prebuilt);
 
 	my_free(upd_buff, MYF(0));
@@ -3099,7 +3122,7 @@ static
 void
 build_template(
 /*===========*/
-	row_prebuilt_t*	prebuilt,	/* in: prebuilt struct */
+	row_prebuilt_t*	prebuilt,	/* in/out: prebuilt struct */
 	THD*		thd,		/* in: current user thread, used
 					only if templ_type is
 					ROW_MYSQL_REC_FIELDS */
@@ -3306,6 +3329,93 @@ skip_field:
 }
 
 /************************************************************************
+This special handling is really to overcome the limitations of MySQL's
+binlogging. We need to eliminate the non-determinism that will arise in
+INSERT ... SELECT type of statements, since MySQL binlog only stores the
+min value of the autoinc interval. Once that is fixed we can get rid of
+the special lock handling.*/
+
+ulong
+ha_innobase::innobase_autoinc_lock(void)
+/*====================================*/
+					/* out: DB_SUCCESS if all OK else
+					error code */
+{
+	ulint		error = DB_SUCCESS;
+
+	if (thd_sql_command(user_thd) == SQLCOM_INSERT) {
+		dict_table_autoinc_lock(prebuilt->table);
+
+		/* We peek at the dict_table_t::auto_inc_lock to check if
+		another statement has locked it */
+		if (prebuilt->trx->auto_inc_lock != NULL) {
+			/* Release the mutex to avoid deadlocks */
+			dict_table_autoinc_unlock(prebuilt->table);
+
+			goto acquire_auto_inc_lock;
+		}
+	} else {
+acquire_auto_inc_lock:
+		error = row_lock_table_autoinc_for_mysql(prebuilt);
+
+		if (error == DB_SUCCESS) {
+			dict_table_autoinc_lock(prebuilt->table);
+		}
+	}
+
+	return(ulong(error));
+}
+
+/************************************************************************
+Reset the autoinc value in the table.*/
+
+ulong
+ha_innobase::innobase_reset_autoinc(
+/*================================*/
+					/* out: DB_SUCCESS if all went well
+					else error code */
+	ulonglong	autoinc)	/* in: value to store */
+{
+	ulint		error;
+
+	error = innobase_autoinc_lock();
+
+	if (error == DB_SUCCESS) {
+
+		dict_table_autoinc_initialize(prebuilt->table, autoinc);
+
+		dict_table_autoinc_unlock(prebuilt->table);
+	}
+
+	return(ulong(error));
+}
+
+/************************************************************************
+Store the autoinc value in the table. The autoinc value is only set if
+it's greater than the existing autoinc value in the table.*/
+
+ulong
+ha_innobase::innobase_set_max_autoinc(
+/*==================================*/
+					/* out: DB_SUCCESS if all went well
+					else error code */
+	ulonglong	auto_inc)	/* in: value to store */
+{
+	ulint		error;
+
+	error = innobase_autoinc_lock();
+
+	if (error == DB_SUCCESS) {
+
+		dict_table_autoinc_update(prebuilt->table, auto_inc);
+
+		dict_table_autoinc_unlock(prebuilt->table);
+	}
+
+	return(ulong(error));
+}
+
+/************************************************************************
 Stores a row in an InnoDB database, to the table specified in this
 handle. */
 
@@ -3315,12 +3425,10 @@ ha_innobase::write_row(
 				/* out: error code */
 	mysql_byte*	record)	/* in: a row in MySQL format */
 {
-	int		error;
-	longlong	auto_inc;
-	longlong	dummy;
+	int		error = 0;
 	ibool		auto_inc_used= FALSE;
-        THD *thd=       ha_thd();
-	trx_t*		trx = thd_to_trx(thd);
+	ulint		sql_command;
+	trx_t*		trx = thd_to_trx(user_thd);
 
 	DBUG_ENTER("ha_innobase::write_row");
 
@@ -3344,11 +3452,13 @@ ha_innobase::write_row(
 	if (table->timestamp_field_type & TIMESTAMP_AUTO_SET_ON_INSERT)
 		table->timestamp_field->set_time();
 
-	if ((thd_sql_command(thd) == SQLCOM_ALTER_TABLE
-			|| thd_sql_command(thd) == SQLCOM_OPTIMIZE
-			|| thd_sql_command(thd) == SQLCOM_CREATE_INDEX
-			|| thd_sql_command(thd) == SQLCOM_DROP_INDEX)
-		&& num_write_row >= 10000) {
+	sql_command = thd_sql_command(user_thd);
+
+	if ((sql_command == SQLCOM_ALTER_TABLE
+	     || sql_command == SQLCOM_OPTIMIZE
+	     || sql_command == SQLCOM_CREATE_INDEX
+	     || sql_command == SQLCOM_DROP_INDEX)
+	    && num_write_row >= 10000) {
 		/* ALTER TABLE is COMMITted at every 10000 copied rows.
 		The IX table lock for the original table has to be re-issued.
 		As this method will be called on a temporary table where the
@@ -3414,62 +3524,20 @@ no_commit:
 
 	num_write_row++;
 
+	/* This is the case where the table has an auto-increment column */
 	if (table->next_number_field && record == table->record[0]) {
-		/* This is the case where the table has an
-		auto-increment column */
-
-		/* Initialize the auto-inc counter if it has not been
-		initialized yet */
-
-		if (0 == dict_table_autoinc_peek(prebuilt->table)) {
-
-			/* This call initializes the counter */
-			error = innobase_read_and_init_auto_inc(&dummy);
-
-			if (error) {
-				/* Deadlock or lock wait timeout */
 
-				goto func_exit;
-			}
-
-			/* We have to set sql_stat_start to TRUE because
-			the above call probably has called a select, and
-			has reset that flag; row_insert_for_mysql has to
-			know to set the IX intention lock on the table,
-			something it only does at the start of each
-			statement */
-
-			prebuilt->sql_stat_start = TRUE;
-		}
-
-		/* We have to use the transactional lock mechanism on the
-		auto-inc counter of the table to ensure that replication and
-		roll-forward of the binlog exactly imitates also the given
-		auto-inc values. The lock is released at each SQL statement's
-		end. This lock also prevents a race where two threads would
-		call ::get_auto_increment() simultaneously. */
-
-		error = row_lock_table_autoinc_for_mysql(prebuilt);
-
-		if (error != DB_SUCCESS) {
-			/* Deadlock or lock wait timeout */
-
-			error = convert_error_code_to_mysql(error, user_thd);
+		if ((error = update_auto_increment())) {
 
 			goto func_exit;
 		}
 
-		/* We must use the handler code to update the auto-increment
-		value to be sure that we increment it correctly. */
-
-    		if ((error= update_auto_increment()))
-			goto func_exit;
-		auto_inc_used = 1;
-
+		auto_inc_used = TRUE;
 	}
 
 	if (prebuilt->mysql_template == NULL
-			|| prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) {
+	    || prebuilt->template_type != ROW_MYSQL_WHOLE_ROW) {
+
 		/* Build the template used in converting quickly between
 		the two database formats */
 
@@ -3480,47 +3548,68 @@ no_commit:
 
 	error = row_insert_for_mysql((byte*) record, prebuilt);
 
-	if (error == DB_SUCCESS && auto_inc_used) {
-
-		/* Fetch the value that was set in the autoincrement field */
+	/* Handle duplicate key errors */
+	if (auto_inc_used) {
+		ulonglong	auto_inc;
+
+		/* Note the number of rows processed for this statement, used
+		by get_auto_increment() to determine the number of AUTO-INC
+		values to reserve. This is only useful for a multi-value INSERT
+		and is a statement level counter.*/
+		--trx->n_autoinc_rows;
 
+		/* Get the value that MySQL attempted to store in the table.*/
 		auto_inc = table->next_number_field->val_int();
 
-		if (auto_inc != 0) {
-			/* This call will update the counter according to the
-			value that was inserted in the table */
+		switch (error) {
+		case DB_DUPLICATE_KEY:
+
+			/* A REPLACE command and LOAD DATA INFILE REPLACE
+			handle a duplicate key error themselves, but we
+			must update the autoinc counter if we are performing
+			those statements. */
+
+			switch (sql_command) {
+			case SQLCOM_LOAD:
+				if ((trx->duplicates
+				    & (TRX_DUP_IGNORE | TRX_DUP_REPLACE))) {
+
+					goto set_max_autoinc;
+				}
+				break;
+
+			case SQLCOM_REPLACE:
+			case SQLCOM_INSERT_SELECT:
+			case SQLCOM_REPLACE_SELECT:
+				goto set_max_autoinc;
+				break;
 
-            		dict_table_autoinc_update(prebuilt->table, auto_inc);
-          	}
-        }
-
-        /* A REPLACE command and LOAD DATA INFILE REPLACE handle a duplicate
-        key error themselves, and we must update the autoinc counter if we are
-        performing those statements. */
-
-        if (error == DB_DUPLICATE_KEY && auto_inc_used
-            && (thd_sql_command(user_thd) == SQLCOM_REPLACE
-                || thd_sql_command(user_thd) == SQLCOM_REPLACE_SELECT
-                || (thd_sql_command(user_thd) == SQLCOM_INSERT
-                    && prebuilt->trx->allow_duplicates
-		    && !prebuilt->trx->replace_duplicates)
-                || (thd_sql_command(user_thd) == SQLCOM_LOAD
-                    && prebuilt->trx->allow_duplicates
-		    && prebuilt->trx->replace_duplicates))) {
-
-                auto_inc = table->next_number_field->val_int();
-
-                if (auto_inc != 0) {
-                        dict_table_autoinc_update(prebuilt->table, auto_inc);
-                }
-        }
+			default:
+				break;
+			}
+
+			break;
+
+		case DB_SUCCESS:
+			/* If the actual value inserted is greater than
+			the upper limit of the interval, then we try and
+			update the table upper limit. Note: last_value
+			will be 0 if get_auto_increment() was not called.*/
+
+			if (auto_inc > prebuilt->last_value) {
+set_max_autoinc:
+				auto_inc += prebuilt->table->autoinc_increment;
+
+				innobase_set_max_autoinc(auto_inc);
+			}
+			break;
+		}
+	}
 
 	innodb_srv_conc_exit_innodb(prebuilt->trx);
 
 	error = convert_error_code_to_mysql(error, user_thd);
 
-	/* Tell InnoDB server that there might be work for
-	utility threads: */
 func_exit:
 	innobase_active_small();
 
@@ -3694,7 +3783,7 @@ ha_innobase::update_row(
 {
 	upd_t*		uvect;
 	int		error = 0;
-	trx_t*		trx = thd_to_trx(ha_thd());
+	trx_t*		trx = thd_to_trx(user_thd);
 
 	DBUG_ENTER("ha_innobase::update_row");
 
@@ -3725,6 +3814,32 @@ ha_innobase::update_row(
 
 	error = row_update_for_mysql((byte*) old_row, prebuilt);
 
+	/* We need to do some special AUTOINC handling for the following case:
+
+	INSERT INTO t (c1,c2) VALUES(x,y) ON DUPLICATE KEY UPDATE ...
+
+	We need to use the AUTOINC counter that was actually used by
+	MySQL in the UPDATE statement, which can be different from the
+	value used in the INSERT statement.*/
+
+	if (error == DB_SUCCESS
+	    && table->next_number_field
+	    && new_row == table->record[0]
+	    && thd_sql_command(user_thd) == SQLCOM_INSERT
+	    && (trx->duplicates & (TRX_DUP_IGNORE | TRX_DUP_REPLACE))
+		== TRX_DUP_IGNORE)  {
+
+		longlong	auto_inc;
+
+		auto_inc = table->next_number_field->val_int();
+
+		if (auto_inc != 0) {
+			auto_inc += prebuilt->table->autoinc_increment;
+
+			innobase_set_max_autoinc(auto_inc);
+		}
+	}
+
 	innodb_srv_conc_exit_innodb(trx);
 
 	error = convert_error_code_to_mysql(error, user_thd);
@@ -3747,12 +3862,25 @@ ha_innobase::delete_row(
 	const mysql_byte* record)	/* in: a row in MySQL format */
 {
 	int		error = 0;
-	trx_t*		trx = thd_to_trx(ha_thd());
+	trx_t*		trx = thd_to_trx(user_thd);
 
 	DBUG_ENTER("ha_innobase::delete_row");
 
 	ut_a(prebuilt->trx == trx);
 
+	/* Only if the table has an AUTOINC column */
+	if (table->found_next_number_field && record == table->record[0]) {
+		ulonglong	dummy = 0;
+
+		error = innobase_get_auto_increment(&dummy);
+
+		if (error == DB_SUCCESS) {
+			dict_table_autoinc_unlock(prebuilt->table);
+		} else {
+			goto error_exit;
+		}
+	}
+
 	if (!prebuilt->upd_node) {
 		row_get_prebuilt_update_vector(prebuilt);
 	}
@@ -3767,6 +3895,7 @@ ha_innobase::delete_row(
 
 	innodb_srv_conc_exit_innodb(trx);
 
+error_exit:
 	error = convert_error_code_to_mysql(error, user_thd);
 
 	/* Tell the InnoDB server that there might be work for
@@ -3833,7 +3962,8 @@ ha_innobase::try_semi_consistent_read(bo
 	option is used or this session is using READ COMMITTED isolation
 	level. */
 
-	if (yes &&  (srv_locks_unsafe_for_binlog
+	if (yes
+	    && (srv_locks_unsafe_for_binlog
 		|| prebuilt->trx->isolation_level == TRX_ISO_READ_COMMITTED)) {
 		prebuilt->row_read_type = ROW_READ_TRY_SEMI_CONSISTENT;
 	} else {
@@ -3991,7 +4121,7 @@ ha_innobase::index_read(
 
 	DBUG_ENTER("index_read");
 
-	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
+	ut_a(prebuilt->trx == thd_to_trx(user_thd));
 
 	ha_statistic_increment(&SSV::ha_read_key_count);
 
@@ -4080,42 +4210,66 @@ ha_innobase::index_read_last(
 }
 
 /************************************************************************
-Changes the active index of a handle. */
+Get the index for a handle. Does not change active index.*/
 
-int
-ha_innobase::change_active_index(
-/*=============================*/
-			/* out: 0 or error code */
-	uint	keynr)	/* in: use this index; MAX_KEY means always clustered
-			index, even if it was internally generated by
-			InnoDB */
+dict_index_t*
+ha_innobase::innobase_get_index(
+/*============================*/
+				/* out: NULL or index instance. */
+	uint		keynr)	/* in: use this index; MAX_KEY means always
+				clustered index, even if it was internally
+				generated by InnoDB */
 {
-	KEY*		key=0;
-	DBUG_ENTER("change_active_index");
+	KEY*		key = 0;
+	dict_index_t*	index = 0;
+
+	DBUG_ENTER("innobase_get_index");
 	ha_statistic_increment(&SSV::ha_read_key_count);
 
 	ut_ad(user_thd == ha_thd());
 	ut_a(prebuilt->trx == thd_to_trx(user_thd));
 
-	active_index = keynr;
-
 	if (keynr != MAX_KEY && table->s->keys > 0) {
-		key = table->key_info + active_index;
+		key = table->key_info + keynr;
 
-		prebuilt->index = dict_table_get_index_noninline(
+		index = dict_table_get_index_noninline(
 			prebuilt->table, key->name);
 	} else {
-		prebuilt->index = dict_table_get_first_index_noninline(
-							   prebuilt->table);
+		index = dict_table_get_first_index_noninline(prebuilt->table);
 	}
 
-	if (!prebuilt->index) {
+	if (!index) {
 		sql_print_error(
 			"Innodb could not find key n:o %u with name %s "
 			"from dict cache for table %s",
 			keynr, key ? key->name : "NULL",
 			prebuilt->table->name);
+	}
+
+	DBUG_RETURN(index);
+}
 
+/************************************************************************
+Changes the active index of a handle. */
+
+int
+ha_innobase::change_active_index(
+/*=============================*/
+			/* out: 0 or error code */
+	uint	keynr)	/* in: use this index; MAX_KEY means always clustered
+			index, even if it was internally generated by
+			InnoDB */
+{
+	DBUG_ENTER("change_active_index");
+
+	ut_ad(user_thd == ha_thd());
+	ut_a(prebuilt->trx == thd_to_trx(user_thd));
+
+	active_index = keynr;
+
+	prebuilt->index = innobase_get_index(keynr);
+
+	if (!prebuilt->index) {
 		DBUG_RETURN(1);
 	}
 
@@ -4183,7 +4337,7 @@ ha_innobase::general_fetch(
 
 	DBUG_ENTER("general_fetch");
 
-	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
+	ut_a(prebuilt->trx == thd_to_trx(user_thd));
 
 	innodb_srv_conc_enter_innodb(prebuilt->trx);
 
@@ -4482,6 +4636,24 @@ ha_innobase::position(
 }
 
 /*********************************************************************
+If it's a DB_TOO_BIG_RECORD error then set a suitable message to
+return to the client.*/
+inline
+void
+innodb_check_for_record_too_big_error(
+/*==================================*/
+	ulint	comp,	/* in: ROW_FORMAT: nonzero=COMPACT, 0=REDUNDANT */
+	int	error)	/* in: error code to check */
+{
+	if (error == (int)DB_TOO_BIG_RECORD) {
+		ulint	max_row_size
+			= page_get_free_space_of_empty_noninline(comp) / 2;
+
+		my_error(ER_TOO_BIG_ROWSIZE, MYF(0), max_row_size);
+	}
+}
+
+/*********************************************************************
 Creates a table definition to an InnoDB database. */
 static
 int
@@ -4589,6 +4761,8 @@ create_table_def(
 
 	error = row_create_table_for_mysql(table, trx);
 
+	innodb_check_for_record_too_big_error(flags & DICT_TF_COMPACT, error);
+
 	error = convert_error_code_to_mysql(error, NULL);
 
 	DBUG_RETURN(error);
@@ -4711,6 +4885,9 @@ create_index(
 	sure we don't create too long indexes. */
 	error = row_create_index_for_mysql(index, trx, field_lengths);
 
+	innodb_check_for_record_too_big_error(form->s->row_type
+					      != ROW_TYPE_REDUNDANT, error);
+
 	error = convert_error_code_to_mysql(error, NULL);
 
 	my_free(field_lengths, MYF(0));
@@ -4726,6 +4903,8 @@ int
 create_clustered_index_when_no_primary(
 /*===================================*/
 	trx_t*		trx,		/* in: InnoDB transaction handle */
+	ulint		comp,		/* in: ROW_FORMAT:
+					nonzero=COMPACT, 0=REDUNDANT */
 	const char*	table_name)	/* in: table name */
 {
 	dict_index_t*	index;
@@ -4734,10 +4913,12 @@ create_clustered_index_when_no_primary(
 	/* We pass 0 as the space id, and determine at a lower level the space
 	id where to store the table */
 
-	index = dict_mem_index_create((char*) table_name,
-		(char*) "GEN_CLUST_INDEX", 0, DICT_CLUSTERED, 0);
+	index = dict_mem_index_create(table_name, "GEN_CLUST_INDEX",
+				      0, DICT_CLUSTERED, 0);
 	error = row_create_index_for_mysql(index, trx, NULL);
 
+	innodb_check_for_record_too_big_error(comp, error);
+
 	error = convert_error_code_to_mysql(error, NULL);
 
 	return(error);
@@ -4779,7 +4960,7 @@ ha_innobase::create(
 	uint		i;
 	char		name2[FN_REFLEN];
 	char		norm_name[FN_REFLEN];
-	THD		*thd= ha_thd();
+	THD*		thd = ha_thd();
 	ib_longlong	auto_inc_value;
 	ulint		flags;
 
@@ -4797,7 +4978,7 @@ ha_innobase::create(
 	/* Get the transaction associated with the current thd, or create one
 	if not yet created */
 
-	parent_trx = check_trx_exists(ht, thd);
+	parent_trx = check_trx_exists(thd);
 
 	/* In case MySQL calls this in the middle of a SELECT query, release
 	possible adaptive hash latch to avoid deadlocks of threads */
@@ -4807,7 +4988,7 @@ ha_innobase::create(
 	trx = trx_allocate_for_mysql();
 
 	trx->mysql_thd = thd;
-	trx->mysql_query_str = &((*thd).query);
+	trx->mysql_query_str = thd_query(thd);
 
 	if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
 		trx->check_foreigns = FALSE;
@@ -4867,8 +5048,9 @@ ha_innobase::create(
 		order the rows by their row id which is internally generated
 		by InnoDB */
 
-		error = create_clustered_index_when_no_primary(trx,
-							norm_name);
+		error = create_clustered_index_when_no_primary(
+			trx, form->s->row_type != ROW_TYPE_REDUNDANT,
+			norm_name);
 		if (error) {
 			goto cleanup;
 		}
@@ -4893,9 +5075,9 @@ ha_innobase::create(
 		}
 	}
 
-	if (thd->query != NULL) {
+	if (*trx->mysql_query_str) {
 		error = row_table_add_foreign_constraints(trx,
-			thd->query, norm_name,
+			*trx->mysql_query_str, norm_name,
 			create_info->options & HA_LEX_CREATE_TMP_TABLE);
 
 		error = convert_error_code_to_mysql(error, NULL);
@@ -4930,7 +5112,10 @@ ha_innobase::create(
 		maximum value in the column. */
 
 		auto_inc_value = create_info->auto_increment_value;
+
+		dict_table_autoinc_lock(innobase_table);
 		dict_table_autoinc_initialize(innobase_table, auto_inc_value);
+		dict_table_autoinc_unlock(innobase_table);
 	}
 
 	/* Tell the InnoDB server that there might be work for
@@ -4994,33 +5179,32 @@ ha_innobase::delete_all_rows(void)
 				/* out: error number */
 {
 	int		error;
-	THD*		thd		= ha_thd();
 
 	DBUG_ENTER("ha_innobase::delete_all_rows");
 
 	/* Get the transaction associated with the current thd, or create one
 	if not yet created, and update prebuilt->trx */
 
-	update_thd(thd);
-
-	if (thd_sql_command(thd) == SQLCOM_TRUNCATE) {
-		/* Truncate the table in InnoDB */
-
-		error = row_truncate_table_for_mysql(prebuilt->table, prebuilt->trx);
-		if (error == DB_ERROR) {
-			/* Cannot truncate; resort to ha_innobase::delete_row() */
-			goto fallback;
-		}
+	update_thd(ha_thd());
 
-		error = convert_error_code_to_mysql(error, NULL);
+	if (thd_sql_command(user_thd) != SQLCOM_TRUNCATE) {
+	fallback:
+		/* We only handle TRUNCATE TABLE t as a special case.
+		DELETE FROM t will have to use ha_innobase::delete_row(). */
+		DBUG_RETURN(my_errno=HA_ERR_WRONG_COMMAND);
+	}
 
-		DBUG_RETURN(error);
+	/* Truncate the table in InnoDB */
+
+	error = row_truncate_table_for_mysql(prebuilt->table, prebuilt->trx);
+	if (error == DB_ERROR) {
+		/* Cannot truncate; resort to ha_innobase::delete_row() */
+		goto fallback;
 	}
 
-fallback:
-	/* We only handle TRUNCATE TABLE t as a special case.
-	DELETE FROM t will have to use ha_innobase::delete_row(). */
-	DBUG_RETURN(my_errno=HA_ERR_WRONG_COMMAND);
+	error = convert_error_code_to_mysql(error, NULL);
+
+	DBUG_RETURN(error);
 }
 
 /*********************************************************************
@@ -5040,7 +5224,7 @@ ha_innobase::delete_table(
 	int	error;
 	trx_t*	parent_trx;
 	trx_t*	trx;
-	THD	*thd= ha_thd();
+	THD	*thd = ha_thd();
 	char	norm_name[1000];
 
 	DBUG_ENTER("ha_innobase::delete_table");
@@ -5048,7 +5232,7 @@ ha_innobase::delete_table(
 	/* Get the transaction associated with the current thd, or create one
 	if not yet created */
 
-	parent_trx = check_trx_exists(ht, thd);
+	parent_trx = check_trx_exists(thd);
 
 	/* In case MySQL calls this in the middle of a SELECT query, release
 	possible adaptive hash latch to avoid deadlocks of threads */
@@ -5063,75 +5247,77 @@ ha_innobase::delete_table(
 
 	trx = trx_allocate_for_mysql();
 
-	trx->mysql_thd = ha_thd();
-        trx->mysql_query_str = &(ha_thd()->query);
+	trx->mysql_thd = thd;
+	trx->mysql_query_str = thd_query(thd);
 
-        if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
-          trx->check_foreigns = FALSE;
-        }
+	if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
+		trx->check_foreigns = FALSE;
+	}
 
-        if (thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS)) {
-          trx->check_unique_secondary = FALSE;
-        }
+	if (thd_test_options(thd, OPTION_RELAXED_UNIQUE_CHECKS)) {
+		trx->check_unique_secondary = FALSE;
+	}
 
-        name_len = strlen(name);
+	name_len = strlen(name);
 
-        assert(name_len < 1000);
+	assert(name_len < 1000);
 
-        /* Strangely, MySQL passes the table name without the '.frm'
-           extension, in contrast to ::create */
+	/* Strangely, MySQL passes the table name without the '.frm'
+	extension, in contrast to ::create */
 
-        normalize_table_name(norm_name, name);
+	normalize_table_name(norm_name, name);
 
-        /* Drop the table in InnoDB */
+	/* Drop the table in InnoDB */
 
-        error = row_drop_table_for_mysql(norm_name, trx,
-                                         thd_sql_command(thd) == SQLCOM_DROP_DB);
+	error = row_drop_table_for_mysql(norm_name, trx,
+					 thd_sql_command(thd)
+					 == SQLCOM_DROP_DB);
 
-        /* Flush the log to reduce probability that the .frm files and
-           the InnoDB data dictionary get out-of-sync if the user runs
-           with innodb_flush_log_at_trx_commit = 0 */
+	/* Flush the log to reduce probability that the .frm files and
+	the InnoDB data dictionary get out-of-sync if the user runs
+	with innodb_flush_log_at_trx_commit = 0 */
 
-        log_buffer_flush_to_disk();
+	log_buffer_flush_to_disk();
 
-        /* Tell the InnoDB server that there might be work for
-           utility threads: */
+	/* Tell the InnoDB server that there might be work for
+	utility threads: */
 
-        srv_active_wake_master_thread();
+	srv_active_wake_master_thread();
 
-        innobase_commit_low(trx);
+	innobase_commit_low(trx);
 
-        trx_free_for_mysql(trx);
+	trx_free_for_mysql(trx);
 
-        error = convert_error_code_to_mysql(error, NULL);
+	error = convert_error_code_to_mysql(error, NULL);
 
-        DBUG_RETURN(error);
+	DBUG_RETURN(error);
 }
 
 /*********************************************************************
-  Removes all tables in the named database inside InnoDB. */
+Removes all tables in the named database inside InnoDB. */
 static
 void
 innobase_drop_database(
-                       /*===================*/
-                       /* out: error number */
-                       handlerton *hton, /* in: handlerton of Innodb */
-                       char*	path)	/* in: database path; inside InnoDB the name
-                                           of the last directory in the path is used as
-                                           the database name: for example, in 'mysql/data/test'
-                                           the database name is 'test' */
-{
-  ulint	len		= 0;
-  trx_t*	parent_trx;
-  trx_t*	trx;
-  char*	ptr;
-  int	error;
-  char*	namebuf;
+/*===================*/
+			/* out: error number */
+        handlerton *hton, /* in: handlerton of Innodb */
+	char*	path)	/* in: database path; inside InnoDB the name
+			of the last directory in the path is used as
+			the database name: for example, in 'mysql/data/test'
+			the database name is 'test' */
+{
+	ulint	len		= 0;
+	trx_t*	parent_trx;
+	trx_t*	trx;
+	char*	ptr;
+	int	error;
+	char*	namebuf;
+	THD*	thd		= current_thd;
 
-  /* Get the transaction associated with the current thd, or create one
-     if not yet created */
+	/* Get the transaction associated with the current thd, or create one
+	if not yet created */
 
-  parent_trx = check_trx_exists(hton, current_thd);
+	parent_trx = check_trx_exists(thd);
 
 	/* In case MySQL calls this in the middle of a SELECT query, release
 	possible adaptive hash latch to avoid deadlocks of threads */
@@ -5155,10 +5341,10 @@ innobase_drop_database(
 	innobase_casedn_str(namebuf);
 #endif
 	trx = trx_allocate_for_mysql();
-	trx->mysql_thd = current_thd;
-	trx->mysql_query_str = &((*current_thd).query);
+	trx->mysql_thd = thd;
+	trx->mysql_query_str = thd_query(thd);
 
-	if (thd_test_options(current_thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
+	if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
 		trx->check_foreigns = FALSE;
 	}
 
@@ -5204,13 +5390,14 @@ ha_innobase::rename_table(
 	trx_t*	trx;
 	char	norm_from[1000];
 	char	norm_to[1000];
+	THD*	thd		= ha_thd();
 
 	DBUG_ENTER("ha_innobase::rename_table");
 
 	/* Get the transaction associated with the current thd, or create one
 	if not yet created */
 
-	parent_trx = check_trx_exists(ht, ha_thd());
+	parent_trx = check_trx_exists(thd);
 
 	/* In case MySQL calls this in the middle of a SELECT query, release
 	possible adaptive hash latch to avoid deadlocks of threads */
@@ -5224,10 +5411,10 @@ ha_innobase::rename_table(
 	}
 
 	trx = trx_allocate_for_mysql();
-	trx->mysql_thd = ha_thd();
-	trx->mysql_query_str = &((*ha_thd()).query);
+	trx->mysql_thd = thd;
+	trx->mysql_query_str = thd_query(thd);
 
-	if (thd_test_options(ha_thd(), OPTION_NO_FOREIGN_KEY_CHECKS)) {
+	if (thd_test_options(thd, OPTION_NO_FOREIGN_KEY_CHECKS)) {
 		trx->check_foreigns = FALSE;
 	}
 
@@ -5489,7 +5676,12 @@ ha_innobase::info(
 
 	if (srv_force_recovery >= SRV_FORCE_NO_IBUF_MERGE) {
 
-                DBUG_RETURN(HA_ERR_CRASHED);
+		/* We return success (0) instead of HA_ERR_CRASHED,
+		because we want MySQL to process this query and not
+		stop, like it would do if it received the error code
+		HA_ERR_CRASHED. */
+
+		DBUG_RETURN(0);
 	}
 
 	/* We do not know if MySQL can call this function before calling
@@ -5509,15 +5701,14 @@ ha_innobase::info(
 
 	if (flag & HA_STATUS_TIME) {
 		if (srv_stats_on_metadata) {
-			/* In sql_show we call with this flag: update then statistics
-			so that they are up-to-date */
+			/* In sql_show we call with this flag: update
+			the statistics so that they are up-to-date */
 
-			prebuilt->trx->op_info = (char*)"updating table statistics";
+			prebuilt->trx->op_info = "updating table statistics";
 
 			dict_update_statistics(ib_table);
 
-			prebuilt->trx->op_info = (char*)
-						  "returning various info to MySQL";
+			prebuilt->trx->op_info = "returning various info to MySQL";
 		}
 
 		my_snprintf(path, sizeof(path), "%s/%s%s",
@@ -5642,7 +5833,8 @@ ha_innobase::info(
 	}
 
 	if (flag & HA_STATUS_ERRKEY) {
-		ut_a(prebuilt->trx && prebuilt->trx->magic_n == TRX_MAGIC_N);
+		ut_a(prebuilt->trx);
+		ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
 
 		errkey = (unsigned int) row_get_mysql_key_number_for_index(
 			(dict_index_t*) trx_get_error_info(prebuilt->trx));
@@ -5726,8 +5918,10 @@ ha_innobase::check(
 {
 	ulint		ret;
 
-	ut_a(prebuilt->trx && prebuilt->trx->magic_n == TRX_MAGIC_N);
-	ut_a(prebuilt->trx == thd_to_trx(ha_thd()));
+	DBUG_ASSERT(thd == ha_thd());
+	ut_a(prebuilt->trx);
+	ut_a(prebuilt->trx->magic_n == TRX_MAGIC_N);
+	ut_a(prebuilt->trx == thd_to_trx(thd));
 
 	if (prebuilt->mysql_template == NULL) {
 		/* Build the template; we will use a dummy template
@@ -5784,9 +5978,9 @@ ha_innobase::update_table_comment(
 	mutex_enter_noninline(&srv_dict_tmpfile_mutex);
 	rewind(srv_dict_tmpfile);
 
-	fprintf(srv_dict_tmpfile, "InnoDB free: %lu kB",
-		   (ulong) fsp_get_available_space_in_free_extents(
-					prebuilt->table->space));
+	fprintf(srv_dict_tmpfile, "InnoDB free: %llu kB",
+		fsp_get_available_space_in_free_extents(
+			prebuilt->table->space));
 
 	dict_print_info_on_foreign_keys(FALSE, srv_dict_tmpfile,
 				prebuilt->trx, prebuilt->table);
@@ -5909,8 +6103,8 @@ ha_innobase::get_foreign_key_list(THD *t
 	  while (tmp_buff[i] != '/')
 		  i++;
 	  tmp_buff+= i + 1;
-	  f_key_info.forein_id = thd_make_lex_string(thd, 0,
-		  tmp_buff, (uint) strlen(tmp_buff), 1);
+	  f_key_info.forein_id= thd_make_lex_string(thd, 0, tmp_buff,
+		  (uint) strlen(tmp_buff), 1);
 	  tmp_buff= foreign->referenced_table_name;
 
           /* Database name */
@@ -5922,23 +6116,22 @@ ha_innobase::get_foreign_key_list(THD *t
           }
           db_name[i]= 0;
           ulen= filename_to_tablename(db_name, uname, sizeof(uname));
-	  f_key_info.referenced_db = thd_make_lex_string(thd, 0,
-		  uname, ulen, 1);
+          f_key_info.referenced_db= thd_make_lex_string(thd, 0, uname, ulen, 1);
 
           /* Table name */
 	  tmp_buff+= i + 1;
           ulen= filename_to_tablename(tmp_buff, uname, sizeof(uname));
-	  f_key_info.referenced_table = thd_make_lex_string(thd, 0,
-		  uname, ulen, 1);
+          f_key_info.referenced_table= thd_make_lex_string(thd, 0, uname,
+                                                           ulen, 1);
 
 	  for (i= 0;;) {
 		  tmp_buff= foreign->foreign_col_names[i];
-		  name = thd_make_lex_string(thd, name,
-			  tmp_buff, (uint) strlen(tmp_buff), 1);
+		  name= thd_make_lex_string(thd, name, tmp_buff,
+			  (uint) strlen(tmp_buff), 1);
 		  f_key_info.foreign_fields.push_back(name);
 		  tmp_buff= foreign->referenced_col_names[i];
-		  name = thd_make_lex_string(thd, name,
-			tmp_buff, (uint) strlen(tmp_buff), 1);
+		  name= thd_make_lex_string(thd, name, tmp_buff,
+			  (uint) strlen(tmp_buff), 1);
 		  f_key_info.referenced_fields.push_back(name);
 		  if (++i >= foreign->n_fields)
 			  break;
@@ -5965,8 +6158,8 @@ ha_innobase::get_foreign_key_list(THD *t
             length=8;
             tmp_buff= "RESTRICT";
           }
-	  f_key_info.delete_method = thd_make_lex_string(
-		  thd, f_key_info.delete_method, tmp_buff, length, 1);
+          f_key_info.delete_method= thd_make_lex_string(thd, f_key_info.delete_method,
+                                                        tmp_buff, length, 1);
  
  
           if (foreign->type & DICT_FOREIGN_ON_UPDATE_CASCADE)
@@ -5989,20 +6182,19 @@ ha_innobase::get_foreign_key_list(THD *t
             length=8;
             tmp_buff= "RESTRICT";
           }
-	  f_key_info.update_method = thd_make_lex_string(
-		  thd, f_key_info.update_method, tmp_buff, length, 1);
+          f_key_info.update_method= thd_make_lex_string(thd, f_key_info.update_method,
+                                                        tmp_buff, length, 1);
           if (foreign->referenced_index &&
               foreign->referenced_index->name)
           {
-	    f_key_info.referenced_key_name = thd_make_lex_string(
-		    thd, f_key_info.referenced_key_name,
-		    foreign->referenced_index->name,
-		    strlen(foreign->referenced_index->name), 1);
+            f_key_info.referenced_key_name= 
+              thd_make_lex_string(thd, f_key_info.referenced_key_name,
+                                  foreign->referenced_index->name,
+                                  strlen(foreign->referenced_index->name), 1);
           }
 
-	  FOREIGN_KEY_INFO *pf_key_info= ((FOREIGN_KEY_INFO *)
-		  thd->memdup(&f_key_info,
-			  sizeof(FOREIGN_KEY_INFO)));
+	  FOREIGN_KEY_INFO *pf_key_info = (FOREIGN_KEY_INFO *)
+		thd_memdup(thd, &f_key_info, sizeof f_key_info);
 	  f_key_list->push_back(pf_key_info);
 	  foreign = UT_LIST_GET_NEXT(foreign_list, foreign);
   }
@@ -6094,8 +6286,7 @@ ha_innobase::extra(
 			}
 			break;
 		case HA_EXTRA_RESET_STATE:
-			prebuilt->keep_other_fields_on_keyread = 0;
-			prebuilt->read_just_key = 0;
+			reset_template(prebuilt);
 			break;
 		case HA_EXTRA_NO_KEYREAD:
 			prebuilt->read_just_key = 0;
@@ -6106,18 +6297,25 @@ ha_innobase::extra(
 		case HA_EXTRA_KEYREAD_PRESERVE_FIELDS:
 			prebuilt->keep_other_fields_on_keyread = 1;
 			break;
+
+			/* IMPORTANT: prebuilt->trx can be obsolete in
+			this method, because it is not sure that MySQL
+			calls external_lock before this method with the
+			parameters below.  We must not invoke update_thd()
+			either, because the calling threads may change.
+			CAREFUL HERE, OR MEMORY CORRUPTION MAY OCCUR! */
 		case HA_EXTRA_IGNORE_DUP_KEY:
-			prebuilt->trx->allow_duplicates= TRUE;
+			thd_to_trx(ha_thd())->duplicates |= TRX_DUP_IGNORE;
 			break;
 		case HA_EXTRA_WRITE_CAN_REPLACE:
-			prebuilt->trx->replace_duplicates= TRUE;
+			thd_to_trx(ha_thd())->duplicates |= TRX_DUP_REPLACE;
 			break;
 		case HA_EXTRA_WRITE_CANNOT_REPLACE:
-			prebuilt->trx->replace_duplicates= FALSE;
+			thd_to_trx(ha_thd())->duplicates &= ~TRX_DUP_REPLACE;
 			break;
 		case HA_EXTRA_NO_IGNORE_DUP_KEY:
-			prebuilt->trx->allow_duplicates= FALSE;
-			prebuilt->trx->replace_duplicates= FALSE;
+			thd_to_trx(ha_thd())->duplicates &=
+				~(TRX_DUP_IGNORE | TRX_DUP_REPLACE);
 			break;
 		default:/* Do nothing */
 			;
@@ -6131,8 +6329,7 @@ int ha_innobase::reset()
   if (prebuilt->blob_heap) {
     row_mysql_prebuilt_free_blob_heap(prebuilt);
   }
-  prebuilt->keep_other_fields_on_keyread = 0;
-  prebuilt->read_just_key = 0;
+  reset_template(prebuilt);
   return 0;
 }
 
@@ -6173,8 +6370,7 @@ ha_innobase::start_stmt(
 
 	prebuilt->sql_stat_start = TRUE;
 	prebuilt->hint_need_to_fetch_extra_cols = 0;
-	prebuilt->read_just_key = 0;
-	prebuilt->keep_other_fields_on_keyread = FALSE;
+	reset_template(prebuilt);
 
 	if (!prebuilt->mysql_has_locked) {
 		/* This handle is for a temporary table created inside
@@ -6261,36 +6457,12 @@ ha_innobase::external_lock(
 
 	update_thd(thd);
 
-        /* Statement based binlogging does not work in isolation level
-           READ UNCOMMITTED and READ COMMITTED since the necessary
-           locks cannot be taken. In this case, we print an
-           informative error message and return with an error. */
-        if (lock_type == F_WRLCK)
-        {
-                ulong const binlog_format= thd->variables.binlog_format;
-                ulong const tx_isolation = thd_tx_isolation(current_thd);
-                if (tx_isolation <= ISO_READ_COMMITTED &&
-                    binlog_format == BINLOG_FORMAT_STMT)
-                {
-                        char buf[256];
-                        my_snprintf(buf, sizeof(buf),
-                                    "Transaction level '%s' in"
-                                    " InnoDB is not safe for binlog mode '%s'",
-                                    tx_isolation_names[tx_isolation],
-                                    binlog_format_names[binlog_format]);
-                        my_error(ER_BINLOG_LOGGING_IMPOSSIBLE, MYF(0), buf);
-                        DBUG_RETURN(HA_ERR_LOGGING_IMPOSSIBLE);
-                }
-        }
-
-
 	trx = prebuilt->trx;
 
 	prebuilt->sql_stat_start = TRUE;
 	prebuilt->hint_need_to_fetch_extra_cols = 0;
 
-	prebuilt->read_just_key = 0;
-	prebuilt->keep_other_fields_on_keyread = FALSE;
+	reset_template(prebuilt);
 
 	if (lock_type == F_WRLCK) {
 
@@ -6348,17 +6520,17 @@ ha_innobase::external_lock(
 
 		if (prebuilt->select_lock_type != LOCK_NONE) {
 
-			if (thd_in_lock_tables(thd) &&
-				thd_sql_command(thd) == SQLCOM_LOCK_TABLES &&
-				THDVAR(thd, table_locks) &&
-				thd_test_options(thd, OPTION_NOT_AUTOCOMMIT)) {
+			if (thd_sql_command(thd) == SQLCOM_LOCK_TABLES
+			    && THDVAR(thd, table_locks)
+			    && thd_test_options(thd, OPTION_NOT_AUTOCOMMIT)
+			    && thd_in_lock_tables(thd)) {
 
 				ulint	error = row_lock_table_for_mysql(
 					prebuilt, NULL, 0);
 
 				if (error != DB_SUCCESS) {
 					error = convert_error_code_to_mysql(
-						(int) error, user_thd);
+						(int) error, thd);
 					DBUG_RETURN((int) error);
 				}
 			}
@@ -6374,6 +6546,12 @@ ha_innobase::external_lock(
 	trx->n_mysql_tables_in_use--;
 	prebuilt->mysql_has_locked = FALSE;
 
+	/* Release a possible FIFO ticket and search latch. Since we
+	may reserve the kernel mutex, we have to release the search
+	system latch first to obey the latching order. */
+
+	innobase_release_stat_resources(trx);
+
 	/* If the MySQL lock count drops to zero we know that the current SQL
 	statement has ended */
 
@@ -6382,12 +6560,6 @@ ha_innobase::external_lock(
 		trx->mysql_n_tables_locked = 0;
 		prebuilt->used_in_HANDLER = FALSE;
 
-		/* Release a possible FIFO ticket and search latch. Since we
-		may reserve the kernel mutex, we have to release the search
-		system latch first to obey the latching order. */
-
-		innobase_release_stat_resources(trx);
-
 		if (!thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)) {
 			if (trx->active_trans != 0) {
 				innobase_commit(ht, thd, TRUE);
@@ -6429,8 +6601,7 @@ ha_innobase::transactional_table_lock(
 
 	update_thd(thd);
 
-	if (prebuilt->table->ibd_file_missing
-	    && !thd_tablespace_op(ha_thd())) {
+	if (prebuilt->table->ibd_file_missing && !thd_tablespace_op(thd)) {
 		ut_print_timestamp(stderr);
 		fprintf(stderr, "  InnoDB error:\n"
 "MySQL is trying to use a table handle but the .ibd file for\n"
@@ -6448,8 +6619,7 @@ ha_innobase::transactional_table_lock(
 	prebuilt->sql_stat_start = TRUE;
 	prebuilt->hint_need_to_fetch_extra_cols = 0;
 
-	prebuilt->read_just_key = 0;
-	prebuilt->keep_other_fields_on_keyread = FALSE;
+	reset_template(prebuilt);
 
 	if (lock_type == F_WRLCK) {
 		prebuilt->select_lock_type = LOCK_X;
@@ -6475,13 +6645,13 @@ ha_innobase::transactional_table_lock(
 		trx->active_trans = 1;
 	}
 
-	if (thd_in_lock_tables(thd) && THDVAR(thd, table_locks)) {
+	if (THDVAR(thd, table_locks) && thd_in_lock_tables(thd)) {
 		ulint	error = DB_SUCCESS;
 
 		error = row_lock_table_for_mysql(prebuilt, NULL, 0);
 
 		if (error != DB_SUCCESS) {
-			error = convert_error_code_to_mysql((int) error, user_thd);
+			error = convert_error_code_to_mysql((int) error, thd);
 			DBUG_RETURN((int) error);
 		}
 
@@ -6531,7 +6701,11 @@ innodb_show_status(
 
 	DBUG_ENTER("innodb_show_status");
 
-	trx = check_trx_exists(hton, thd);
+	if (hton->state != SHOW_OPTION_YES) {
+		DBUG_RETURN(FALSE);
+	}
+
+	trx = check_trx_exists(thd);
 
 	innobase_release_stat_resources(trx);
 
@@ -6809,7 +6983,7 @@ ha_innobase::store_lock(
 	because we call update_thd() later, in ::external_lock()! Failure to
 	understand this caused a serious memory corruption bug in 5.1.11. */
 
-	trx = check_trx_exists(ht, thd);
+	trx = check_trx_exists(thd);
 
 	/* NOTE: MySQL can call this function with lock 'type' TL_IGNORE!
 	Be careful to ignore TL_IGNORE if we are going to do something with
@@ -6819,9 +6993,9 @@ ha_innobase::store_lock(
 	of the transaction. */
 
 	if (lock_type != TL_IGNORE
-	&& trx->n_mysql_tables_in_use == 0) {
+	    && trx->n_mysql_tables_in_use == 0) {
 		trx->isolation_level = innobase_map_isolation_level(
-                                    (enum_tx_isolation)thd_tx_isolation(thd));
+			(enum_tx_isolation) thd_tx_isolation(thd));
 
 		if (trx->isolation_level <= TRX_ISO_READ_COMMITTED
 		    && trx->global_read_view) {
@@ -6843,12 +7017,12 @@ ha_innobase::store_lock(
 		handle may belong to another thd that is running a query. Let
 		us in that case skip any changes to the prebuilt struct. */ 
 
-	} else if ((lock_type == TL_READ && in_lock_tables) ||
-		(lock_type == TL_READ_HIGH_PRIORITY && in_lock_tables) ||
-		lock_type == TL_READ_WITH_SHARED_LOCKS ||
-		lock_type == TL_READ_NO_INSERT ||
-		(sql_command != SQLCOM_SELECT
-			&& lock_type != TL_IGNORE)) {
+	} else if ((lock_type == TL_READ && in_lock_tables)
+		   || (lock_type == TL_READ_HIGH_PRIORITY && in_lock_tables)
+		   || lock_type == TL_READ_WITH_SHARED_LOCKS
+		   || lock_type == TL_READ_NO_INSERT
+		   || (lock_type != TL_IGNORE
+		       && sql_command != SQLCOM_SELECT)) {
 
 		/* The OR cases above are in this order:
 		1) MySQL is doing LOCK TABLES ... READ LOCAL, or we
@@ -6873,10 +7047,10 @@ ha_innobase::store_lock(
 		isolation_level = trx->isolation_level;
 
 		if ((srv_locks_unsafe_for_binlog
-			|| isolation_level == TRX_ISO_READ_COMMITTED)
-		&& isolation_level != TRX_ISO_SERIALIZABLE
-		&& (lock_type == TL_READ || lock_type == TL_READ_NO_INSERT)
-		&& (sql_command == SQLCOM_INSERT_SELECT
+		     || isolation_level == TRX_ISO_READ_COMMITTED)
+		    && isolation_level != TRX_ISO_SERIALIZABLE
+		    && (lock_type == TL_READ || lock_type == TL_READ_NO_INSERT)
+		    && (sql_command == SQLCOM_INSERT_SELECT
 			|| sql_command == SQLCOM_UPDATE
 			|| sql_command == SQLCOM_CREATE_TABLE)) {
 
@@ -6947,24 +7121,25 @@ ha_innobase::store_lock(
 		TRUE there). */
 
 		if ((lock_type >= TL_WRITE_CONCURRENT_INSERT
-		&& lock_type <= TL_WRITE)
-		&& !(in_lock_tables
-			&& sql_command == SQLCOM_LOCK_TABLES)
-		&& !thd_tablespace_op(thd)
-		&& sql_command != SQLCOM_TRUNCATE
-		&& sql_command != SQLCOM_OPTIMIZE
+		     && lock_type <= TL_WRITE)
+		    && !(in_lock_tables
+			 && sql_command == SQLCOM_LOCK_TABLES)
+		    && !thd_tablespace_op(thd)
+		    && sql_command != SQLCOM_TRUNCATE
+		    && sql_command != SQLCOM_OPTIMIZE
 
 #ifdef __WIN__
-		/* For alter table on win32 for succesful operation
-		completion it is used TL_WRITE(=10) lock instead of
-		TL_WRITE_ALLOW_READ(=6), however here in innodb handler
-		TL_WRITE is lifted to TL_WRITE_ALLOW_WRITE, which causes
-		race condition when several clients do alter table
-		simultaneously (bug #17264). This fix avoids the problem. */
-		&& sql_command != SQLCOM_ALTER_TABLE
+		    /* On win32, ALTER TABLE needs a TL_WRITE(=10) lock
+		    instead of TL_WRITE_ALLOW_READ(=6) to complete
+		    successfully. However, here in the InnoDB handler
+		    TL_WRITE is lifted to TL_WRITE_ALLOW_WRITE, which
+		    causes a race condition when several clients do
+		    ALTER TABLE simultaneously (bug #17264). This fix
+		    avoids the problem. */
+		    && sql_command != SQLCOM_ALTER_TABLE
 #endif
 
-		&& sql_command != SQLCOM_CREATE_TABLE) {
+		    && sql_command != SQLCOM_CREATE_TABLE) {
 
 			lock_type = TL_WRITE_ALLOW_WRITE;
 		}
@@ -7002,18 +7177,23 @@ the value of the auto-inc counter. */
 int
 ha_innobase::innobase_read_and_init_auto_inc(
 /*=========================================*/
-				/* out: 0 or error code: deadlock or lock wait
-				timeout */
-	longlong*	ret)	/* out: auto-inc value */
+						/* out: 0 or error code:
+						deadlock or lock wait timeout */
+        longlong*	value)			/* out: the autoinc value */
 {
 	longlong	auto_inc;
-	ulint		old_select_lock_type;
+	ibool		stmt_start;
+	int		mysql_error = 0;
+	dict_table_t*	innodb_table = prebuilt->table;
 	ibool		trx_was_not_started	= FALSE;
-	int		error;
 
 	ut_a(prebuilt);
 	ut_a(prebuilt->table);
 
+	/* Remember if we are in the beginning of an SQL statement.
+	This function must not change that flag. */
+	stmt_start = prebuilt->sql_stat_start;
+
 	/* Prepare prebuilt->trx in the table handle */
 	update_thd(ha_thd());
 
@@ -7026,114 +7206,117 @@ ha_innobase::innobase_read_and_init_auto
 
 	trx_search_latch_release_if_reserved(prebuilt->trx);
 
-	auto_inc = dict_table_autoinc_read(prebuilt->table);
+	dict_table_autoinc_lock(prebuilt->table);
 
-	if (auto_inc != 0) {
-		/* Already initialized */
-		*ret = auto_inc;
-
-		error = 0;
+	auto_inc = dict_table_autoinc_read(prebuilt->table);
 
-		goto func_exit_early;
+	/* If the AUTOINC counter was reset during normal processing, we
+	simply start counting from 1. No need to go to the index. */
+	if (auto_inc == 0 && innodb_table->autoinc_inited) {
+		++auto_inc;
+		dict_table_autoinc_initialize(innodb_table, auto_inc);
 	}
 
-	error = row_lock_table_autoinc_for_mysql(prebuilt);
+	if (auto_inc == 0) {
+		dict_index_t* index;
+		ulint error = DB_SUCCESS;
+		const char* autoinc_col_name;
 
-	if (error != DB_SUCCESS) {
-		error = convert_error_code_to_mysql(error, user_thd);
+		ut_a(!innodb_table->autoinc_inited);
 
-		goto func_exit_early;
-	}
+		index = innobase_get_index(table->s->next_number_index);
 
-	/* Check again if someone has initialized the counter meanwhile */
-	auto_inc = dict_table_autoinc_read(prebuilt->table);
+		autoinc_col_name = table->found_next_number_field->field_name;
 
-	if (auto_inc != 0) {
-		*ret = auto_inc;
+		error = row_search_max_autoinc(
+			index, autoinc_col_name, &auto_inc);
 
-		error = 0;
+		if (error == DB_SUCCESS) {
+			++auto_inc;
+			dict_table_autoinc_initialize(innodb_table, auto_inc);
+		} else {
+			fprintf(stderr, "  InnoDB error: Couldn't read the "
+				"max AUTOINC value from index (%s).\n",
+				index->name);
 
-		goto func_exit_early;
+			mysql_error = 1;
+		}
 	}
 
-	(void) extra(HA_EXTRA_KEYREAD);
-	index_init(table->s->next_number_index, 1);
+	*value = auto_inc;
 
-	/* Starting from 5.0.9, we use a consistent read to read the auto-inc
-	column maximum value. This eliminates the spurious deadlocks caused
-	by the row X-lock that we previously used. Note the following flaw
-	in our algorithm: if some other user meanwhile UPDATEs the auto-inc
-	column, our consistent read will not return the largest value. We
-	accept this flaw, since the deadlocks were a bigger trouble. */
+	dict_table_autoinc_unlock(prebuilt->table);
 
-	/* Fetch all the columns in the key */
+	/* Since MySQL does not seem to call autocommit after SHOW TABLE
+	STATUS (even if we would register the trx here), we commit our
+	transaction here if it was started here. This is to eliminate a
+	dangling transaction. If the user had AUTOCOMMIT=0, then SHOW
+	TABLE STATUS does leave a dangling transaction if the user does not
+	himself call COMMIT. */
 
-	prebuilt->hint_need_to_fetch_extra_cols = ROW_RETRIEVE_ALL_COLS;
+	if (trx_was_not_started) {
 
-	old_select_lock_type = prebuilt->select_lock_type;
-	prebuilt->select_lock_type = LOCK_NONE;
+		innobase_commit_low(prebuilt->trx);
+	}
 
-	/* Eliminate an InnoDB error print that happens when we try to SELECT
-	from a table when no table has been locked in ::external_lock(). */
-	prebuilt->trx->n_mysql_tables_in_use++;
+	prebuilt->sql_stat_start = stmt_start;
 
-	error = index_last(table->record[1]);
+	return(mysql_error);
+}
 
-	prebuilt->trx->n_mysql_tables_in_use--;
-	prebuilt->select_lock_type = old_select_lock_type;
+/*******************************************************************************
+Read the next autoinc value; initialize the table if it's not initialized.
+On return, if there is no error, the table's AUTOINC lock is locked. */
 
-	if (error) {
-		if (error == HA_ERR_END_OF_FILE) {
-			/* The table was empty, initialize to 1 */
-			auto_inc = 1;
+ulong
+ha_innobase::innobase_get_auto_increment(
+	ulonglong*	value)		/* out: autoinc value */
+{
+	ulint		error;
 
-			error = 0;
-		} else {
-			/* This should not happen in a consistent read */
-		  sql_print_error("Consistent read of auto-inc column "
-				  "returned %lu", (ulong) error);
-			auto_inc = -1;
+	do {
+		error = innobase_autoinc_lock();
 
-			goto func_exit;
-		}
-	} else {
-		/* Initialize to max(col) + 1; we use
-		'found_next_number_field' below because MySQL in SHOW TABLE
-		STATUS does not seem to set 'next_number_field'. The comment
-		in table.h says that 'next_number_field' is set when it is
-		'active'.
-		Since 5.1 MySQL enforces that we announce fields which we will
-		read; as we only do a val_*() call, dbug_tmp_use_all_columns()
-		with read_set is sufficient. */
-
-		my_bitmap_map *old_map;
-		old_map= dbug_tmp_use_all_columns(table, table->read_set);
-		auto_inc = (longlong) table->found_next_number_field->
-				val_int_offset(table->s->rec_buff_length) + 1;
-		dbug_tmp_restore_column_map(table->read_set, old_map);
-	}
+		if (error == DB_SUCCESS) {
+			ib_longlong	autoinc;
 
-	dict_table_autoinc_initialize(prebuilt->table, auto_inc);
+			/* Determine the first value of the interval */
+			autoinc = dict_table_autoinc_read(prebuilt->table);
 
-func_exit:
-	(void) extra(HA_EXTRA_NO_KEYREAD);
+			/* We need to initialize the AUTO-INC value, for
+			that we release all locks.*/
+			if (autoinc <= 0) {
+				trx_t*		trx;
+
+				trx = prebuilt->trx;
+				dict_table_autoinc_unlock(prebuilt->table);
 
-	index_end();
+				if (trx->auto_inc_lock) {
+					/* If we had reserved the AUTO-INC
+					lock in this SQL statement we release
+					it before retrying.*/
+					row_unlock_table_autoinc_for_mysql(trx);
+				}
 
-	*ret = auto_inc;
+				/* Just to make sure */
+				ut_a(!trx->auto_inc_lock);
 
-func_exit_early:
-	/* Since MySQL does not seem to call autocommit after SHOW TABLE
-	STATUS (even if we would register the trx here), we commit our
-	transaction here if it was started here. This is to eliminate a
-	dangling transaction. If the user had AUTOCOMMIT=0, then SHOW
-	TABLE STATUS does leave a dangling transaction if the user does not
-	himself call COMMIT. */
+				int	mysql_error;
 
-	if (trx_was_not_started) {
+				mysql_error = innobase_read_and_init_auto_inc(
+					&autoinc);
 
-		innobase_commit_low(prebuilt->trx);
-	}
+				if (!mysql_error) {
+					/* Should have read the proper value */
+					ut_a(autoinc > 0);
+				} else {
+					error = DB_ERROR;
+				}
+			} else {
+				*value = (ulonglong) autoinc;
+			}
+		}
+	} while (*value == 0 && error == DB_SUCCESS);
 
 	return(error);
 }
@@ -7146,37 +7329,87 @@ auto-inc counter in *first_value, and UL
 we have a table-level lock). offset, increment, nb_desired_values are ignored.
 *first_value is set to -1 if error (deadlock or lock wait timeout)            */
 
-void ha_innobase::get_auto_increment(
+void
+ha_innobase::get_auto_increment(
 /*=================================*/
-        ulonglong offset,              /* in */
-        ulonglong increment,           /* in */
-        ulonglong nb_desired_values,   /* in */
-        ulonglong *first_value,        /* out */
-        ulonglong *nb_reserved_values) /* out */
+        ulonglong	offset,              /* in: */
+        ulonglong	increment,           /* in: table autoinc increment */
+        ulonglong	nb_desired_values,   /* in: number of values reqd */
+        ulonglong	*first_value,        /* out: the autoinc value */
+        ulonglong	*nb_reserved_values) /* out: count of reserved values */
 {
-	longlong	nr;
-	int		error;
+	ulint		error;
+	ulonglong	autoinc = 0;
 
 	/* Prepare prebuilt->trx in the table handle */
 	update_thd(ha_thd());
 
-	error = innobase_read_and_init_auto_inc(&nr);
+	error = innobase_get_auto_increment(&autoinc);
 
-	if (error) {
-		/* This should never happen in the current (5.0.6) code, since
-		we call this function only after the counter has been
-		initialized. */
+	if (error != DB_SUCCESS) {
+		/* This should never happen in the code > ver 5.0.6,
+		since we call this function only after the counter
+		has been initialized. */
 
 		ut_print_timestamp(stderr);
-		sql_print_error("Error %lu in ::get_auto_increment()",
-				(ulong) error);
-                *first_value= (~(ulonglong) 0);
+		sql_print_error("Error %lu in ::get_auto_increment()", error);
+
+		*first_value = (~(ulonglong) 0);
 		return;
 	}
 
-        *first_value= (ulonglong) nr;
-        /* table-level autoinc lock reserves up to +inf */
-        *nb_reserved_values= ULONGLONG_MAX;
+	/* This is a hack, since nb_desired_values seems to be accurate only
+	for the first call to get_auto_increment() for multi-row INSERT and
+	meaningless for other statements, e.g., LOAD. Subsequent calls to
+	this method for the same statement result in different values which
+	don't make sense. Therefore we store the value the first time we are
+	called and count down from that as rows are written (see write_row()).
+
+	We make one exception: if *first_value is precomputed by MySQL,
+	we use that value, and we set the number of reserved values to 1 if
+	this is the first time we were called for the SQL statement; this
+	will force MySQL to call us for the next value. If we are in the
+	middle of a multi-row insert we preserve the existing counter. */
+	if (*first_value == 0) {
+
+		/* Called for the first time ? */
+		if (prebuilt->trx->n_autoinc_rows == 0) {
+
+			prebuilt->trx->n_autoinc_rows = nb_desired_values;
+
+			/* It's possible for nb_desired_values to be 0:
+			e.g., INSERT INTO T1(C) SELECT C FROM T2; */
+			if (nb_desired_values == 0) {
+
+				++prebuilt->trx->n_autoinc_rows;
+			}
+		}
+
+		*first_value = autoinc;
+
+	} else if (prebuilt->trx->n_autoinc_rows == 0) {
+
+		prebuilt->trx->n_autoinc_rows = 1;
+	}
+
+	ut_a(prebuilt->trx->n_autoinc_rows > 0);
+
+	*nb_reserved_values = prebuilt->trx->n_autoinc_rows;
+
+	/* Compute the last value in the interval */
+	prebuilt->last_value = *first_value + (*nb_reserved_values * increment);
+
+	ut_a(prebuilt->last_value >= *first_value);
+
+	/* Update the table autoinc variable */
+	dict_table_autoinc_update(prebuilt->table, prebuilt->last_value);
+
+	/* The increment to be used to increase the AUTOINC value, we use
+	this in write_row() and update_row() to increase the autoinc counter
+	for columns that are filled by the user.*/
+	prebuilt->table->autoinc_increment = increment;
+
+	dict_table_autoinc_unlock(prebuilt->table);
 }
 
 /* See comment in handler.h */
@@ -7197,7 +7430,7 @@ ha_innobase::reset_auto_increment(ulongl
 		DBUG_RETURN(error);
 	}
 
-	dict_table_autoinc_initialize(prebuilt->table, value);
+	innobase_reset_autoinc(value);
 
 	DBUG_RETURN(0);
 }
@@ -7206,7 +7439,7 @@ ha_innobase::reset_auto_increment(ulongl
 bool
 ha_innobase::get_error_message(int error, String *buf)
 {
-	trx_t*	trx = check_trx_exists(ht, ha_thd());
+	trx_t*	trx = check_trx_exists(ha_thd());
 
 	buf->copy(trx->detailed_error, strlen(trx->detailed_error),
 		system_charset_info);
@@ -7328,7 +7561,6 @@ ha_innobase::get_mysql_bin_log_pos()
 	return(trx_sys_mysql_bin_log_pos);
 }
 
-extern "C" {
 /**********************************************************************
 This function is used to find the storage length in bytes of the first n
 characters for prefix indexes using a multibyte character set. The function
@@ -7337,7 +7569,7 @@ index field in bytes.
 
 NOTE: the prototype of this function is copied to data0type.c! If you change
 this function, you MUST change also data0type.c! */
-
+extern "C"
 ulint
 innobase_get_at_most_n_mbchars(
 /*===========================*/
@@ -7402,39 +7634,6 @@ innobase_get_at_most_n_mbchars(
 
 	return(char_length);
 }
-}
-
-/**********************************************************************
-This function returns true if
-
-1) SQL-query in the current thread
-is either REPLACE or LOAD DATA INFILE REPLACE.
-
-2) SQL-query in the current thread
-is INSERT ON DUPLICATE KEY UPDATE.
-
-NOTE that storage/innobase/row/row0ins.c must contain the
-prototype for this function ! */
-extern "C"
-ibool
-innobase_query_is_update(void)
-/*==========================*/
-{
-	THD*	thd = current_thd;
-	trx_t*	trx;
-
-	if (!thd) {
-		/* InnoDB's internal threads may run InnoDB stored procedures
-		that call this function. Then current_thd is not defined
-		(it is probably NULL). */
-
-		return(FALSE);
-	}
-
-	trx = check_trx_exists(innodb_hton_ptr, thd);
-
-	return(trx->allow_duplicates);
-}
 
 /***********************************************************************
 This function is used to prepare X/Open XA distributed transaction   */
@@ -7450,7 +7649,7 @@ innobase_xa_prepare(
 			FALSE - the current SQL statement ended */
 {
 	int error = 0;
-	trx_t* trx = check_trx_exists(hton, thd);
+	trx_t* trx = check_trx_exists(thd);
 
 	if (thd_sql_command(thd) != SQLCOM_XA_PREPARE &&
 	    (all || !thd_test_options(thd, OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
@@ -7484,7 +7683,7 @@ innobase_xa_prepare(
 		return(0);
 	}
 
-	trx->xid=thd->transaction.xid_state.xid;
+	thd_get_xid(thd, &trx->xid);
 
 	/* Release a possible FIFO ticket and search latch. Since we will
 	reserve the kernel mutex, we have to release the search system latch
@@ -7517,6 +7716,7 @@ innobase_xa_prepare(
 
 			row_unlock_table_autoinc_for_mysql(trx);
 		}
+
 		/* Store the current undo_no of the transaction so that we
 		know where to roll back if we have to roll back the next
 		SQL statement */
@@ -7611,8 +7811,7 @@ innobase_create_cursor_view(
         handlerton *hton, /* in: innobase hton */
 	THD* thd)	  /* in: user thread handle */
 {
-	return(read_cursor_view_create_for_mysql(
-					check_trx_exists(hton, thd)));
+	return(read_cursor_view_create_for_mysql(check_trx_exists(thd)));
 }
 
 /***********************************************************************
@@ -7627,8 +7826,8 @@ innobase_close_cursor_view(
 	THD*	thd,	/* in: user thread handle */
 	void*	curview)/* in: Consistent read view to be closed */
 {
-	read_cursor_view_close_for_mysql(check_trx_exists(hton, current_thd),
-						(cursor_view_t*) curview);
+	read_cursor_view_close_for_mysql(check_trx_exists(thd),
+					 (cursor_view_t*) curview);
 }
 
 /***********************************************************************
@@ -7644,8 +7843,8 @@ innobase_set_cursor_view(
 	THD*	thd,	/* in: user thread handle */
 	void*	curview)/* in: Consistent cursor view to be set */
 {
-	read_cursor_set_for_mysql(check_trx_exists(hton, current_thd),
-						(cursor_view_t*) curview);
+	read_cursor_set_for_mysql(check_trx_exists(thd),
+				  (cursor_view_t*) curview);
 }
 
 
@@ -7728,8 +7927,9 @@ static MYSQL_SYSVAR_BOOL(file_per_table,
 
 static MYSQL_SYSVAR_ULONG(flush_log_at_trx_commit, srv_flush_log_at_trx_commit,
   PLUGIN_VAR_OPCMDARG,
- "Set to 0 (write and flush once per second), 1 (write and flush at each commit)"
- " or 2 (write at commit, flush once per second).",
+  "Set to 0 (write and flush once per second),"
+  " 1 (write and flush at each commit)"
+  " or 2 (write at commit, flush once per second).",
   NULL, NULL, 1, 0, 2, 0);
 
 static MYSQL_SYSVAR_STR(flush_method, innobase_unix_file_flush_method,
@@ -7741,11 +7941,11 @@ static MYSQL_SYSVAR_BOOL(locks_unsafe_fo
   "Force InnoDB to not use next-key locking, to use only row-level locking.",
   NULL, NULL, FALSE);
 
+#ifdef UNIV_LOG_ARCHIVE
 static MYSQL_SYSVAR_STR(log_arch_dir, innobase_log_arch_dir,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
   "Where full logs should be archived.", NULL, NULL, NULL);
 
-#ifdef UNIV_LOG_ARCHIVE
 static MYSQL_SYSVAR_BOOL(log_archive, innobase_log_archive,
   PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
   "Set to 1 if you want to have logs archived.", NULL, NULL, FALSE);
@@ -7883,8 +8083,8 @@ static struct st_mysql_sys_var* innobase
   MYSQL_SYSVAR(force_recovery),
   MYSQL_SYSVAR(locks_unsafe_for_binlog),
   MYSQL_SYSVAR(lock_wait_timeout),
-  MYSQL_SYSVAR(log_arch_dir),
 #ifdef UNIV_LOG_ARCHIVE
+  MYSQL_SYSVAR(log_arch_dir),
   MYSQL_SYSVAR(log_archive),
 #endif /* UNIV_LOG_ARCHIVE */
   MYSQL_SYSVAR(log_buffer_size),
@@ -7922,5 +8122,3 @@ mysql_declare_plugin(innobase)
   NULL /* reserved */
 }
 mysql_declare_plugin_end;
-
-#endif
diff -Nrup a/storage/innobase/handler/ha_innodb.h b/storage/innobase/handler/ha_innodb.h
--- a/storage/innobase/handler/ha_innodb.h	2007-06-12 13:12:36 -07:00
+++ b/storage/innobase/handler/ha_innodb.h	2007-07-18 18:10:22 -07:00
@@ -32,7 +32,10 @@ typedef struct st_innobase_share {
 } INNOBASE_SHARE;
 
 
+struct dict_index_struct;
 struct row_prebuilt_struct;
+
+typedef struct dict_index_struct dict_index_t;
 typedef struct row_prebuilt_struct row_prebuilt_t;
 
 /* The class defining a handle to an Innodb table */
@@ -54,7 +57,7 @@ class ha_innobase: public handler
 	ulong		upd_and_key_val_buff_len;
 					/* the length of each of the previous
 					two buffers */
-	Table_flags	int_table_flags;
+	ulong		int_table_flags;
 	uint		primary_key;
 	ulong		start_of_scan;	/* this is set to 1 when we are
 					starting a table scan but have not
@@ -70,6 +73,11 @@ class ha_innobase: public handler
 	int change_active_index(uint keynr);
 	int general_fetch(uchar* buf, uint direction, uint match_mode);
 	int innobase_read_and_init_auto_inc(longlong* ret);
+	ulong innobase_autoinc_lock();
+	ulong innobase_set_max_autoinc(ulonglong auto_inc);
+	ulong innobase_reset_autoinc(ulonglong auto_inc);
+	ulong innobase_get_auto_increment(ulonglong* value);
+	dict_index_t* innobase_get_index(uint keynr);
 
 	/* Init values for the class: */
  public:
@@ -84,7 +92,7 @@ class ha_innobase: public handler
 	const char* table_type() const { return("InnoDB");}
 	const char *index_type(uint key_number) { return "BTREE"; }
 	const char** bas_ext() const;
-	Table_flags table_flags() const;
+	ulonglong table_flags() const { return int_table_flags; }
 	ulong index_flags(uint idx, uint part, bool all_parts) const
 	{
 	  return (HA_READ_NEXT |
@@ -189,29 +197,6 @@ class ha_innobase: public handler
 	bool check_if_incompatible_data(HA_CREATE_INFO *info,
 					uint table_changes);
 };
-
-extern long innobase_mirrored_log_groups, innobase_log_files_in_group;
-extern long long innobase_buffer_pool_size, innobase_log_file_size;
-extern long innobase_log_buffer_size;
-extern long innobase_additional_mem_pool_size;
-extern long innobase_buffer_pool_awe_mem_mb;
-extern long innobase_file_io_threads, innobase_lock_wait_timeout;
-extern long innobase_force_recovery;
-extern long innobase_open_files;
-extern char *innobase_data_home_dir, *innobase_data_file_path;
-extern char *innobase_log_group_home_dir, *innobase_log_arch_dir;
-extern char *innobase_unix_file_flush_method;
-extern "C" {
-extern ulong srv_max_buf_pool_modified_pct;
-extern ulong srv_max_purge_lag;
-extern ulong srv_auto_extend_increment;
-extern ulong srv_n_spin_wait_rounds;
-extern ulong srv_n_free_tickets_to_enter;
-extern ulong srv_thread_sleep_delay;
-extern ulong srv_thread_concurrency;
-extern ulong srv_commit_concurrency;
-extern ulong srv_flush_log_at_trx_commit;
-}
 
 /*
   don't delete it - it may be re-enabled later
diff -Nrup a/storage/innobase/ibuf/ibuf0ibuf.c b/storage/innobase/ibuf/ibuf0ibuf.c
--- a/storage/innobase/ibuf/ibuf0ibuf.c	2007-03-22 14:59:25 -07:00
+++ b/storage/innobase/ibuf/ibuf0ibuf.c	2007-07-18 18:10:22 -07:00
@@ -150,9 +150,30 @@ ulint	ibuf_flush_count	= 0;
 #define IBUF_COUNT_N_PAGES	2000
 
 /* Buffered entry counts for file pages, used in debugging */
-static ulint*	ibuf_counts[IBUF_COUNT_N_SPACES];
+static ulint	ibuf_counts[IBUF_COUNT_N_SPACES][IBUF_COUNT_N_PAGES];
 
-static ibool	ibuf_counts_inited	= FALSE;
+/**********************************************************************
+Checks that the indexes to ibuf_counts[][] are within limits. */
+UNIV_INLINE
+void
+ibuf_count_check(
+/*=============*/
+	ulint	space_id,	/* in: space identifier */
+	ulint	page_no)	/* in: page number */
+{
+	if (space_id < IBUF_COUNT_N_SPACES && page_no < IBUF_COUNT_N_PAGES) {
+		return;
+	}
+
+	fprintf(stderr,
+		"InnoDB: UNIV_IBUF_DEBUG limits space_id and page_no\n"
+		"InnoDB: and breaks crash recovery.\n"
+		"InnoDB: space_id=%lu, should be 0<=space_id<%lu\n"
+		"InnoDB: page_no=%lu, should be 0<=page_no<%lu\n",
+		(ulint) space_id, (ulint) IBUF_COUNT_N_SPACES,
+		(ulint) page_no, (ulint) IBUF_COUNT_N_PAGES);
+	ut_error;
+}
 #endif
 
 /* The start address for an insert buffer bitmap page bitmap */
@@ -328,15 +349,9 @@ ibuf_count_get(
 	ulint	space,	/* in: space id */
 	ulint	page_no)/* in: page number */
 {
-	ut_ad(space < IBUF_COUNT_N_SPACES);
-	ut_ad(page_no < IBUF_COUNT_N_PAGES);
-
-	if (!ibuf_counts_inited) {
-
-		return(0);
-	}
+	ibuf_count_check(space, page_no);
 
-	return(*(ibuf_counts[space] + page_no));
+	return(ibuf_counts[space][page_no]);
 }
 
 /**********************************************************************
@@ -349,11 +364,10 @@ ibuf_count_set(
 	ulint	page_no,/* in: page number */
 	ulint	val)	/* in: value to set */
 {
-	ut_a(space < IBUF_COUNT_N_SPACES);
-	ut_a(page_no < IBUF_COUNT_N_PAGES);
+	ibuf_count_check(space, page_no);
 	ut_a(val < UNIV_PAGE_SIZE);
 
-	*(ibuf_counts[space] + page_no) = val;
+	ibuf_counts[space][page_no] = val;
 }
 #endif
 
@@ -378,22 +392,6 @@ ibuf_init_at_db_start(void)
 
 	ibuf->size = 0;
 
-#ifdef UNIV_IBUF_DEBUG
-	{
-		ulint	i, j;
-
-		for (i = 0; i < IBUF_COUNT_N_SPACES; i++) {
-
-			ibuf_counts[i] = mem_alloc(sizeof(ulint)
-						   * IBUF_COUNT_N_PAGES);
-			for (j = 0; j < IBUF_COUNT_N_PAGES; j++) {
-				ibuf_count_set(i, j, 0);
-			}
-		}
-
-		ibuf_counts_inited = TRUE;
-	}
-#endif
 	mutex_create(&ibuf_pessimistic_insert_mutex,
 		     SYNC_IBUF_PESS_INSERT_MUTEX);
 
@@ -567,7 +565,8 @@ ibuf_bitmap_page_init(
 
 	bit_offset = XDES_DESCRIBED_PER_PAGE * IBUF_BITS_PER_PAGE;
 
-	byte_offset = bit_offset / 8 + 1; /* better: (bit_offset + 7) / 8 */
+	byte_offset = bit_offset / 8 + 1;
+	/* better: byte_offset = UT_BITS_IN_BYTES(bit_offset); */
 
 	fil_page_set_type(page, FIL_PAGE_IBUF_BITMAP);
 
@@ -1441,6 +1440,9 @@ ibuf_entry_build(
 		*buf2++ = 0; /* write the compact format indicator */
 	}
 	for (i = 0; i < n_fields; i++) {
+		ulint			fixed_len;
+		const dict_field_t*	ifield;
+
 		/* We add 4 below because we have the 4 extra fields at the
 		start of an ibuf record */
 
@@ -1448,10 +1450,30 @@ ibuf_entry_build(
 		entry_field = dtuple_get_nth_field(entry, i);
 		dfield_copy(field, entry_field);
 
+		ifield = dict_index_get_nth_field(index, i);
+		/* Prefix index columns of fixed-length columns are of
+		fixed length.  However, in the function call below,
+		dfield_get_type(entry_field) contains the fixed length
+		of the column in the clustered index.  Replace it with
+		the fixed length of the secondary index column. */
+		fixed_len = ifield->fixed_len;
+
+#ifdef UNIV_DEBUG
+		if (fixed_len) {
+			/* dict_index_add_col() should guarantee these */
+			ut_ad(fixed_len <= (ulint) entry_field->type.len);
+			if (ifield->prefix_len) {
+				ut_ad(ifield->prefix_len == fixed_len);
+			} else {
+				ut_ad(fixed_len
+				      == (ulint) entry_field->type.len);
+			}
+		}
+#endif /* UNIV_DEBUG */
+
 		dtype_new_store_for_order_and_null_size(
 			buf2 + i * DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE,
-			dfield_get_type(entry_field),
-			dict_index_get_nth_field(index, i)->prefix_len);
+			dfield_get_type(entry_field), fixed_len);
 	}
 
 	/* Store the type info in buf2 to field 3 of tuple */
diff -Nrup a/storage/innobase/include/buf0buf.ic b/storage/innobase/include/buf0buf.ic
--- a/storage/innobase/include/buf0buf.ic	2007-03-22 14:59:25 -07:00
+++ b/storage/innobase/include/buf0buf.ic	2007-07-18 18:10:22 -07:00
@@ -28,7 +28,7 @@ buf_block_peek_if_too_old(
 	buf_block_t*	block)	/* in: block to make younger */
 {
 	return(buf_pool->freed_page_clock >= block->freed_page_clock
-	       + 1 + (buf_pool->curr_size / 1024));
+	       + 1 + (buf_pool->curr_size / 4));
 }
 
 /*************************************************************************
diff -Nrup a/storage/innobase/include/db0err.h b/storage/innobase/include/db0err.h
--- a/storage/innobase/include/db0err.h	2006-03-10 08:22:04 -08:00
+++ b/storage/innobase/include/db0err.h	2007-07-18 18:10:22 -07:00
@@ -62,6 +62,11 @@ Created 5/24/1996 Heikki Tuuri
 					lead to a duplicate key in some
 					table */
 
+#define DB_TOO_MANY_CONCURRENT_TRXS 47	/* when InnoDB runs out of the
+					preconfigured undo slots, this can
+					only happen when there are too many
+					concurrent transactions */
+
 /* The following are partial failure codes */
 #define DB_FAIL			1000
 #define DB_OVERFLOW		1001
diff -Nrup a/storage/innobase/include/dict0dict.h b/storage/innobase/include/dict0dict.h
--- a/storage/innobase/include/dict0dict.h	2007-01-04 18:51:29 -08:00
+++ b/storage/innobase/include/dict0dict.h	2007-07-18 18:10:22 -07:00
@@ -92,6 +92,17 @@ dict_col_copy_type_noninline(
 /*=========================*/
 	const dict_col_t*	col,	/* in: column */
 	dtype_t*		type);	/* out: data type */
+#ifdef UNIV_DEBUG
+/*************************************************************************
+Assert that a column and a data type match. */
+UNIV_INLINE
+ibool
+dict_col_type_assert_equal(
+/*=======================*/
+					/* out: TRUE */
+	const dict_col_t*	col,	/* in: column */
+	const dtype_t*		type);	/* in: data type */
+#endif /* UNIV_DEBUG */
 /***************************************************************************
 Returns the minimum size of the column. */
 UNIV_INLINE
@@ -160,6 +171,13 @@ dict_col_name_is_reserved(
 				/* out: TRUE if name is reserved */
 	const char*	name);	/* in: column name */
 /************************************************************************
+Acquire the autoinc lock.*/
+
+void
+dict_table_autoinc_lock(
+/*====================*/
+	dict_table_t*	table);	/* in: table */
+/************************************************************************
 Initializes the autoinc counter. It is not an error to initialize an already
 initialized counter. */
 
@@ -169,22 +187,6 @@ dict_table_autoinc_initialize(
 	dict_table_t*	table,	/* in: table */
 	ib_longlong	value);	/* in: next value to assign to a row */
 /************************************************************************
-Gets the next autoinc value (== autoinc counter value), 0 if not yet
-initialized. If initialized, increments the counter by 1. */
-
-ib_longlong
-dict_table_autoinc_get(
-/*===================*/
-				/* out: value for a new row, or 0 */
-	dict_table_t*	table);	/* in: table */
-/************************************************************************
-Decrements the autoinc counter value by 1. */
-
-void
-dict_table_autoinc_decrement(
-/*=========================*/
-	dict_table_t*	table);	/* in: table */
-/************************************************************************
 Reads the next autoinc value (== autoinc counter value), 0 if not yet
 initialized. */
 
@@ -194,15 +196,6 @@ dict_table_autoinc_read(
 				/* out: value for a new row, or 0 */
 	dict_table_t*	table);	/* in: table */
 /************************************************************************
-Peeks the autoinc counter value, 0 if not yet initialized. Does not
-increment the counter. The read not protected by any mutex! */
-
-ib_longlong
-dict_table_autoinc_peek(
-/*====================*/
-				/* out: value of the counter */
-	dict_table_t*	table);	/* in: table */
-/************************************************************************
 Updates the autoinc counter if the value supplied is equal or bigger than the
 current value. If not inited, does nothing. */
 
@@ -212,6 +205,13 @@ dict_table_autoinc_update(
 
 	dict_table_t*	table,	/* in: table */
 	ib_longlong	value);	/* in: value which was assigned to a row */
+/************************************************************************
+Release the autoinc lock.*/
+
+void
+dict_table_autoinc_unlock(
+/*======================*/
+	dict_table_t*	table);	/* in: table */
 /**************************************************************************
 Adds a table object to the dictionary cache. */
 
diff -Nrup a/storage/innobase/include/dict0dict.ic b/storage/innobase/include/dict0dict.ic
--- a/storage/innobase/include/dict0dict.ic	2007-03-22 14:59:25 -07:00
+++ b/storage/innobase/include/dict0dict.ic	2007-07-18 18:10:23 -07:00
@@ -30,6 +30,30 @@ dict_col_copy_type(
 	type->mbmaxlen = col->mbmaxlen;
 }
 
+#ifdef UNIV_DEBUG
+/*************************************************************************
+Assert that a column and a data type match. */
+UNIV_INLINE
+ibool
+dict_col_type_assert_equal(
+/*=======================*/
+					/* out: TRUE */
+	const dict_col_t*	col,	/* in: column */
+	const dtype_t*		type)	/* in: data type */
+{
+	ut_ad(col);
+	ut_ad(type);
+
+	ut_ad(col->mtype == type->mtype);
+	ut_ad(col->prtype == type->prtype);
+	ut_ad(col->len == type->len);
+	ut_ad(col->mbminlen == type->mbminlen);
+	ut_ad(col->mbmaxlen == type->mbmaxlen);
+
+	return(TRUE);
+}
+#endif /* UNIV_DEBUG */
+
 /***************************************************************************
 Returns the minimum size of the column. */
 UNIV_INLINE
diff -Nrup a/storage/innobase/include/dict0mem.h b/storage/innobase/include/dict0mem.h
--- a/storage/innobase/include/dict0mem.h	2006-09-21 00:38:47 -07:00
+++ b/storage/innobase/include/dict0mem.h	2007-07-18 18:10:23 -07:00
@@ -158,10 +158,13 @@ struct dict_col_struct{
 					of an index */
 };
 
-/* DICT_MAX_INDEX_COL_LEN is measured in bytes and is the max index column
-length + 1. Starting from 4.1.6, we set it to < 3 * 256, so that one can
-create a column prefix index on 255 characters of a TEXT field also in the
-UTF-8 charset. In that charset, a character may take at most 3 bytes. */
+/* DICT_MAX_INDEX_COL_LEN is measured in bytes and is the maximum
+indexed column length (or indexed prefix length). It is set to 3*256,
+so that one can create a column prefix index on 256 characters of a
+TEXT or VARCHAR column also in the UTF-8 charset. In that charset,
+a character may take at most 3 bytes.
+This constant MUST NOT BE CHANGED, or the compatibility of InnoDB data
+files would be at risk! */
 
 #define DICT_MAX_INDEX_COL_LEN		768
 
@@ -407,6 +410,11 @@ struct dict_table_struct{
 				SELECT MAX(auto inc column) */
 	ib_longlong	autoinc;/* autoinc counter value to give to the
 				next inserted row */
+
+	ib_longlong	autoinc_increment;
+				/* The increment step of the auto increment
+				column. Value must be greater than or equal
+				to 1 */
 #ifdef UNIV_DEBUG
 	ulint		magic_n;/* magic number */
 # define DICT_TABLE_MAGIC_N	76333786
diff -Nrup a/storage/innobase/include/fsp0fsp.h b/storage/innobase/include/fsp0fsp.h
--- a/storage/innobase/include/fsp0fsp.h	2006-05-31 23:33:54 -07:00
+++ b/storage/innobase/include/fsp0fsp.h	2007-07-18 18:10:23 -07:00
@@ -245,7 +245,7 @@ will be able to insert new data to the d
 tablespace. Only free extents are taken into account and we also subtract
 the safety margin required by the above function fsp_reserve_free_extents. */
 
-ulint
+ullint
 fsp_get_available_space_in_free_extents(
 /*====================================*/
 			/* out: available space in kB */
diff -Nrup a/storage/innobase/include/ha_prototypes.h b/storage/innobase/include/ha_prototypes.h
--- a/storage/innobase/include/ha_prototypes.h	2006-05-31 23:33:56 -07:00
+++ b/storage/innobase/include/ha_prototypes.h	2007-07-18 18:10:23 -07:00
@@ -1,6 +1,9 @@
 #ifndef HA_INNODB_PROTOTYPES_H
 #define HA_INNODB_PROTOTYPES_H
 
+#include "univ.i" /* ulint, uint */
+#include "m_ctype.h" /* CHARSET_INFO */
+
 /* Prototypes for global functions in ha_innodb.cc that are called by
 InnoDB's C-code. */
 
@@ -18,5 +21,31 @@ innobase_convert_string(
 	ulint		from_length,
 	CHARSET_INFO*	from_cs,
 	uint*		errors);
+
+/**********************************************************************
+Returns true if the thread is the replication thread on the slave
+server. Used in srv_conc_enter_innodb() to determine if the thread
+should be allowed to enter InnoDB - the replication thread is treated
+differently than other threads. Also used in
+srv_conc_force_exit_innodb(). */
+
+ibool
+thd_is_replication_slave_thread(
+/*============================*/
+			/* out: true if thd is the replication thread */
+	void*	thd);	/* in: thread handle (THD*) */
+
+/**********************************************************************
+Returns true if the transaction this thread is processing has edited
+non-transactional tables. Used by the deadlock detector when deciding
+which transaction to rollback in case of a deadlock - we try to avoid
+rolling back transactions that have edited non-transactional tables. */
+
+ibool
+thd_has_edited_nontrans_tables(
+/*===========================*/
+			/* out: true if non-transactional tables have
+			been edited */
+	void*	thd);	/* in: thread handle (THD*) */
 
 #endif
diff -Nrup a/storage/innobase/include/lock0lock.h b/storage/innobase/include/lock0lock.h
--- a/storage/innobase/include/lock0lock.h	2006-04-23 01:48:21 -07:00
+++ b/storage/innobase/include/lock0lock.h	2007-07-18 18:10:23 -07:00
@@ -597,7 +597,7 @@ lock_validate(void);
 			/* out: TRUE if ok */
 /*************************************************************************
 Return approximate number or record locks (bits set in the bitmap) for
-this transaction. Since delete-marked records ma ybe removed, the
+this transaction. Since delete-marked records may be removed, the
 record count will not be precise. */
 
 ulint
diff -Nrup a/storage/innobase/include/mem0mem.ic b/storage/innobase/include/mem0mem.ic
--- a/storage/innobase/include/mem0mem.ic	2007-03-22 14:59:25 -07:00
+++ b/storage/innobase/include/mem0mem.ic	2007-07-18 18:10:23 -07:00
@@ -167,6 +167,8 @@ mem_heap_alloc(
 	mem_block_set_free(block, free + MEM_SPACE_NEEDED(n));
 
 #ifdef UNIV_MEM_DEBUG
+	UNIV_MEM_ALLOC(buf,
+		       n + MEM_FIELD_HEADER_SIZE + MEM_FIELD_TRAILER_SIZE);
 
 	/* In the debug version write debugging info to the field */
 	mem_field_init((byte*)buf, n);
@@ -177,8 +179,10 @@ mem_heap_alloc(
 
 #endif
 #ifdef UNIV_SET_MEM_TO_ZERO
+	UNIV_MEM_ALLOC(buf, n);
 	memset(buf, '\0', n);
 #endif
+	UNIV_MEM_ALLOC(buf, n);
 	return(buf);
 }
 
@@ -369,6 +373,8 @@ mem_heap_free_top(
 	if ((heap != block) && (mem_block_get_free(block)
 				== mem_block_get_start(block))) {
 		mem_heap_block_free(heap, block);
+	} else {
+		UNIV_MEM_FREE((byte*) block + mem_block_get_free(block), n);
 	}
 }
 
diff -Nrup a/storage/innobase/include/os0file.h b/storage/innobase/include/os0file.h
--- a/storage/innobase/include/os0file.h	2006-09-04 16:16:12 -07:00
+++ b/storage/innobase/include/os0file.h	2007-07-18 18:10:23 -07:00
@@ -94,7 +94,8 @@ log. */
 #define	OS_FILE_PATH_ERROR		74
 #define	OS_FILE_AIO_RESOURCES_RESERVED	75	/* wait for OS aio resources
 						to become available again */
-#define	OS_FILE_ERROR_NOT_SPECIFIED	76
+#define	OS_FILE_SHARING_VIOLATION	76
+#define	OS_FILE_ERROR_NOT_SPECIFIED	77
 
 /* Types for aio operations */
 #define OS_FILE_READ	10
diff -Nrup a/storage/innobase/include/page0page.h b/storage/innobase/include/page0page.h
--- a/storage/innobase/include/page0page.h	2006-09-21 00:38:48 -07:00
+++ b/storage/innobase/include/page0page.h	2007-07-18 18:10:23 -07:00
@@ -531,6 +531,15 @@ page_get_free_space_of_empty(
 			/* out: free space */
 	ulint	comp)	/* in: nonzero=compact page format */
 		__attribute__((const));
+/*****************************************************************
+Calculates free space if a page is emptied. */
+
+ulint
+page_get_free_space_of_empty_noninline(
+/*===================================*/
+			/* out: free space */
+	ulint	comp)	/* in: nonzero=compact page format */
+		__attribute__((const));
 /****************************************************************
 Returns the sum of the sizes of the records in the record list
 excluding the infimum and supremum records. */
diff -Nrup a/storage/innobase/include/rem0rec.ic b/storage/innobase/include/rem0rec.ic
--- a/storage/innobase/include/rem0rec.ic	2007-03-22 15:19:54 -07:00
+++ b/storage/innobase/include/rem0rec.ic	2007-07-18 18:10:23 -07:00
@@ -795,7 +795,8 @@ UNIV_INLINE
 void
 rec_offs_set_n_alloc(
 /*=================*/
-	ulint*	offsets,	/* in: array for rec_get_offsets() */
+	ulint*	offsets,	/* out: array for rec_get_offsets(),
+				must be allocated */
 	ulint	n_alloc)	/* in: number of elements */
 {
 	ut_ad(offsets);
@@ -1282,7 +1283,8 @@ UNIV_INLINE
 void
 rec_offs_set_n_fields(
 /*==================*/
-	ulint*	offsets,	/* in: array returned by rec_get_offsets() */
+	ulint*	offsets,	/* in/out: array returned by
+				rec_get_offsets() */
 	ulint	n_fields)	/* in: number of fields */
 {
 	ut_ad(offsets);
diff -Nrup a/storage/innobase/include/row0mysql.h b/storage/innobase/include/row0mysql.h
--- a/storage/innobase/include/row0mysql.h	2006-12-20 14:13:55 -08:00
+++ b/storage/innobase/include/row0mysql.h	2007-07-18 18:10:23 -07:00
@@ -670,6 +670,7 @@ struct row_prebuilt_struct {
 					to this heap */
 	mem_heap_t*	old_vers_heap;	/* memory heap where a previous
 					version is built in consistent read */
+	ulonglong	last_value;	/* last value of AUTO-INC interval */
 	ulint		magic_n2;	/* this should be the same as
 					magic_n */
 };
diff -Nrup a/storage/innobase/include/row0sel.h b/storage/innobase/include/row0sel.h
--- a/storage/innobase/include/row0sel.h	2006-09-04 16:16:14 -07:00
+++ b/storage/innobase/include/row0sel.h	2007-07-18 18:10:24 -07:00
@@ -171,7 +171,17 @@ row_search_check_if_query_cache_permitte
 	trx_t*		trx,		/* in: transaction object */
 	const char*	norm_name);	/* in: concatenation of database name,
 					'/' char, table name */
+/***********************************************************************
+Read the max AUTOINC value from an index. */
 
+ulint
+row_search_max_autoinc(
+/*===================*/
+					/* out: DB_SUCCESS if all OK else
+					error code */
+	dict_index_t*	index,		/* in: index to search */
+	const char*	col_name,	/* in: autoinc column name */
+	ib_longlong*	value);		/* out: AUTOINC value read */
 
 /* A structure for caching column values for prefetched rows */
 struct sel_buf_struct{
diff -Nrup a/storage/innobase/include/trx0trx.h b/storage/innobase/include/trx0trx.h
--- a/storage/innobase/include/trx0trx.h	2007-04-15 09:45:11 -07:00
+++ b/storage/innobase/include/trx0trx.h	2007-07-18 18:10:24 -07:00
@@ -371,6 +371,18 @@ trx_is_interrupted(
 #define trx_is_interrupted(trx) FALSE
 #endif /* !UNIV_HOTBACKUP */
 
+/***********************************************************************
+Compares the "weight" (or size) of two transactions. The weight of one
+transaction is estimated as the number of altered rows + the number of
+locked rows. Transactions that have edited non-transactional tables are
+considered heavier than ones that have not. */
+
+int
+trx_weight_cmp(
+/*===========*/
+			/* out: <0, 0 or >0; similar to strcmp(3) */
+	trx_t*	a,	/* in: the first transaction to be compared */
+	trx_t*	b);	/* in: the second transaction to be compared */
 
 /* Signal to a transaction */
 struct trx_sig_struct{
@@ -453,7 +465,8 @@ struct trx_struct{
 	dulint		table_id;	/* table id if the preceding field is
 					TRUE */
 	/*------------------------------*/
-	int		active_trans;	/* 1 - if a transaction in MySQL
+	unsigned	duplicates:2;	/* TRX_DUP_IGNORE | TRX_DUP_REPLACE */
+	unsigned	active_trans:2;	/* 1 - if a transaction in MySQL
 					is active. 2 - if prepare_commit_mutex
 					was taken */
 	void*		mysql_thd;	/* MySQL thread handle corresponding
@@ -499,14 +512,6 @@ struct trx_struct{
 	ulint		mysql_process_no;/* since in Linux, 'top' reports
 					process id's and not thread id's, we
 					store the process number too */
-	ibool		allow_duplicates;/* normally FALSE, but if the user
-					wants to update duplicate rows,
-					(in table inserts, for example) we
-					set this TRUE */
-	ibool		replace_duplicates;/* normally FALSE, but if the user
-					wants to replace duplicate rows,
-					(in table inserts, for example) we
-					set this TRUE */
 	/*------------------------------*/
 	ulint		n_mysql_tables_in_use; /* number of Innobase tables
 					used in the processing of the current
@@ -610,7 +615,7 @@ struct trx_struct{
 					NULL */
 	ibool		was_chosen_as_deadlock_victim;
 					/* when the transaction decides to wait
-					for a lock, this it sets this to FALSE;
+					for a lock, it sets this to FALSE;
 					if another transaction chooses this
 					transaction as a victim in deadlock
 					resolution, it sets this to TRUE */
@@ -651,7 +656,12 @@ struct trx_struct{
 					cannot be any activity in the undo
 					logs! */
 	dulint		undo_no;	/* next undo log record number to
-					assign */
+					assign; since the undo log is
+					private for a transaction, this
+					is a simple ascending sequence
+					with no gaps; thus it represents
+					the number of modified/inserted
+					rows in a transaction */
 	trx_savept_t	last_sql_stat_start;
 					/* undo_no when the last sql statement
 					was started: in case of an error, trx
@@ -671,6 +681,9 @@ struct trx_struct{
 	trx_undo_arr_t*	undo_no_arr;	/* array of undo numbers of undo log
 					records which are currently processed
 					by a rollback operation */
+	ulint		n_autoinc_rows;	/* no. of AUTO-INC rows required for
+					an SQL statement. This is useful for
+					multi-row INSERTs */
 	/*------------------------------*/
 	char detailed_error[256];	/* detailed error message for last
 					error, or empty. */
@@ -681,19 +694,19 @@ struct trx_struct{
 					single operation of a
 					transaction, e.g., a parallel
 					query */
-/* Transaction concurrency states */
+/* Transaction concurrency states (trx->conc_state) */
 #define	TRX_NOT_STARTED		1
 #define	TRX_ACTIVE		2
 #define	TRX_COMMITTED_IN_MEMORY	3
 #define	TRX_PREPARED		4	/* Support for 2PC/XA */
 
-/* Transaction execution states when trx state is TRX_ACTIVE */
+/* Transaction execution states when trx->conc_state == TRX_ACTIVE */
 #define TRX_QUE_RUNNING		1	/* transaction is running */
 #define TRX_QUE_LOCK_WAIT	2	/* transaction is waiting for a lock */
 #define TRX_QUE_ROLLING_BACK	3	/* transaction is rolling back */
 #define TRX_QUE_COMMITTING	4	/* transaction is committing */
 
-/* Transaction isolation levels */
+/* Transaction isolation levels (trx->isolation_level) */
 #define TRX_ISO_READ_UNCOMMITTED	1	/* dirty read: non-locking
 						SELECTs are performed so that
 						we do not look at a possible
@@ -727,6 +740,12 @@ struct trx_struct{
 #define TRX_ISO_SERIALIZABLE		4	/* all plain SELECTs are
 						converted to LOCK IN SHARE
 						MODE reads */
+
+/* Treatment of duplicate values (trx->duplicates; for example, in inserts).
+Multiple flags can be combined with bitwise OR. */
+#define TRX_DUP_IGNORE	1	/* duplicate rows are to be updated */
+#define TRX_DUP_REPLACE	2	/* duplicate rows are to be replaced */
+
 
 /* Types of a trx signal */
 #define TRX_SIG_NO_SIGNAL		100
diff -Nrup a/storage/innobase/include/trx0undo.h b/storage/innobase/include/trx0undo.h
--- a/storage/innobase/include/trx0undo.h	2006-09-04 16:16:15 -07:00
+++ b/storage/innobase/include/trx0undo.h	2007-07-18 18:10:24 -07:00
@@ -222,13 +222,16 @@ trx_undo_lists_init(
 Assigns an undo log for a transaction. A new undo log is created or a cached
 undo log reused. */
 
-trx_undo_t*
+ulint
 trx_undo_assign_undo(
 /*=================*/
-			/* out: the undo log, NULL if did not succeed: out of
-			space */
-	trx_t*	trx,	/* in: transaction */
-	ulint	type);	/* in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
+				/* out: DB_SUCCESS if undo log assign
+				 * successful, possible error codes are:
+				 * DB_TOO_MANY_CONCURRENT_TRXS
+				 * DB_OUT_OF_FILE_SPACE
+				 * DB_OUT_OF_MEMORY */
+	trx_t*		trx,	/* in: transaction */
+	ulint		type);	/* in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
 /**********************************************************************
 Sets the state of the undo log segment at a transaction finish. */
 
diff -Nrup a/storage/innobase/include/univ.i b/storage/innobase/include/univ.i
--- a/storage/innobase/include/univ.i	2007-03-22 14:59:25 -07:00
+++ b/storage/innobase/include/univ.i	2007-07-18 18:10:24 -07:00
@@ -83,6 +83,8 @@ memory is read outside the allocated blo
 /* Make a non-inline debug version */
 
 #if 0
+#define UNIV_DEBUG_VALGRIND			/* Enable extra
+						Valgrind instrumentation */
 #define UNIV_DEBUG				/* Enable ut_ad() assertions */
 #define UNIV_LIST_DEBUG				/* debug UT_LIST_ macros */
 #define UNIV_MEM_DEBUG				/* detect memory leaks etc */
@@ -214,6 +216,8 @@ typedef __int64			ib_longlong;
 typedef longlong		ib_longlong;
 #endif
 
+typedef unsigned long long int	ullint;
+
 #ifndef __WIN__
 #if SIZEOF_LONG != SIZEOF_VOIDP
 #error "Error: InnoDB's ulint must be of the same size as void*"
@@ -298,5 +302,17 @@ typedef void* os_thread_ret_t;
 #include "ut0dbg.h"
 #include "ut0ut.h"
 #include "db0err.h"
+#ifdef UNIV_DEBUG_VALGRIND
+# include <valgrind/memcheck.h>
+# define UNIV_MEM_VALID(addr, size) VALGRIND_MAKE_MEM_DEFINED(addr, size)
+# define UNIV_MEM_INVALID(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size)
+# define UNIV_MEM_FREE(addr, size) VALGRIND_MAKE_MEM_NOACCESS(addr, size)
+# define UNIV_MEM_ALLOC(addr, size) VALGRIND_MAKE_MEM_UNDEFINED(addr, size)
+#else
+# define UNIV_MEM_VALID(addr, size) do {} while(0)
+# define UNIV_MEM_INVALID(addr, size) do {} while(0)
+# define UNIV_MEM_FREE(addr, size) do {} while(0)
+# define UNIV_MEM_ALLOC(addr, size) do {} while(0)
+#endif
 
 #endif
diff -Nrup a/storage/innobase/include/ut0ut.h b/storage/innobase/include/ut0ut.h
--- a/storage/innobase/include/ut0ut.h	2007-01-21 17:18:09 -08:00
+++ b/storage/innobase/include/ut0ut.h	2007-07-18 18:10:24 -07:00
@@ -121,6 +121,11 @@ ut_2_power_up(
 			/* out: first power of 2 which is >= n */
 	ulint	n)	/* in: number != 0 */
 	__attribute__((const));
+
+/* Determine how many bytes (groups of 8 bits) are needed to
+store the given number of bits. */
+#define UT_BITS_IN_BYTES(b) (((b) + 7) / 8)
+
 /****************************************************************
 Sort function for ulint arrays. */
 
diff -Nrup a/storage/innobase/lock/lock0lock.c b/storage/innobase/lock/lock0lock.c
--- a/storage/innobase/lock/lock0lock.c	2007-03-22 15:19:56 -07:00
+++ b/storage/innobase/lock/lock0lock.c	2007-07-18 18:10:24 -07:00
@@ -3174,7 +3174,8 @@ lock_deadlock_occurs(
 	ulint		ret;
 	ulint		cost	= 0;
 
-	ut_ad(trx && lock);
+	ut_ad(trx);
+	ut_ad(lock);
 	ut_ad(mutex_own(&kernel_mutex));
 retry:
 	/* We check that adding this trx to the waits-for graph
@@ -3246,7 +3247,9 @@ lock_deadlock_recursive(
 	trx_t*	lock_trx;
 	ulint	ret;
 
-	ut_a(trx && start && wait_lock);
+	ut_a(trx);
+	ut_a(start);
+	ut_a(wait_lock);
 	ut_ad(mutex_own(&kernel_mutex));
 
 	if (trx->deadlock_mark == 1) {
@@ -3357,8 +3360,8 @@ lock_deadlock_recursive(
 					return(LOCK_VICTIM_IS_START);
 				}
 
-				if (ut_dulint_cmp(wait_lock->trx->undo_no,
-						  start->undo_no) >= 0) {
+				if (trx_weight_cmp(wait_lock->trx,
+						   start) >= 0) {
 					/* Our recursion starting point
 					transaction is 'smaller', let us
 					choose 'start' as the victim and roll
@@ -4423,12 +4426,9 @@ lock_table_queue_validate(
 	dict_table_t*	table)	/* in: table */
 {
 	lock_t*	lock;
-	ibool	is_waiting;
 
 	ut_ad(mutex_own(&kernel_mutex));
 
-	is_waiting = FALSE;
-
 	lock = UT_LIST_GET_FIRST(table->locks);
 
 	while (lock) {
@@ -4438,13 +4438,10 @@ lock_table_queue_validate(
 
 		if (!lock_get_wait(lock)) {
 
-			ut_a(!is_waiting);
-
 			ut_a(!lock_table_other_has_incompatible(
 				     lock->trx, 0, table,
 				     lock_get_mode(lock)));
 		} else {
-			is_waiting = TRUE;
 
 			ut_a(lock_table_has_to_wait_in_queue(lock));
 		}
diff -Nrup a/storage/innobase/log/log0log.c b/storage/innobase/log/log0log.c
--- a/storage/innobase/log/log0log.c	2007-03-22 14:59:26 -07:00
+++ b/storage/innobase/log/log0log.c	2007-07-18 18:10:24 -07:00
@@ -3039,10 +3039,22 @@ loop:
 
 	mutex_enter(&kernel_mutex);
 
-	/* Check that there are no longer transactions. We need this wait
-	even for the 'very fast' shutdown, because the InnoDB layer may have
-	committed or prepared transactions and we don't want to lose
-	them. */
+	/* We need the monitor threads to stop before we proceed with a
+	normal shutdown. In case of very fast shutdown, however, we can
+	proceed without waiting for monitor threads. */
+
+	if (srv_fast_shutdown < 2
+	   && (srv_error_monitor_active
+	      || srv_lock_timeout_and_monitor_active)) {
+
+		mutex_exit(&kernel_mutex);
+
+		goto loop;
+	}
+
+	/* Check that there are no longer transactions. We need this wait even
+	for the 'very fast' shutdown, because the InnoDB layer may have
+	committed or prepared transactions and we don't want to lose them. */
 
 	if (trx_n_mysql_transactions > 0
 	    || UT_LIST_GET_LEN(trx_sys->trx_list) > 0) {
@@ -3163,21 +3175,7 @@ loop:
 		goto loop;
 	}
 
-	/* The lock timeout thread should now have exited */
-
-	if (srv_lock_timeout_and_monitor_active) {
-
-		goto loop;
-	}
-
-	/* We now let also the InnoDB error monitor thread to exit */
-
 	srv_shutdown_state = SRV_SHUTDOWN_LAST_PHASE;
-
-	if (srv_error_monitor_active) {
-
-		goto loop;
-	}
 
 	/* Make some checks that the server really is quiet */
 	ut_a(srv_n_threads_active[SRV_MASTER] == 0);
diff -Nrup a/storage/innobase/log/log0recv.c b/storage/innobase/log/log0recv.c
--- a/storage/innobase/log/log0recv.c	2007-03-22 14:59:26 -07:00
+++ b/storage/innobase/log/log0recv.c	2007-07-18 18:10:24 -07:00
@@ -57,6 +57,16 @@ ibool		recv_needed_recovery = FALSE;
 
 ibool		recv_lsn_checks_on = FALSE;
 
+/* There are two conditions under which we scan the logs, the first
+is normal startup and the second is when we do a recovery from an
+archive.
+This flag is set if we are doing a scan from the last checkpoint during
+startup. If we find log entries that were written after the last checkpoint
+we know that the server was not cleanly shut down. We must then initialize
+the crash recovery environment before attempting to store these entries in
+the log hash table. */
+ibool	recv_log_scan_is_startup_type = FALSE;
+
 /* If the following is TRUE, the buffer pool file pages must be invalidated
 after recovery and no ibuf operations are allowed; this becomes TRUE if
 the log record hash table becomes too full, and log records must be merged
@@ -99,6 +109,16 @@ the recovery failed and the database may
 
 dulint	recv_max_page_lsn;
 
+/* prototypes */
+
+/***********************************************************
+Initialize crash recovery environment. Can be called iff
+recv_needed_recovery == FALSE. */
+static
+void 
+recv_init_crash_recovery(void);
+/*===========================*/
+
 /************************************************************
 Creates the recovery system. */
 
@@ -2284,6 +2304,23 @@ recv_scan_log_recs(
 
 		if (ut_dulint_cmp(scanned_lsn, recv_sys->scanned_lsn) > 0) {
 
+			/* We have found more entries. If this scan is of
+			startup type, we must initialize the crash recovery
+			environment before parsing these log records. */
+
+			if (recv_log_scan_is_startup_type
+			    && !recv_needed_recovery) {
+
+				fprintf(stderr,
+					"InnoDB: Log scan progressed"
+					" past the checkpoint lsn %lu %lu\n",
+					(ulong) ut_dulint_get_high(
+						recv_sys->scanned_lsn),
+					(ulong) ut_dulint_get_low(
+						recv_sys->scanned_lsn));
+				recv_init_crash_recovery();
+			}
+
 			/* We were able to find more log data: add it to the
 			parsing buffer if parse_start_lsn is already
 			non-zero */
@@ -2405,6 +2442,48 @@ recv_group_scan_log_recs(
 #endif /* UNIV_DEBUG */
 }
 
+/***********************************************************
+Initialize crash recovery environment. Can be called iff
+recv_needed_recovery == FALSE. */
+static
+void
+recv_init_crash_recovery(void)
+/*==========================*/
+{
+	ut_a(!recv_needed_recovery); 
+
+	recv_needed_recovery = TRUE;
+
+	ut_print_timestamp(stderr);
+
+	fprintf(stderr,
+		"  InnoDB: Database was not"
+		" shut down normally!\n"
+		"InnoDB: Starting crash recovery.\n");
+
+	fprintf(stderr,
+		"InnoDB: Reading tablespace information"
+		" from the .ibd files...\n");
+
+	fil_load_single_table_tablespaces();
+
+	/* If we are using the doublewrite method, we will
+	check if there are half-written pages in data files,
+	and restore them from the doublewrite buffer if
+	possible */
+
+	if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
+
+		fprintf(stderr,
+			"InnoDB: Restoring possible"
+			" half-written data pages from"
+			" the doublewrite\n"
+			"InnoDB: buffer...\n");
+		trx_sys_doublewrite_init_or_restore_pages(TRUE);
+	}
+	
+}
+
 /************************************************************
 Recovers from a checkpoint. When this function returns, the database is able
 to start processing of new user transactions, but the function
@@ -2532,92 +2611,6 @@ recv_recovery_from_checkpoint_start(
 		recv_sys->recovered_lsn = checkpoint_lsn;
 
 		srv_start_lsn = checkpoint_lsn;
-
-		/* NOTE: we always do a 'recovery' at startup, but only if
-		there is something wrong we will print a message to the
-		user about recovery: */
-
-		if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn) != 0
-		    || ut_dulint_cmp(checkpoint_lsn, min_flushed_lsn) != 0) {
-
-			if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn)
-			    < 0) {
-				fprintf(stderr,
-					"InnoDB: #########################"
-					"#################################\n"
-					"InnoDB:                          "
-					"WARNING!\n"
-					"InnoDB: The log sequence number"
-					" in ibdata files is higher\n"
-					"InnoDB: than the log sequence number"
-					" in the ib_logfiles! Are you sure\n"
-					"InnoDB: you are using the right"
-					" ib_logfiles to start up"
-					" the database?\n"
-					"InnoDB: Log sequence number in"
-					" ib_logfiles is %lu %lu, log\n"
-					"InnoDB: sequence numbers stamped"
-					" to ibdata file headers are between\n"
-					"InnoDB: %lu %lu and %lu %lu.\n"
-					"InnoDB: #########################"
-					"#################################\n",
-					(ulong) ut_dulint_get_high(
-						checkpoint_lsn),
-					(ulong) ut_dulint_get_low(
-						checkpoint_lsn),
-					(ulong) ut_dulint_get_high(
-						min_flushed_lsn),
-					(ulong) ut_dulint_get_low(
-						min_flushed_lsn),
-					(ulong) ut_dulint_get_high(
-						max_flushed_lsn),
-					(ulong) ut_dulint_get_low(
-						max_flushed_lsn));
-			}
-
-			recv_needed_recovery = TRUE;
-
-			ut_print_timestamp(stderr);
-
-			fprintf(stderr,
-				"  InnoDB: Database was not"
-				" shut down normally!\n"
-				"InnoDB: Starting crash recovery.\n");
-
-			fprintf(stderr,
-				"InnoDB: Reading tablespace information"
-				" from the .ibd files...\n");
-
-			fil_load_single_table_tablespaces();
-
-			/* If we are using the doublewrite method, we will
-			check if there are half-written pages in data files,
-			and restore them from the doublewrite buffer if
-			possible */
-
-			if (srv_force_recovery < SRV_FORCE_NO_LOG_REDO) {
-
-				fprintf(stderr,
-					"InnoDB: Restoring possible"
-					" half-written data pages from"
-					" the doublewrite\n"
-					"InnoDB: buffer...\n");
-				trx_sys_doublewrite_init_or_restore_pages(
-					TRUE);
-			}
-
-			ut_print_timestamp(stderr);
-
-			fprintf(stderr,
-				"  InnoDB: Starting log scan"
-				" based on checkpoint at\n"
-				"InnoDB: log sequence number %lu %lu.\n",
-				(ulong) ut_dulint_get_high(checkpoint_lsn),
-				(ulong) ut_dulint_get_low(checkpoint_lsn));
-		} else {
-			/* Init the doublewrite buffer memory structure */
-			trx_sys_doublewrite_init_or_restore_pages(FALSE);
-		}
 	}
 
 	contiguous_lsn = ut_dulint_align_down(recv_sys->scanned_lsn,
@@ -2670,6 +2663,8 @@ recv_recovery_from_checkpoint_start(
 		group = UT_LIST_GET_NEXT(log_groups, group);
 	}
 
+	/* Set the flag to indicate that we are doing a startup scan. */
+	recv_log_scan_is_startup_type = (type == LOG_CHECKPOINT);
 	while (group) {
 		old_scanned_lsn = recv_sys->scanned_lsn;
 
@@ -2691,6 +2686,69 @@ recv_recovery_from_checkpoint_start(
 		group = UT_LIST_GET_NEXT(log_groups, group);
 	}
 
+	/* Done with startup scan. Clear the flag. */
+	recv_log_scan_is_startup_type = FALSE;
+	if (type == LOG_CHECKPOINT) {
+		/* NOTE: we always do a 'recovery' at startup, but only if
+		there is something wrong we will print a message to the
+		user about recovery: */
+
+		if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn) != 0
+		    || ut_dulint_cmp(checkpoint_lsn, min_flushed_lsn) != 0) {
+
+			if (ut_dulint_cmp(checkpoint_lsn, max_flushed_lsn)
+			    < 0) {
+				fprintf(stderr,
+					"InnoDB: #########################"
+					"#################################\n"
+					"InnoDB:                          "
+					"WARNING!\n"
+					"InnoDB: The log sequence number"
+					" in ibdata files is higher\n"
+					"InnoDB: than the log sequence number"
+					" in the ib_logfiles! Are you sure\n"
+					"InnoDB: you are using the right"
+					" ib_logfiles to start up"
+					" the database?\n"
+					"InnoDB: Log sequence number in"
+					" ib_logfiles is %lu %lu, log\n"
+					"InnoDB: sequence numbers stamped"
+					" to ibdata file headers are between\n"
+					"InnoDB: %lu %lu and %lu %lu.\n"
+					"InnoDB: #########################"
+					"#################################\n",
+					(ulong) ut_dulint_get_high(
+						checkpoint_lsn),
+					(ulong) ut_dulint_get_low(
+						checkpoint_lsn),
+					(ulong) ut_dulint_get_high(
+						min_flushed_lsn),
+					(ulong) ut_dulint_get_low(
+						min_flushed_lsn),
+					(ulong) ut_dulint_get_high(
+						max_flushed_lsn),
+					(ulong) ut_dulint_get_low(
+						max_flushed_lsn));
+
+
+			}
+
+			if (!recv_needed_recovery) {
+				fprintf(stderr,
+					"InnoDB: The log sequence number"
+					" in ibdata files does not match\n"
+					"InnoDB: the log sequence number"
+					" in the ib_logfiles!\n");
+				recv_init_crash_recovery();
+			}
+
+		}
+		if (!recv_needed_recovery) {
+			/* Init the doublewrite buffer memory structure */
+			trx_sys_doublewrite_init_or_restore_pages(FALSE);
+		}
+	}
+
 	/* We currently have only one log group */
 	if (ut_dulint_cmp(group_scanned_lsn, checkpoint_lsn) < 0) {
 		ut_print_timestamp(stderr);
@@ -2747,20 +2805,9 @@ recv_recovery_from_checkpoint_start(
 	recv_synchronize_groups(up_to_date_group);
 
 	if (!recv_needed_recovery) {
-		if (ut_dulint_cmp(checkpoint_lsn, recv_sys->recovered_lsn)
-		    != 0) {
-			fprintf(stderr,
-				"InnoDB: Warning: we did not need to do"
-				" crash recovery, but log scan\n"
-				"InnoDB: progressed past the checkpoint"
-				" lsn %lu %lu up to lsn %lu %lu\n",
-				(ulong) ut_dulint_get_high(checkpoint_lsn),
-				(ulong) ut_dulint_get_low(checkpoint_lsn),
-				(ulong) ut_dulint_get_high(
-					recv_sys->recovered_lsn),
-				(ulong) ut_dulint_get_low(
-					recv_sys->recovered_lsn));
-		}
+		ut_a(ut_dulint_cmp(checkpoint_lsn, 
+				   recv_sys->recovered_lsn) == 0);
+
 	} else {
 		srv_start_lsn = recv_sys->recovered_lsn;
 	}
diff -Nrup a/storage/innobase/mem/mem0mem.c b/storage/innobase/mem/mem0mem.c
--- a/storage/innobase/mem/mem0mem.c	2006-09-04 16:16:18 -07:00
+++ b/storage/innobase/mem/mem0mem.c	2007-07-18 18:10:24 -07:00
@@ -514,6 +514,7 @@ mem_heap_block_free(
 	mem_erase_buf((byte*)block, len);
 
 #endif
+	UNIV_MEM_FREE(block, len);
 
 	if (init_block) {
 		/* Do not have to free: do nothing */
diff -Nrup a/storage/innobase/mem/mem0pool.c b/storage/innobase/mem/mem0pool.c
--- a/storage/innobase/mem/mem0pool.c	2007-03-22 14:59:26 -07:00
+++ b/storage/innobase/mem/mem0pool.c	2007-07-18 18:10:25 -07:00
@@ -229,6 +229,8 @@ mem_pool_create(
 
 		mem_area_set_size(area, ut_2_exp(i));
 		mem_area_set_free(area, TRUE);
+		UNIV_MEM_FREE(MEM_AREA_EXTRA_SIZE + (byte*) area,
+			      ut_2_exp(i) - MEM_AREA_EXTRA_SIZE);
 
 		UT_LIST_ADD_FIRST(free_list, pool->free_list[i], area);
 
@@ -300,6 +302,7 @@ mem_pool_fill_free_list(
 	UT_LIST_REMOVE(free_list, pool->free_list[i + 1], area);
 
 	area2 = (mem_area_t*)(((byte*)area) + ut_2_exp(i));
+	UNIV_MEM_ALLOC(area2, MEM_AREA_EXTRA_SIZE);
 
 	mem_area_set_size(area2, ut_2_exp(i));
 	mem_area_set_free(area2, TRUE);
@@ -400,6 +403,8 @@ mem_area_alloc(
 	mutex_exit(&(pool->mutex));
 
 	ut_ad(mem_pool_validate(pool));
+	UNIV_MEM_ALLOC(MEM_AREA_EXTRA_SIZE + (byte*)area,
+		       ut_2_exp(n) - MEM_AREA_EXTRA_SIZE);
 
 	return((void*)(MEM_AREA_EXTRA_SIZE + ((byte*)area)));
 }
@@ -482,6 +487,7 @@ mem_area_free(
 	}
 
 	size = mem_area_get_size(area);
+	UNIV_MEM_FREE(ptr, size - MEM_AREA_EXTRA_SIZE);
 
 	if (size == 0) {
 		fprintf(stderr,
diff -Nrup a/storage/innobase/os/os0file.c b/storage/innobase/os/os0file.c
--- a/storage/innobase/os/os0file.c	2007-04-18 19:00:40 -07:00
+++ b/storage/innobase/os/os0file.c	2007-07-18 18:10:25 -07:00
@@ -250,6 +250,15 @@ os_file_get_last_error(
 				"InnoDB: the directory. It may also be"
 				" you have created a subdirectory\n"
 				"InnoDB: of the same name as a data file.\n");
+		} else if (err == ERROR_SHARING_VIOLATION
+			   || err == ERROR_LOCK_VIOLATION) {
+			fprintf(stderr,
+				"InnoDB: The error means that another program"
+				" is using InnoDB's files.\n"
+				"InnoDB: This might be a backup or antivirus"
+				" software or another instance\n"
+				"InnoDB: of MySQL."
+				" Please close it to get rid of this error.\n");
 		} else {
 			fprintf(stderr,
 				"InnoDB: Some operating system error numbers"
@@ -268,6 +277,9 @@ os_file_get_last_error(
 		return(OS_FILE_DISK_FULL);
 	} else if (err == ERROR_FILE_EXISTS) {
 		return(OS_FILE_ALREADY_EXISTS);
+	} else if (err == ERROR_SHARING_VIOLATION
+		   || err == ERROR_LOCK_VIOLATION) {
+		return(OS_FILE_SHARING_VIOLATION);
 	} else {
 		return(100 + err);
 	}
@@ -388,6 +400,10 @@ os_file_handle_error_cond_exit(
 		   || err == OS_FILE_PATH_ERROR) {
 
 		return(FALSE);
+	} else if (err == OS_FILE_SHARING_VIOLATION) {
+
+		os_thread_sleep(10000000);  /* 10 sec */
+		return(TRUE);
 	} else {
 		if (name) {
 			fprintf(stderr, "InnoDB: File name %s\n", name);
@@ -440,10 +456,9 @@ os_file_handle_error_no_exit(
 
 #undef USE_FILE_LOCK
 #define USE_FILE_LOCK
-#if defined(UNIV_HOTBACKUP) || defined(__WIN__) || defined(__FreeBSD__) || defined(__NETWARE__)
+#if defined(UNIV_HOTBACKUP) || defined(__WIN__) || defined(__NETWARE__)
 /* InnoDB Hot Backup does not lock the data files.
  * On Windows, mandatory locking is used.
- * On FreeBSD with LinuxThreads, advisory locking does not work properly.
  */
 # undef USE_FILE_LOCK
 #endif
diff -Nrup a/storage/innobase/page/page0page.c b/storage/innobase/page/page0page.c
--- a/storage/innobase/page/page0page.c	2006-09-21 00:38:51 -07:00
+++ b/storage/innobase/page/page0page.c	2007-07-18 18:10:25 -07:00
@@ -209,6 +209,18 @@ page_set_max_trx_id(
 	}
 }
 
+/*****************************************************************
+Calculates free space if a page is emptied (non-inline function form). */
+
+ulint
+page_get_free_space_of_empty_noninline(
+/*===================================*/
+			/* out: free space */
+	ulint	comp)	/* in: nonzero=compact page format */
+{
+	return(page_get_free_space_of_empty(comp)); /* forwarded as is */
+}
+
 /****************************************************************
 Allocates a block of memory from an index page. */
 
diff -Nrup a/storage/innobase/rem/rem0rec.c b/storage/innobase/rem/rem0rec.c
--- a/storage/innobase/rem/rem0rec.c	2006-09-21 00:38:52 -07:00
+++ b/storage/innobase/rem/rem0rec.c	2007-07-18 18:10:25 -07:00
@@ -153,7 +153,6 @@ static
 void
 rec_init_offsets(
 /*=============*/
-				/* out: the offsets */
 	rec_t*		rec,	/* in: physical record */
 	dict_index_t*	index,	/* in: record descriptor */
 	ulint*		offsets)/* in/out: array of offsets;
@@ -189,7 +188,7 @@ rec_init_offsets(
 		}
 
 		nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
-		lens = nulls - (index->n_nullable + 7) / 8;
+		lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
 		offs = 0;
 		null_mask = 1;
 
@@ -304,7 +303,7 @@ rec_get_offsets_func(
 				/* out: the new offsets */
 	rec_t*		rec,	/* in: physical record */
 	dict_index_t*	index,	/* in: record descriptor */
-	ulint*		offsets,/* in: array consisting of offsets[0]
+	ulint*		offsets,/* in/out: array consisting of offsets[0]
 				allocated elements, or an array from
 				rec_get_offsets(), or NULL */
 	ulint		n_fields,/* in: maximum number of initialized fields
@@ -440,7 +439,7 @@ rec_get_converted_size_new(
 	dtuple_t*	dtuple)	/* in: data tuple */
 {
 	ulint		size		= REC_N_NEW_EXTRA_BYTES
-		+ (index->n_nullable + 7) / 8;
+		+ UT_BITS_IN_BYTES(index->n_nullable);
 	ulint		i;
 	ulint		n_fields;
 	ut_ad(index && dtuple);
@@ -459,10 +458,10 @@ rec_get_converted_size_new(
 		break;
 	case REC_STATUS_INFIMUM:
 	case REC_STATUS_SUPREMUM:
-		/* infimum or supremum record, 8 bytes */
-		return(size + 8); /* no extra data needed */
+		/* infimum or supremum record, 8 data bytes */
+		return(REC_N_NEW_EXTRA_BYTES + 8);
 	default:
-		ut_a(0);
+		ut_error;
 		return(ULINT_UNDEFINED);
 	}
 
@@ -476,21 +475,31 @@ rec_get_converted_size_new(
 		len = dtuple_get_nth_field(dtuple, i)->len;
 		col = dict_field_get_col(field);
 
-		ut_ad(len != UNIV_SQL_NULL || !(col->prtype & DATA_NOT_NULL));
+		ut_ad(dict_col_type_assert_equal(
+			      col, dfield_get_type(dtuple_get_nth_field(
+							   dtuple, i))));
 
 		if (len == UNIV_SQL_NULL) {
 			/* No length is stored for NULL fields. */
+			ut_ad(!(col->prtype & DATA_NOT_NULL));
 			continue;
 		}
 
 		ut_ad(len <= col->len || col->mtype == DATA_BLOB);
-		ut_ad(!field->fixed_len || len == field->fixed_len);
 
 		if (field->fixed_len) {
+			ut_ad(len == field->fixed_len);
+			/* dict_index_add_col() should guarantee this */
+			ut_ad(!field->prefix_len
+			      || field->fixed_len == field->prefix_len);
 		} else if (len < 128
 			   || (col->len < 256 && col->mtype != DATA_BLOB)) {
 			size++;
 		} else {
+			/* For variable-length columns, we look up the
+			maximum length from the column itself.  If this
+			is a prefix index column shorter than 256 bytes,
+			this will waste one byte. */
 			size += 2;
 		}
 		size += len;
@@ -586,7 +595,7 @@ rec_set_nth_field_extern_bit_new(
 				we do not write to log about the change */
 {
 	byte*		nulls	= rec - (REC_N_NEW_EXTRA_BYTES + 1);
-	byte*		lens	= nulls - (index->n_nullable + 7) / 8;
+	byte*		lens	= nulls - UT_BITS_IN_BYTES(index->n_nullable);
 	ulint		i;
 	ulint		n_fields;
 	ulint		null_mask	= 1;
@@ -744,7 +753,11 @@ rec_convert_dtuple_to_rec_old(
 	/* Calculate the offset of the origin in the physical record */
 
 	rec = buf + rec_get_converted_extra_size(data_size, n_fields);
-
+#ifdef UNIV_DEBUG
+	/* Suppress Valgrind warnings of ut_ad()
+	in mach_write_to_1(), mach_write_to_2() et al. */
+	memset(buf, 0xff, rec - buf + data_size);
+#endif /* UNIV_DEBUG */
 	/* Store the number of fields */
 	rec_set_n_fields_old(rec, n_fields);
 
@@ -875,7 +888,7 @@ rec_convert_dtuple_to_rec_new(
 
 	/* Calculate the offset of the origin in the physical record.
 	We must loop over all fields to do this. */
-	rec += (index->n_nullable + 7) / 8;
+	rec += UT_BITS_IN_BYTES(index->n_nullable);
 
 	for (i = 0; i < n_fields; i++) {
 		if (UNIV_UNLIKELY(i == n_node_ptr_field)) {
@@ -892,6 +905,11 @@ rec_convert_dtuple_to_rec_new(
 		len = dfield_get_len(field);
 		fixed_len = dict_index_get_nth_field(index, i)->fixed_len;
 
+		ut_ad(dict_col_type_assert_equal(
+			      dict_field_get_col(dict_index_get_nth_field(
+							 index, i)),
+			      dfield_get_type(field)));
+
 		if (!(dtype_get_prtype(type) & DATA_NOT_NULL)) {
 			if (len == UNIV_SQL_NULL)
 				continue;
@@ -915,7 +933,7 @@ rec_convert_dtuple_to_rec_new(
 init:
 	end = rec;
 	nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
-	lens = nulls - (index->n_nullable + 7) / 8;
+	lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
 	/* clear the SQL-null flags */
 	memset (lens + 1, 0, nulls - lens);
 
@@ -1172,7 +1190,7 @@ rec_copy_prefix_to_buf(
 	}
 
 	nulls = rec - (REC_N_NEW_EXTRA_BYTES + 1);
-	lens = nulls - (index->n_nullable + 7) / 8;
+	lens = nulls - UT_BITS_IN_BYTES(index->n_nullable);
 	UNIV_PREFETCH_R(lens);
 	prefix_len = 0;
 	null_mask = 1;
diff -Nrup a/storage/innobase/row/row0ins.c b/storage/innobase/row/row0ins.c
--- a/storage/innobase/row/row0ins.c	2007-01-04 18:51:30 -08:00
+++ b/storage/innobase/row/row0ins.c	2007-07-18 18:10:25 -07:00
@@ -51,21 +51,6 @@ innobase_invalidate_query_cache(
 	ulint	full_name_len);	/* in: full name length where also the null
 				chars count */
 
-/**********************************************************************
-This function returns true if
-
-1) SQL-query in the current thread
-is either REPLACE or LOAD DATA INFILE REPLACE.
-
-2) SQL-query in the current thread
-is INSERT ON DUPLICATE KEY UPDATE.
-
-NOTE that /mysql/innobase/row/row0ins.c must contain the
-prototype for this function ! */
-
-ibool
-innobase_query_is_update(void);
-
 /*************************************************************************
 Creates an insert node struct. */
 
@@ -448,7 +433,11 @@ row_ins_cascade_calc_update_vec(
 	ulint		i;
 	ulint		j;
 
-	ut_a(node && foreign && cascade && table && index);
+	ut_a(node);
+	ut_a(foreign);
+	ut_a(cascade);
+	ut_a(table);
+	ut_a(index);
 
 	/* Calculate the appropriate update vector which will set the fields
 	in the child index record to the same value (possibly padded with
@@ -791,7 +780,10 @@ row_ins_foreign_check_on_constraint(
 	trx_t*		trx;
 	mem_heap_t*	tmp_heap	= NULL;
 
-	ut_a(thr && foreign && pcur && mtr);
+	ut_a(thr);
+	ut_a(foreign);
+	ut_a(pcur);
+	ut_a(mtr);
 
 	trx = thr_get_trx(thr);
 
@@ -1308,7 +1300,8 @@ run_again:
 		goto exit_func;
 	}
 
-	ut_a(check_table && check_index);
+	ut_a(check_table);
+	ut_a(check_index);
 
 	if (check_table != table) {
 		/* We already have a LOCK_IX on table, but not necessarily
@@ -1336,11 +1329,9 @@ run_again:
 	/* Scan index records and check if there is a matching record */
 
 	for (;;) {
-		page_t*	page;
 		rec = btr_pcur_get_rec(&pcur);
-		page = buf_frame_align(rec);
 
-		if (rec == page_get_infimum_rec(page)) {
+		if (page_rec_is_infimum(rec)) {
 
 			goto next_rec;
 		}
@@ -1348,7 +1339,7 @@ run_again:
 		offsets = rec_get_offsets(rec, check_index,
 					  offsets, ULINT_UNDEFINED, &heap);
 
-		if (rec == page_get_supremum_rec(page)) {
+		if (page_rec_is_supremum(rec)) {
 
 			err = row_ins_set_shared_rec_lock(
 				LOCK_ORDINARY, rec, check_index, offsets, thr);
@@ -1654,6 +1645,7 @@ row_ins_scan_sec_index_for_duplicate(
 	btr_pcur_t	pcur;
 	ulint		err		= DB_SUCCESS;
 	ibool		moved;
+	unsigned	allow_duplicates;
 	mtr_t		mtr;
 	mem_heap_t*	heap		= NULL;
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
@@ -1684,12 +1676,14 @@ row_ins_scan_sec_index_for_duplicate(
 
 	btr_pcur_open(index, entry, PAGE_CUR_GE, BTR_SEARCH_LEAF, &pcur, &mtr);
 
+	allow_duplicates = thr_get_trx(thr)->duplicates & TRX_DUP_IGNORE;
+
 	/* Scan index records and check if there is a duplicate */
 
 	for (;;) {
 		rec = btr_pcur_get_rec(&pcur);
 
-		if (rec == page_get_infimum_rec(buf_frame_align(rec))) {
+		if (page_rec_is_infimum(rec)) {
 
 			goto next_rec;
 		}
@@ -1697,7 +1691,7 @@ row_ins_scan_sec_index_for_duplicate(
 		offsets = rec_get_offsets(rec, index, offsets,
 					  ULINT_UNDEFINED, &heap);
 
-		if (innobase_query_is_update()) {
+		if (allow_duplicates) {
 
 			/* If the SQL-query will update or replace
 			duplicate key we will take X-lock for
@@ -1826,7 +1820,7 @@ row_ins_duplicate_error_in_clust(
 			sure that in roll-forward we get the same duplicate
 			errors as in original execution */
 
-			if (innobase_query_is_update()) {
+			if (trx->duplicates & TRX_DUP_IGNORE) {
 
 				/* If the SQL-query will update or replace
 				duplicate key we will take X-lock for
@@ -1864,7 +1858,7 @@ row_ins_duplicate_error_in_clust(
 			offsets = rec_get_offsets(rec, cursor->index, offsets,
 						  ULINT_UNDEFINED, &heap);
 
-			if (innobase_query_is_update()) {
+			if (trx->duplicates & TRX_DUP_IGNORE) {
 
 				/* If the SQL-query will update or replace
 				duplicate key we will take X-lock for
diff -Nrup a/storage/innobase/row/row0mysql.c b/storage/innobase/row/row0mysql.c
--- a/storage/innobase/row/row0mysql.c	2007-03-22 14:59:26 -07:00
+++ b/storage/innobase/row/row0mysql.c	2007-07-18 18:10:25 -07:00
@@ -476,7 +476,8 @@ handle_new_error:
 		/* MySQL will roll back the latest SQL statement */
 	} else if (err == DB_ROW_IS_REFERENCED
 		   || err == DB_NO_REFERENCED_ROW
-		   || err == DB_CANNOT_ADD_CONSTRAINT) {
+		   || err == DB_CANNOT_ADD_CONSTRAINT
+		   || err == DB_TOO_MANY_CONCURRENT_TRXS) {
 		if (savept) {
 			/* Roll back the latest, possibly incomplete
 			insertion or update */
@@ -654,6 +655,8 @@ row_create_prebuilt(
 
 	prebuilt->old_vers_heap = NULL;
 
+	prebuilt->last_value = 0;
+
 	return(prebuilt);
 }
 
@@ -2893,6 +2896,8 @@ next_rec:
 		dict_table_change_id_in_cache(table, new_id);
 	}
 
+	/* MySQL calls ha_innobase::reset_auto_increment() which does
+	the same thing. */
 	dict_table_autoinc_initialize(table, 0);
 	dict_update_statistics(table);
 
diff -Nrup a/storage/innobase/row/row0row.c b/storage/innobase/row/row0row.c
--- a/storage/innobase/row/row0row.c	2006-09-22 00:00:17 -07:00
+++ b/storage/innobase/row/row0row.c	2007-07-18 18:10:25 -07:00
@@ -142,20 +142,15 @@ row_build_index_entry(
 		dfield_copy(dfield, dfield2);
 
 		/* If a column prefix index, take only the prefix */
-		if (ind_field->prefix_len) {
-			if (dfield_get_len(dfield2) != UNIV_SQL_NULL) {
+		if (ind_field->prefix_len > 0
+		    && dfield_get_len(dfield2) != UNIV_SQL_NULL) {
 
-				storage_len = dtype_get_at_most_n_mbchars(
-					col->prtype,
-					col->mbminlen, col->mbmaxlen,
-					ind_field->prefix_len,
-					dfield_get_len(dfield2),
-					dfield2->data);
+			storage_len = dtype_get_at_most_n_mbchars(
+				col->prtype, col->mbminlen, col->mbmaxlen,
+				ind_field->prefix_len,
+				dfield_get_len(dfield2), dfield2->data);
 
-				dfield_set_len(dfield, storage_len);
-			}
-
-			dfield_get_type(dfield)->len = ind_field->prefix_len;
+			dfield_set_len(dfield, storage_len);
 		}
 	}
 
@@ -478,7 +473,9 @@ row_build_row_ref_in_tuple(
 	ulint*		offsets		= offsets_;
 	*offsets_ = (sizeof offsets_) / sizeof *offsets_;
 
-	ut_a(ref && index && rec);
+	ut_a(ref);
+	ut_a(index);
+	ut_a(rec);
 
 	if (UNIV_UNLIKELY(!index->table)) {
 		fputs("InnoDB: table ", stderr);
diff -Nrup a/storage/innobase/row/row0sel.c b/storage/innobase/row/row0sel.c
--- a/storage/innobase/row/row0sel.c	2007-02-23 03:13:50 -08:00
+++ b/storage/innobase/row/row0sel.c	2007-07-18 18:10:25 -07:00
@@ -3619,6 +3619,32 @@ shortcut_fails_too_big_rec:
 					   pcur, 0, &mtr);
 
 		pcur->trx_if_known = trx;
+
+		rec = btr_pcur_get_rec(pcur);
+
+		if (!moves_up
+		    && !page_rec_is_supremum(rec)
+		    && set_also_gap_locks
+		    && !(srv_locks_unsafe_for_binlog
+			 || trx->isolation_level == TRX_ISO_READ_COMMITTED)
+		    && prebuilt->select_lock_type != LOCK_NONE) {
+
+			/* Try to place a gap lock on the next index record
+			to prevent phantoms in ORDER BY ... DESC queries */
+
+			offsets = rec_get_offsets(page_rec_get_next(rec),
+						  index, offsets,
+						  ULINT_UNDEFINED, &heap);
+			err = sel_set_rec_lock(page_rec_get_next(rec),
+					       index, offsets,
+					       prebuilt->select_lock_type,
+					       LOCK_GAP, thr);
+
+			if (err != DB_SUCCESS) {
+
+				goto lock_wait_or_error;
+			}
+		}
 	} else {
 		if (mode == PAGE_CUR_G) {
 			btr_pcur_open_at_index_side(
@@ -4492,4 +4518,170 @@ row_search_check_if_query_cache_permitte
 	mutex_exit(&kernel_mutex);
 
 	return(ret);
+}
+
+/***********************************************************************
+Read the AUTOINC column from the current row as a non-negative integer. */
+static
+ib_longlong
+row_search_autoinc_read_column(
+/*===========================*/
+					/* out: value read from the column */
+	dict_index_t*	index,		/* in: index to read from */
+	const rec_t*	rec,		/* in: current rec */
+	ulint		col_no,		/* in: column number */
+	ibool		unsigned_type)	/* in: nonzero=unsigned column */
+{
+	ulint		len;
+	byte*		ptr;
+	const byte*	data;
+	ib_longlong	value;
+	mem_heap_t*	heap = NULL;
+	byte		dest[sizeof(value)];
+	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
+	ulint*		offsets	= offsets_;
+
+	*offsets_ = sizeof offsets_ / sizeof *offsets_; /* element count */
+
+	/* TODO: We have to cast away the const of rec for now.  This needs
+	to be fixed later.*/
+	offsets = rec_get_offsets(
+		(rec_t*) rec, index, offsets, ULINT_UNDEFINED, &heap);
+
+	/* TODO: We have to cast away the const of rec for now.  This needs
+	to be fixed later.*/
+	data = rec_get_nth_field((rec_t*)rec, offsets, col_no, &len);
+
+	ut_a(len != UNIV_SQL_NULL);
+	ut_a(len <= sizeof value);
+
+	/* Convert integer data from Innobase to a little-endian format,
+	sign bit restored to normal: the len bytes are copied to dest in
+	reverse order, so ptr == dest when the loop ends */
+	for (ptr = dest + len; ptr != dest; ++data) {
+		--ptr;
+		*ptr = *data;
+	}
+
+	if (!unsigned_type) {
+		dest[len - 1] ^= 128;
+	}
+	/* The assumption here is that the AUTOINC value can't be negative.
+	NOTE(review): reads via ptr look host byte order dependent; confirm */
+	switch (len) {
+	case 8:
+		value = *(ib_longlong*) ptr;
+		break;
+
+	case 4:
+		value = *(ib_uint32_t*) ptr;
+		break;
+
+	case 2:
+		value = *(uint16 *) ptr;
+		break;
+
+	case 1:
+		value = *ptr;
+		break;
+
+	default:
+		ut_error;
+	}
+
+	if (UNIV_LIKELY_NULL(heap)) {
+		mem_heap_free(heap);
+	}
+	/* Enforce the non-negative assumption made above */
+	ut_a(value >= 0);
+
+	return(value);
+}
+
+/***********************************************************************
+Get the last row: step the cursor backwards until a user record is found. */
+static
+const rec_t*
+row_search_autoinc_get_rec(
+/*=======================*/
+					/* out: user rec or NULL if none */
+	btr_pcur_t*	pcur,		/* in: the current cursor */
+	mtr_t*		mtr)		/* in: mini transaction */
+{
+	do {
+		const rec_t* rec = btr_pcur_get_rec(pcur);
+
+		if (page_rec_is_user_rec(rec)) {
+			return(rec);
+		}
+	} while (btr_pcur_move_to_prev(pcur, mtr));
+
+	return(NULL);	/* btr_pcur_move_to_prev() returned zero */
+}
+
+/***********************************************************************
+Read the max AUTOINC value from an index, or 0 if no row is found. */
+
+ulint
+row_search_max_autoinc(
+/*===================*/
+					/* out: DB_SUCCESS if all OK else
+					error code, DB_RECORD_NOT_FOUND if
+					column name can't be found in index */
+	dict_index_t*	index,		/* in: index to search */
+	const char*	col_name,	/* in: name of autoinc column */
+	ib_longlong*	value)		/* out: AUTOINC value read, or 0 */
+{
+	ulint		i;
+	ulint		n_cols;
+	dict_field_t*	dfield = NULL;
+	ulint		error = DB_SUCCESS;
+
+	n_cols = dict_index_get_n_ordering_defined_by_user(index);
+
+	/* Search the index for the AUTOINC column name */
+	for (i = 0; i < n_cols; ++i) {
+		dfield = dict_index_get_nth_field(index, i);
+
+		if (strcmp(col_name, dfield->name) == 0) {
+			break;
+		}
+	}
+
+	*value = 0;	/* stays 0 if no column or user record is found */
+
+	/* Must find the AUTOINC column name */
+	if (i < n_cols && dfield) {
+		mtr_t		mtr;
+		btr_pcur_t	pcur;
+
+		mtr_start(&mtr);
+
+		/* Open at the high/right end (FALSE), and INIT
+		cursor (TRUE) */
+		btr_pcur_open_at_index_side(
+			FALSE, index, BTR_SEARCH_LEAF, &pcur, TRUE, &mtr);
+
+		if (page_get_n_recs(btr_pcur_get_page(&pcur)) > 0) {
+			const rec_t*	rec;
+
+			rec = row_search_autoinc_get_rec(&pcur, &mtr);
+
+			if (rec != NULL) {
+				ibool unsigned_type = (
+					dfield->col->prtype & DATA_UNSIGNED);
+
+				*value = row_search_autoinc_read_column(
+					index, rec, i, unsigned_type);
+			}
+		}
+
+		btr_pcur_close(&pcur);
+
+		mtr_commit(&mtr);
+	} else {
+		error = DB_RECORD_NOT_FOUND;
+	}
+
+	return(error);
 }
diff -Nrup a/storage/innobase/srv/srv0srv.c b/storage/innobase/srv/srv0srv.c
--- a/storage/innobase/srv/srv0srv.c	2007-03-22 15:19:58 -07:00
+++ b/storage/innobase/srv/srv0srv.c	2007-07-18 18:10:25 -07:00
@@ -47,6 +47,7 @@ Created 10/8/1995 Heikki Tuuri
 #include "dict0boot.h"
 #include "srv0start.h"
 #include "row0mysql.h"
+#include "ha_prototypes.h"
 
 /* This is set to TRUE if the MySQL user has set it in MySQL; currently
 affects only FOREIGN KEY definition parsing */
@@ -180,6 +181,16 @@ dulint	srv_archive_recovery_limit_lsn;
 
 ulint	srv_lock_wait_timeout	= 1024 * 1024 * 1024;
 
+/* This parameter is used to throttle the number of insert buffers that are
+merged in a batch. By increasing this parameter on a faster disk you can
+possibly reduce the number of I/O operations performed to complete the
+merge operation. The value of this parameter is used as is by the
+background loop when the system is idle (low load); on a busy system
+the parameter is scaled down by a factor of 4, to avoid putting
+a heavier load on the I/O subsystem. */
+
+ulong	srv_insert_buffer_batch_size = 20;
+
 char*	srv_file_flush_method_str = NULL;
 ulint	srv_unix_file_flush_method = SRV_UNIX_FDATASYNC;
 ulint	srv_win_file_flush_method = SRV_WIN_IO_UNBUFFERED;
@@ -977,6 +988,17 @@ srv_conc_enter_innodb(
 	srv_conc_slot_t*	slot	  = NULL;
 	ulint			i;
 
+	if (trx->mysql_thd != NULL
+	    && thd_is_replication_slave_thread(trx->mysql_thd)) {
+
+		/* TODO Do something more interesting (based on a config
+		parameter). Some users want to give the replication
+		thread very low priority, see http://bugs.mysql.com/25078
+		This can be done by introducing
+		innodb_replication_delay(ms) config parameter */
+		return;
+	}
+
 	/* If trx has 'free tickets' to enter the engine left, then use one
 	such ticket */
 
@@ -1017,7 +1039,7 @@ retry:
 	if (!has_slept && !trx->has_search_latch
 	    && NULL == UT_LIST_GET_FIRST(trx->trx_locks)) {
 
-		has_slept = TRUE; /* We let is sleep only once to avoid
+		has_slept = TRUE; /* We let it sleep only once to avoid
 				  starvation */
 
 		srv_conc_n_waiting_threads++;
@@ -1130,7 +1152,7 @@ srv_conc_force_enter_innodb(
 
 	srv_conc_n_threads++;
 	trx->declared_to_be_inside_innodb = TRUE;
-	trx->n_tickets_to_enter_innodb = 0;
+	trx->n_tickets_to_enter_innodb = 1;
 
 	os_fast_mutex_unlock(&srv_conc_mutex);
 }
@@ -1152,6 +1174,12 @@ srv_conc_force_exit_innodb(
 		return;
 	}
 
+	if (trx->mysql_thd != NULL
+	    && thd_is_replication_slave_thread(trx->mysql_thd)) {
+
+		return;
+	}
+
 	if (trx->declared_to_be_inside_innodb == FALSE) {
 
 		return;
@@ -1853,6 +1881,7 @@ srv_lock_timeout_and_monitor_thread(
 	double		time_elapsed;
 	time_t		current_time;
 	time_t		last_table_monitor_time;
+	time_t		last_tablespace_monitor_time;
 	time_t		last_monitor_time;
 	ibool		some_waits;
 	double		wait_time;
@@ -1865,6 +1894,7 @@ srv_lock_timeout_and_monitor_thread(
 	UT_NOT_USED(arg);
 	srv_last_monitor_time = time(NULL);
 	last_table_monitor_time = time(NULL);
+	last_tablespace_monitor_time = time(NULL);
 	last_monitor_time = time(NULL);
 loop:
 	srv_lock_timeout_and_monitor_active = TRUE;
@@ -1901,9 +1931,9 @@ loop:
 		}
 
 		if (srv_print_innodb_tablespace_monitor
-		    && difftime(current_time, last_table_monitor_time) > 60) {
-
-			last_table_monitor_time = time(NULL);
+		    && difftime(current_time,
+				last_tablespace_monitor_time) > 60) {
+			last_tablespace_monitor_time = time(NULL);
 
 			fputs("========================"
 			      "========================\n",
@@ -2100,7 +2130,7 @@ loop:
 
 	os_thread_sleep(2000000);
 
-	if (srv_shutdown_state < SRV_SHUTDOWN_LAST_PHASE) {
+	if (srv_shutdown_state < SRV_SHUTDOWN_CLEANUP) {
 
 		goto loop;
 	}
@@ -2270,7 +2300,8 @@ loop:
 			+ buf_pool->n_pages_written;
 		if (n_pend_ios < 3 && (n_ios - n_ios_old < 5)) {
 			srv_main_thread_op_info = "doing insert buffer merge";
-			ibuf_contract_for_n_pages(TRUE, 5);
+			ibuf_contract_for_n_pages(
+				TRUE, srv_insert_buffer_batch_size / 4);
 
 			srv_main_thread_op_info = "flushing log";
 
@@ -2331,7 +2362,7 @@ loop:
 	even if the server were active */
 
 	srv_main_thread_op_info = "doing insert buffer merge";
-	ibuf_contract_for_n_pages(TRUE, 5);
+	ibuf_contract_for_n_pages(TRUE, srv_insert_buffer_batch_size / 4);
 
 	srv_main_thread_op_info = "flushing log";
 	log_buffer_flush_to_disk();
@@ -2469,7 +2500,8 @@ background_loop:
 	if (srv_fast_shutdown && srv_shutdown_state > 0) {
 		n_bytes_merged = 0;
 	} else {
-		n_bytes_merged = ibuf_contract_for_n_pages(TRUE, 20);
+		n_bytes_merged = ibuf_contract_for_n_pages(
+			TRUE, srv_insert_buffer_batch_size);
 	}
 
 	srv_main_thread_op_info = "reserving kernel mutex";
diff -Nrup a/storage/innobase/srv/srv0start.c b/storage/innobase/srv/srv0start.c
--- a/storage/innobase/srv/srv0start.c	2007-01-04 18:51:30 -08:00
+++ b/storage/innobase/srv/srv0start.c	2007-07-18 18:10:25 -07:00
@@ -1025,6 +1025,12 @@ innobase_start_or_create_for_mysql(void)
 		"InnoDB: !!!!!!!! UNIV_DEBUG switched on !!!!!!!!!\n");
 #endif
 
+#ifdef UNIV_IBUF_DEBUG
+	fprintf(stderr,
+		"InnoDB: !!!!!!!! UNIV_IBUF_DEBUG switched on !!!!!!!!!\n"
+		"InnoDB: Crash recovery will fail with UNIV_IBUF_DEBUG\n");
+#endif
+
 #ifdef UNIV_SYNC_DEBUG
 	fprintf(stderr,
 		"InnoDB: !!!!!!!! UNIV_SYNC_DEBUG switched on !!!!!!!!!\n");
diff -Nrup a/storage/innobase/sync/sync0arr.c b/storage/innobase/sync/sync0arr.c
--- a/storage/innobase/sync/sync0arr.c	2007-01-17 16:18:03 -08:00
+++ b/storage/innobase/sync/sync0arr.c	2007-07-18 18:10:26 -07:00
@@ -670,7 +670,9 @@ sync_array_detect_deadlock(
 	ibool		ret;
 	rw_lock_debug_t*debug;
 
-	ut_a(arr && start && cell);
+	ut_a(arr);
+	ut_a(start);
+	ut_a(cell);
 	ut_ad(cell->wait_object);
 	ut_ad(os_thread_get_curr_id() == start->thread);
 	ut_ad(depth < 100);
diff -Nrup a/storage/innobase/sync/sync0rw.c b/storage/innobase/sync/sync0rw.c
--- a/storage/innobase/sync/sync0rw.c	2007-03-22 14:59:27 -07:00
+++ b/storage/innobase/sync/sync0rw.c	2007-07-18 18:10:26 -07:00
@@ -15,16 +15,34 @@ Created 9/11/1995 Heikki Tuuri
 #include "mem0mem.h"
 #include "srv0srv.h"
 
+/* number of system calls made during shared latching */
 ulint	rw_s_system_call_count	= 0;
+
+/* number of spin waits on rw-latches,
+resulting from shared (read) locks */
 ulint	rw_s_spin_wait_count	= 0;
+
+/* number of OS waits on rw-latches,
+resulting from shared (read) locks */
 ulint	rw_s_os_wait_count	= 0;
 
+/* number of unlocks (that unlock shared locks),
+set only when UNIV_SYNC_PERF_STAT is defined */
 ulint	rw_s_exit_count		= 0;
 
+/* number of system calls made during exclusive latching */
 ulint	rw_x_system_call_count	= 0;
+
+/* number of spin waits on rw-latches,
+resulting from exclusive (write) locks */
 ulint	rw_x_spin_wait_count	= 0;
+
+/* number of OS waits on rw-latches,
+resulting from exclusive (write) locks */
 ulint	rw_x_os_wait_count	= 0;
 
+/* number of unlocks (that unlock exclusive locks),
+set only when UNIV_SYNC_PERF_STAT is defined */
 ulint	rw_x_exit_count		= 0;
 
 /* The global list of rw-locks */
diff -Nrup a/storage/innobase/sync/sync0sync.c b/storage/innobase/sync/sync0sync.c
--- a/storage/innobase/sync/sync0sync.c	2007-03-22 14:59:27 -07:00
+++ b/storage/innobase/sync/sync0sync.c	2007-07-18 18:10:26 -07:00
@@ -115,6 +115,7 @@ ulint	mutex_system_call_count		= 0;
 
 /* Number of spin waits on mutexes: for performance monitoring */
 
+/* round=one iteration of a spin loop */
 ulint	mutex_spin_round_count		= 0;
 ulint	mutex_spin_wait_count		= 0;
 ulint	mutex_os_wait_count		= 0;
diff -Nrup a/storage/innobase/trx/trx0rec.c b/storage/innobase/trx/trx0rec.c
--- a/storage/innobase/trx/trx0rec.c	2006-09-21 00:38:55 -07:00
+++ b/storage/innobase/trx/trx0rec.c	2007-07-18 18:10:26 -07:00
@@ -1024,6 +1024,7 @@ trx_undo_report_row_operation(
 	ibool		is_insert;
 	trx_rseg_t*	rseg;
 	mtr_t		mtr;
+	ulint		err		= DB_SUCCESS;
 	mem_heap_t*	heap		= NULL;
 	ulint		offsets_[REC_OFFS_NORMAL_SIZE];
 	ulint*		offsets		= offsets_;
@@ -1035,7 +1036,7 @@ trx_undo_report_row_operation(
 
 		*roll_ptr = ut_dulint_zero;
 
-		return(DB_SUCCESS);
+		return(err);
 	}
 
 	ut_ad(thr);
@@ -1053,7 +1054,7 @@ trx_undo_report_row_operation(
 
 		if (trx->insert_undo == NULL) {
 
-			trx_undo_assign_undo(trx, TRX_UNDO_INSERT);
+			err = trx_undo_assign_undo(trx, TRX_UNDO_INSERT);
 		}
 
 		undo = trx->insert_undo;
@@ -1063,7 +1064,7 @@ trx_undo_report_row_operation(
 
 		if (trx->update_undo == NULL) {
 
-			trx_undo_assign_undo(trx, TRX_UNDO_UPDATE);
+			err = trx_undo_assign_undo(trx, TRX_UNDO_UPDATE);
 
 		}
 
@@ -1071,11 +1072,11 @@ trx_undo_report_row_operation(
 		is_insert = FALSE;
 	}
 
-	if (undo == NULL) {
-		/* Did not succeed: out of space */
+	if (err != DB_SUCCESS) {
+		/* Did not succeed: return the error encountered */
 		mutex_exit(&(trx->undo_mutex));
 
-		return(DB_OUT_OF_FILE_SPACE);
+		return(err);
 	}
 
 	page_no = undo->last_page_no;
@@ -1107,7 +1108,9 @@ trx_undo_report_row_operation(
 		if (offset == 0) {
 			/* The record did not fit on the page. We erase the
 			end segment of the undo log page and write a log
-			record of it to to ensure deterministic contents. */
+			record of it: this is to ensure that in the debug
+			version the replicate page constructed using the log
+			records stays identical to the original page */
 
 			trx_undo_erase_page_end(undo_page, &mtr);
 		}
@@ -1163,7 +1166,7 @@ trx_undo_report_row_operation(
 	if (UNIV_LIKELY_NULL(heap)) {
 		mem_heap_free(heap);
 	}
-	return(DB_SUCCESS);
+	return(err);
 }
 
 /*============== BUILDING PREVIOUS VERSION OF A RECORD ===============*/
diff -Nrup a/storage/innobase/trx/trx0sys.c b/storage/innobase/trx/trx0sys.c
--- a/storage/innobase/trx/trx0sys.c	2007-03-22 14:59:27 -07:00
+++ b/storage/innobase/trx/trx0sys.c	2007-07-18 18:10:26 -07:00
@@ -868,7 +868,16 @@ trx_sysf_create(
 		trx_sysf_rseg_set_page_no(sys_header, i, FIL_NULL, mtr);
 	}
 
-	/* The remaining area (up to the page trailer) is uninitialized. */
+	/* The remaining area (up to the page trailer) is uninitialized.
+	Silence Valgrind warnings about it. */
+	UNIV_MEM_VALID(sys_header + (TRX_SYS_RSEGS
+				     + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
+				     + TRX_SYS_RSEG_SPACE),
+		       (UNIV_PAGE_SIZE - FIL_PAGE_DATA_END
+			- (TRX_SYS_RSEGS
+			   + TRX_SYS_N_RSEGS * TRX_SYS_RSEG_SLOT_SIZE
+			   + TRX_SYS_RSEG_SPACE))
+		       + page - sys_header);
 
 	/* Create the first rollback segment in the SYSTEM tablespace */
 	page_no = trx_rseg_header_create(TRX_SYS_SPACE, ULINT_MAX, &slot_no,
diff -Nrup a/storage/innobase/trx/trx0trx.c b/storage/innobase/trx/trx0trx.c
--- a/storage/innobase/trx/trx0trx.c	2007-03-28 21:46:22 -07:00
+++ b/storage/innobase/trx/trx0trx.c	2007-07-18 18:10:26 -07:00
@@ -25,6 +25,7 @@ Created 3/26/1996 Heikki Tuuri
 #include "btr0sea.h"
 #include "os0proc.h"
 #include "trx0xa.h"
+#include "ha_prototypes.h"
 
 /* Copy of the prototype for innobase_mysql_print_thd: this
 copy MUST be equal to the one in mysql/sql/ha_innodb.cc ! */
@@ -130,6 +131,8 @@ trx_create(
 
 	trx->mysql_thd = NULL;
 	trx->mysql_query_str = NULL;
+	trx->active_trans = 0;
+	trx->duplicates = 0;
 
 	trx->n_mysql_tables_in_use = 0;
 	trx->mysql_n_tables_locked = 0;
@@ -192,6 +195,8 @@ trx_create(
 	memset(&trx->xid, 0, sizeof(trx->xid));
 	trx->xid.formatID = -1;
 
+	trx->n_autoinc_rows = 0;
+
 	trx_reset_new_rec_lock_info(trx);
 
 	return(trx);
@@ -1567,19 +1572,21 @@ trx_commit_for_mysql(
 	the transaction object does not have an InnoDB session object, and we
 	set the dummy session that we use for all MySQL transactions. */
 
-	mutex_enter(&kernel_mutex);
-
 	if (trx->sess == NULL) {
 		/* Open a dummy session */
 
 		if (!trx_dummy_sess) {
-			trx_dummy_sess = sess_open();
+			mutex_enter(&kernel_mutex);
+
+			if (!trx_dummy_sess) {
+				trx_dummy_sess = sess_open();
+			}
+
+			mutex_exit(&kernel_mutex);
 		}
 
 		trx->sess = trx_dummy_sess;
 	}
-	
-	mutex_exit(&kernel_mutex);
 
 	trx_start_if_not_started(trx);
 
@@ -1771,6 +1778,61 @@ trx_print(
 	}
 }
 
+/***********************************************************************
+Compares the "weight" (or size) of two transactions. The weight of one
+transaction is estimated as the number of altered rows + the number of
+locked rows. Transactions that have edited non-transactional tables are
+considered heavier than ones that have not. */
+
+int
+trx_weight_cmp(
+/*===========*/
+			/* out: <0, 0 or >0; similar to strcmp(3) */
+	trx_t*	a,	/* in: the first transaction to be compared */
+	trx_t*	b)	/* in: the second transaction to be compared */
+{
+	ibool	a_notrans_edit;
+	ibool	b_notrans_edit;
+
+	/* If mysql_thd is NULL for a transaction we assume that it has
+	not edited non-transactional tables. */
+
+	a_notrans_edit = a->mysql_thd != NULL
+	    && thd_has_edited_nontrans_tables(a->mysql_thd);
+
+	b_notrans_edit = b->mysql_thd != NULL
+	    && thd_has_edited_nontrans_tables(b->mysql_thd);
+
+	if (a_notrans_edit && !b_notrans_edit) {
+
+		return(1);
+	}
+
+	if (!a_notrans_edit && b_notrans_edit) {
+
+		return(-1);
+	}
+
+	/* Either both had edited non-transactional tables or both had
+	not, we fall back to comparing the number of altered/locked
+	rows. */
+
+#if 0
+	fprintf(stderr,
+		"%s TRX_WEIGHT(a): %lld+%lu, TRX_WEIGHT(b): %lld+%lu\n",
+		__func__,
+		ut_conv_dulint_to_longlong(a->undo_no),
+		UT_LIST_GET_LEN(a->trx_locks),
+		ut_conv_dulint_to_longlong(b->undo_no),
+		UT_LIST_GET_LEN(b->trx_locks));
+#endif
+
+#define TRX_WEIGHT(t)	\
+	ut_dulint_add((t)->undo_no, UT_LIST_GET_LEN((t)->trx_locks))
+
+	return(ut_dulint_cmp(TRX_WEIGHT(a), TRX_WEIGHT(b)));
+}
+
 /********************************************************************
 Prepares a transaction. */
 
@@ -1889,7 +1951,7 @@ Does the transaction prepare for MySQL. 
 
 ulint
 trx_prepare_for_mysql(
-/*====-=============*/
+/*==================*/
 			/* out: 0 or error number */
 	trx_t*	trx)	/* in: trx handle */
 {
diff -Nrup a/storage/innobase/trx/trx0undo.c b/storage/innobase/trx/trx0undo.c
--- a/storage/innobase/trx/trx0undo.c	2007-03-22 14:59:27 -07:00
+++ b/storage/innobase/trx/trx0undo.c	2007-07-18 18:10:26 -07:00
@@ -373,26 +373,31 @@ trx_undo_page_init(
 /*******************************************************************
 Creates a new undo log segment in file. */
 static
-page_t*
+ulint
 trx_undo_seg_create(
 /*================*/
-				/* out: segment header page x-latched, NULL
-				if no space left */
+				/* out: DB_SUCCESS if page creation OK
+				possible error codes are:
+				DB_TOO_MANY_CONCURRENT_TRXS
+				DB_OUT_OF_FILE_SPACE */
 	trx_rseg_t*	rseg __attribute__((unused)),/* in: rollback segment */
 	trx_rsegf_t*	rseg_hdr,/* in: rollback segment header, page
 				x-latched */
 	ulint		type,	/* in: type of the segment: TRX_UNDO_INSERT or
 				TRX_UNDO_UPDATE */
 	ulint*		id,	/* out: slot index within rseg header */
+	page_t**	undo_page,
+				/* out: segment header page x-latched;
+				valid only if DB_SUCCESS is returned */
 	mtr_t*		mtr)	/* in: mtr */
 {
 	ulint		slot_no;
 	ulint		space;
-	page_t*		undo_page;
 	trx_upagef_t*	page_hdr;
 	trx_usegf_t*	seg_hdr;
 	ulint		n_reserved;
 	ibool		success;
+	ulint		err = DB_SUCCESS;
 
 	ut_ad(mtr && id && rseg_hdr);
 	ut_ad(mutex_own(&(rseg->mutex)));
@@ -410,7 +415,7 @@ trx_undo_seg_create(
 			"InnoDB: many active transactions"
 			" running concurrently?\n");
 
-		return(NULL);
+		return(DB_TOO_MANY_CONCURRENT_TRXS);
 	}
 
 	space = buf_frame_get_space_id(rseg_hdr);
@@ -419,30 +424,30 @@ trx_undo_seg_create(
 					   mtr);
 	if (!success) {
 
-		return(NULL);
+		return(DB_OUT_OF_FILE_SPACE);
 	}
 
 	/* Allocate a new file segment for the undo log */
-	undo_page = fseg_create_general(space, 0,
+	*undo_page = fseg_create_general(space, 0,
 					TRX_UNDO_SEG_HDR
 					+ TRX_UNDO_FSEG_HEADER, TRUE, mtr);
 
 	fil_space_release_free_extents(space, n_reserved);
 
-	if (undo_page == NULL) {
+	if (*undo_page == NULL) {
 		/* No space left */
 
-		return(NULL);
+		return(DB_OUT_OF_FILE_SPACE);
 	}
 
 #ifdef UNIV_SYNC_DEBUG
-	buf_page_dbg_add_level(undo_page, SYNC_TRX_UNDO_PAGE);
+	buf_page_dbg_add_level(*undo_page, SYNC_TRX_UNDO_PAGE);
 #endif /* UNIV_SYNC_DEBUG */
 
-	page_hdr = undo_page + TRX_UNDO_PAGE_HDR;
-	seg_hdr = undo_page + TRX_UNDO_SEG_HDR;
+	page_hdr = *undo_page + TRX_UNDO_PAGE_HDR;
+	seg_hdr = *undo_page + TRX_UNDO_SEG_HDR;
 
-	trx_undo_page_init(undo_page, type, mtr);
+	trx_undo_page_init(*undo_page, type, mtr);
 
 	mlog_write_ulint(page_hdr + TRX_UNDO_PAGE_FREE,
 			 TRX_UNDO_SEG_HDR + TRX_UNDO_SEG_HDR_SIZE,
@@ -456,10 +461,11 @@ trx_undo_seg_create(
 		      page_hdr + TRX_UNDO_PAGE_NODE, mtr);
 
 	trx_rsegf_set_nth_undo(rseg_hdr, slot_no,
-			       buf_frame_get_page_no(undo_page), mtr);
+				buf_frame_get_page_no(*undo_page), mtr);
+
 	*id = slot_no;
 
-	return(undo_page);
+	return(err);
 }
 
 /**************************************************************************
@@ -1387,6 +1393,11 @@ trx_undo_mem_create(
 
 	undo = mem_alloc(sizeof(trx_undo_t));
 
+	if (undo == NULL) {
+
+		return NULL;
+	}
+
 	undo->id = id;
 	undo->type = type;
 	undo->state = TRX_UNDO_ACTIVE;
@@ -1464,11 +1475,15 @@ trx_undo_mem_free(
 /**************************************************************************
 Creates a new undo log. */
 static
-trx_undo_t*
+ulint
 trx_undo_create(
 /*============*/
-				/* out: undo log object, NULL if did not
-				succeed: out of space */
+				/* out: DB_SUCCESS if successful in creating
+				the new undo log object, possible error
+				codes are: 
+				DB_TOO_MANY_CONCURRENT_TRXS
+				DB_OUT_OF_FILE_SPACE 
+				DB_OUT_OF_MEMORY*/
 	trx_t*		trx,	/* in: transaction */
 	trx_rseg_t*	rseg,	/* in: rollback segment memory copy */
 	ulint		type,	/* in: type of the log: TRX_UNDO_INSERT or
@@ -1476,34 +1491,37 @@ trx_undo_create(
 	dulint		trx_id,	/* in: id of the trx for which the undo log
 				is created */
 	XID*		xid,	/* in: X/Open transaction identification*/
+	trx_undo_t**	undo,	/* out: the new undo log object, undefined
+				 * if did not succeed */
 	mtr_t*		mtr)	/* in: mtr */
 {
 	trx_rsegf_t*	rseg_header;
 	ulint		page_no;
 	ulint		offset;
 	ulint		id;
-	trx_undo_t*	undo;
 	page_t*		undo_page;
+	ulint		err;
 
 	ut_ad(mutex_own(&(rseg->mutex)));
 
 	if (rseg->curr_size == rseg->max_size) {
 
-		return(NULL);
+		return(DB_OUT_OF_FILE_SPACE);
 	}
 
 	rseg->curr_size++;
 
 	rseg_header = trx_rsegf_get(rseg->space, rseg->page_no, mtr);
 
-	undo_page = trx_undo_seg_create(rseg, rseg_header, type, &id, mtr);
+	err = trx_undo_seg_create(rseg, rseg_header, type, &id,
+							&undo_page, mtr);
 
-	if (undo_page == NULL) {
+	if (err != DB_SUCCESS) {
 		/* Did not succeed */
 
 		rseg->curr_size--;
 
-		return(NULL);
+		return(err);
 	}
 
 	page_no = buf_frame_get_page_no(undo_page);
@@ -1515,9 +1533,14 @@ trx_undo_create(
 						  undo_page + offset, mtr);
 	}
 
-	undo = trx_undo_mem_create(rseg, id, type, trx_id, xid,
+	*undo = trx_undo_mem_create(rseg, id, type, trx_id, xid,
 				   page_no, offset);
-	return(undo);
+	if (*undo == NULL) {
+
+		err = DB_OUT_OF_MEMORY;
+	}
+
+	return(err);
 }
 
 /*================ UNDO LOG ASSIGNMENT AND CLEANUP =====================*/
@@ -1634,17 +1657,20 @@ trx_undo_mark_as_dict_operation(
 Assigns an undo log for a transaction. A new undo log is created or a cached
 undo log reused. */
 
-trx_undo_t*
+ulint
 trx_undo_assign_undo(
 /*=================*/
-			/* out: the undo log, NULL if did not succeed: out of
-			space */
-	trx_t*	trx,	/* in: transaction */
-	ulint	type)	/* in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
+				/* out: DB_SUCCESS if undo log assign
+				successful, possible error codes are:
+				DB_TOO_MANY_CONCURRENT_TRXS
+				DB_OUT_OF_FILE_SPACE DB_OUT_OF_MEMORY*/
+	trx_t*		trx,	/* in: transaction */
+	ulint		type)	/* in: TRX_UNDO_INSERT or TRX_UNDO_UPDATE */
 {
 	trx_rseg_t*	rseg;
 	trx_undo_t*	undo;
 	mtr_t		mtr;
+	ulint		err = DB_SUCCESS;
 
 	ut_ad(trx);
 	ut_ad(trx->rseg);
@@ -1662,15 +1688,11 @@ trx_undo_assign_undo(
 	undo = trx_undo_reuse_cached(trx, rseg, type, trx->id, &trx->xid,
 				     &mtr);
 	if (undo == NULL) {
-		undo = trx_undo_create(trx, rseg, type, trx->id, &trx->xid,
-				       &mtr);
-		if (undo == NULL) {
-			/* Did not succeed */
+		err = trx_undo_create(trx, rseg, type, trx->id, &trx->xid,
+								&undo, &mtr);
+		if (err != DB_SUCCESS) {
 
-			mutex_exit(&(rseg->mutex));
-			mtr_commit(&mtr);
-
-			return(NULL);
+			goto func_exit;
 		}
 	}
 
@@ -1688,10 +1710,11 @@ trx_undo_assign_undo(
 		trx_undo_mark_as_dict_operation(trx, undo, &mtr);
 	}
 
+func_exit:
 	mutex_exit(&(rseg->mutex));
 	mtr_commit(&mtr);
 
-	return(undo);
+	return err;
 }
 
 /**********************************************************************
diff -Nrup a/storage/innobase/ut/ut0mem.c b/storage/innobase/ut/ut0mem.c
--- a/storage/innobase/ut/ut0mem.c	2006-09-04 16:16:25 -07:00
+++ b/storage/innobase/ut/ut0mem.c	2007-07-18 18:10:26 -07:00
@@ -162,6 +162,8 @@ retry:
 #endif
 	}
 
+	UNIV_MEM_ALLOC(ret, n + sizeof(ut_mem_block_t));
+
 	((ut_mem_block_t*)ret)->size = n + sizeof(ut_mem_block_t);
 	((ut_mem_block_t*)ret)->magic_n = UT_MEM_MAGIC_N;
 
diff -Nrup a/storage/innobase/ut/ut0ut.c b/storage/innobase/ut/ut0ut.c
--- a/storage/innobase/ut/ut0ut.c	2007-03-22 15:20:00 -07:00
+++ b/storage/innobase/ut/ut0ut.c	2007-07-18 18:10:26 -07:00
@@ -14,6 +14,7 @@ Created 5/11/1994 Heikki Tuuri
 
 #include <stdarg.h>
 #include <string.h>
+#include <ctype.h>
 
 #include "ut0sort.h"
 #include "trx0trx.h"
Thread
bk commit into 5.1 tree (acurtis:1.2556)antony19 Jul