List: Commits « Previous Message | Next Message »
From: Jon Olav Hauglid  Date: January 4 2012 5:08pm
Subject: bzr push into mysql-trunk-wl5534 branch (jon.hauglid:3459 to 3460)
View as plain text  
 3460 Jon Olav Hauglid	2012-01-04 [merge]
      Merge from mysql-trunk to mysql-trunk-wl5534
      No conflicts

    added:
      mysql-test/t/mysql_embedded-master.opt
      storage/innobase/buf/buf0dblwr.cc
      storage/innobase/include/buf0dblwr.h
    modified:
      client/mysql_upgrade.c
      client/mysqldump.c
      libmysqld/lib_sql.cc
      libmysqld/libmysqld.c
      mysql-test/collections/default.experimental
      mysql-test/mysql-test-run.pl
      mysql-test/r/func_group.result
      mysql-test/r/init_connect.result
      mysql-test/r/mysqldump.result
      mysql-test/r/sp.result
      mysql-test/r/xa.result
      mysql-test/suite/rpl/r/rpl_known_bugs_detection.result
      mysql-test/suite/rpl/t/rpl_known_bugs_detection.test
      mysql-test/t/func_group.test
      mysql-test/t/init_connect.test
      mysql-test/t/mysqldump.test
      mysql-test/t/sp.test
      mysql-test/t/xa.test
      mysys/my_conio.c
      sql/field.cc
      sql/opt_sum.cc
      sql/rpl_slave.cc
      sql/sql_class.h
      sql/sql_connect.cc
      sql/sql_parse.cc
      storage/federated/ha_federated.cc
      storage/innobase/CMakeLists.txt
      storage/innobase/buf/buf0buf.cc
      storage/innobase/buf/buf0flu.cc
      storage/innobase/buf/buf0lru.cc
      storage/innobase/buf/buf0rea.cc
      storage/innobase/handler/ha_innodb.cc
      storage/innobase/ibuf/ibuf0ibuf.cc
      storage/innobase/include/buf0types.h
      storage/innobase/include/mem0mem.ic
      storage/innobase/include/mtr0log.ic
      storage/innobase/include/os0file.h
      storage/innobase/include/srv0srv.h
      storage/innobase/include/sync0rw.h
      storage/innobase/include/sync0rw.ic
      storage/innobase/include/sync0sync.h
      storage/innobase/include/trx0sys.h
      storage/innobase/include/trx0types.h
      storage/innobase/include/univ.i
      storage/innobase/include/ut0mem.h
      storage/innobase/include/ut0rnd.ic
      storage/innobase/log/log0recv.cc
      storage/innobase/mem/mem0pool.cc
      storage/innobase/os/os0file.cc
      storage/innobase/os/os0proc.cc
      storage/innobase/row/row0sel.cc
      storage/innobase/srv/srv0srv.cc
      storage/innobase/srv/srv0start.cc
      storage/innobase/sync/sync0rw.cc
      storage/innobase/trx/trx0sys.cc
      storage/innobase/ut/ut0mem.cc
 3459 Jon Olav Hauglid	2011-12-21
      WL#5534 Online ALTER, Phase 1.
      
      Patch #71: Review changes:
      - Use handler::notify_table_changed() instead of
        handler::ha_create_handler_files() to notify storage
        engine about updated table definition.
      - Remove commit between inplace_alter_table() and
        commit_inplace_alter_table() calls.

    modified:
      sql/ha_partition.cc
      sql/ha_partition.h
      sql/handler.cc
      sql/handler.h
      sql/sql_table.cc
=== modified file 'client/mysql_upgrade.c'
--- a/client/mysql_upgrade.c	2011-11-15 12:34:23 +0000
+++ b/client/mysql_upgrade.c	2011-12-21 19:46:44 +0000
@@ -243,7 +243,6 @@ get_one_option(int optid, const struct m
   switch (optid) {
 
   case '?':
-    puts(ORACLE_WELCOME_COPYRIGHT_NOTICE("2000, 2011"));
     printf("%s  Ver %s Distrib %s, for %s (%s)\n",
            my_progname, VER, MYSQL_SERVER_VERSION, SYSTEM_TYPE, MACHINE_TYPE);
     puts(ORACLE_WELCOME_COPYRIGHT_NOTICE("2000, 2010"));

=== modified file 'client/mysqldump.c'
--- a/client/mysqldump.c	2011-10-28 12:45:35 +0000
+++ b/client/mysqldump.c	2011-12-24 09:43:56 +0000
@@ -569,6 +569,8 @@ static void verbose_msg(const char *fmt,
   vfprintf(stderr, fmt, args);
   va_end(args);
 
+  fflush(stderr);
+
   DBUG_VOID_RETURN;
 }
 
@@ -4098,6 +4100,8 @@ static int dump_all_tables_in_db(char *d
     if (mysql_refresh(mysql, REFRESH_LOG))
       DB_error(mysql, "when doing refresh");
            /* We shall continue here, if --force was given */
+    else
+      verbose_msg("-- dump_all_tables_in_db : logs flushed successfully!\n");
   }
   while ((table= getTableName(0)))
   {
@@ -4198,6 +4202,8 @@ static my_bool dump_all_views_in_db(char
     if (mysql_refresh(mysql, REFRESH_LOG))
       DB_error(mysql, "when doing refresh");
            /* We shall continue here, if --force was given */
+    else
+      verbose_msg("-- dump_all_views_in_db : logs flushed successfully!\n");
   }
   while ((table= getTableName(0)))
   {
@@ -4336,6 +4342,8 @@ static int dump_selected_tables(char *db
       DB_error(mysql, "when doing refresh");
     }
      /* We shall countinue here, if --force was given */
+    else
+      verbose_msg("-- dump_selected_tables : logs flushed successfully!\n");
   }
   if (opt_xml)
     print_xml_tag(md_result_file, "", "\n", "database", "name=", db, NullS);
@@ -4619,6 +4627,7 @@ static int purge_bin_logs_to(MYSQL *mysq
 
 static int start_transaction(MYSQL *mysql_con)
 {
+  verbose_msg("-- Starting transaction...\n");
   /*
     We use BEGIN for old servers. --single-transaction --master-data will fail
     on old servers, but that's ok as it was already silently broken (it didn't
@@ -5218,24 +5227,39 @@ int main(int argc, char **argv)
   if (opt_slave_data && do_stop_slave_sql(mysql))
     goto err;
 
-  if ((opt_lock_all_tables || opt_master_data) &&
+  if ((opt_lock_all_tables || opt_master_data ||
+       (opt_single_transaction && flush_logs)) &&
       do_flush_tables_read_lock(mysql))
     goto err;
-  if (opt_single_transaction && start_transaction(mysql))
-      goto err;
-  if (opt_delete_master_logs)
+
+  /*
+    Flush logs before starting transaction since
+    this causes implicit commit starting mysql-5.5.
+  */
+  if (opt_lock_all_tables || opt_master_data ||
+      (opt_single_transaction && flush_logs) ||
+      opt_delete_master_logs)
   {
-    if (mysql_refresh(mysql, REFRESH_LOG) ||
-        get_bin_log_name(mysql, bin_log_name, sizeof(bin_log_name)))
-      goto err;
+    if (flush_logs || opt_delete_master_logs)
+    {
+      if (mysql_refresh(mysql, REFRESH_LOG))
+        goto err;
+      verbose_msg("-- main : logs flushed successfully!\n");
+    }
+
+    /* Not anymore! That would not be sensible. */
     flush_logs= 0;
   }
-  if (opt_lock_all_tables || opt_master_data)
+
+  if (opt_delete_master_logs)
   {
-    if (flush_logs && mysql_refresh(mysql, REFRESH_LOG))
+    if (get_bin_log_name(mysql, bin_log_name, sizeof(bin_log_name)))
       goto err;
-    flush_logs= 0; /* not anymore; that would not be sensible */
   }
+
+  if (opt_single_transaction && start_transaction(mysql))
+    goto err;
+
   /* Add 'STOP SLAVE to beginning of dump */
   if (opt_slave_apply && add_stop_slave())
     goto err;

=== modified file 'libmysqld/lib_sql.cc'
--- a/libmysqld/lib_sql.cc	2011-10-13 07:26:28 +0000
+++ b/libmysqld/lib_sql.cc	2011-12-23 08:55:18 +0000
@@ -500,11 +500,14 @@ int init_embedded_server(int argc, char
     This mess is to allow people to call the init function without
     having to mess with a fake argv
    */
-  int *argcp;
-  char ***argvp;
-  int fake_argc = 1;
-  char *fake_argv[] = { (char *)"", 0 };
-  const char *fake_groups[] = { "server", "embedded", 0 };
+  int *argcp= NULL;
+  char ***argvp= NULL;
+  int fake_argc= 1;
+  char *fake_argv[2];
+  char fake_server[]= "server";
+  char fake_embedded[]= "embedded";
+  char *fake_groups[]= { fake_server, fake_embedded, NULL };
+  char fake_name[]= "fake_name";
   my_bool acl_error;
 
   if (my_thread_init())
@@ -513,17 +516,21 @@ int init_embedded_server(int argc, char
   if (argc)
   {
     argcp= &argc;
-    argvp= (char***) &argv;
+    argvp= &argv;
   }
   else
   {
+    fake_argv[0]= fake_name;
+    fake_argv[1]= NULL;
+
+    char **foo= &fake_argv[0];
     argcp= &fake_argc;
-    argvp= (char ***) &fake_argv;
+    argvp= &foo;
   }
   if (!groups)
-    groups= (char**) fake_groups;
+    groups= fake_groups;
 
-  my_progname= (char *)"mysql_embedded";
+  my_progname= "mysql_embedded";
 
   /*
     Perform basic logger initialization logger. Should be called after

=== modified file 'libmysqld/libmysqld.c'
--- a/libmysqld/libmysqld.c	2011-06-30 15:50:45 +0000
+++ b/libmysqld/libmysqld.c	2011-12-28 13:43:30 +0000
@@ -81,7 +81,7 @@ mysql_real_connect(MYSQL *mysql,const ch
 		   const char *passwd, const char *db,
 		   uint port, const char *unix_socket,ulong client_flag)
 {
-  char name_buff[USERNAME_LENGTH];
+  char name_buff[USERNAME_LENGTH + 1];
 
   DBUG_ENTER("mysql_real_connect");
   DBUG_PRINT("enter",("host: %s  db: %s  user: %s (libmysqld)",

=== modified file 'mysql-test/collections/default.experimental'
--- a/mysql-test/collections/default.experimental	2011-11-21 05:31:50 +0000
+++ b/mysql-test/collections/default.experimental	2011-12-27 11:12:58 +0000
@@ -19,7 +19,6 @@ main.innodb_mrr_cost_icp
 main.innodb_mrr                          # Bug#12682554 2011-08-17 Occasional failure in PB2
 
 innodb.innodb_monitor                    # Bug#12320827 2011-08-04 Occasional failure in PB2
-innodb.innodb_bug56143 @solaris          # Bug#11765460 2011-08-04 Occasional failure in PB2
 
 rpl.rpl_change_master_dbug               # BUG#11933491 2011-06-13 Anitha  Test fails on redhat 
 rpl.rpl_delayed_slave                    # BUG#11764654 rpl_delayed_slave fails sporadically in pb

=== modified file 'mysql-test/mysql-test-run.pl'
--- a/mysql-test/mysql-test-run.pl	2011-12-05 12:01:10 +0000
+++ b/mysql-test/mysql-test-run.pl	2011-12-23 08:55:18 +0000
@@ -2013,8 +2013,9 @@ sub executable_setup () {
   $exe_mysql=          mtr_exe_exists("$path_client_bindir/mysql");
   $exe_mysql_plugin=   mtr_exe_exists("$path_client_bindir/mysql_plugin");
 
-  $exe_mysql_embedded= mtr_exe_maybe_exists("$basedir/libmysqld/examples/mysql_embedded",
-                                            "$bindir/bin/mysql_embedded");
+  $exe_mysql_embedded=
+    mtr_exe_maybe_exists("$bindir/libmysqld/examples/mysql_embedded",
+                         "$bindir/bin/mysql_embedded");
 
   if ( ! $opt_skip_ndbcluster )
   {

=== modified file 'mysql-test/r/func_group.result'
--- a/mysql-test/r/func_group.result	2011-08-03 11:29:20 +0000
+++ b/mysql-test/r/func_group.result	2011-12-22 13:36:08 +0000
@@ -1804,3 +1804,12 @@ AVG(DISTINCT outr.col_int_nokey)
 7.5000
 DROP TABLE t1;
 # End of the bug#57932
+#
+# BUG#12773464 - 61925: WRONG RESULT WITH AGGREGATE + NOT BETWEEN + KEY
+#
+CREATE TABLE t1 (a int, KEY (a));
+INSERT INTO t1 VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
+SELECT MAX(a) FROM t1 WHERE a NOT BETWEEN 3 AND 9;
+MAX(a)
+10
+DROP TABLE t1;

=== modified file 'mysql-test/r/init_connect.result'
--- a/mysql-test/r/init_connect.result	2006-11-15 09:23:27 +0000
+++ b/mysql-test/r/init_connect.result	2012-01-02 06:25:48 +0000
@@ -20,6 +20,8 @@ hex(a)
 616263
 set GLOBAL init_connect="adsfsdfsdfs";
 select @a;
+ERROR 08S01: Aborted connection to db: 'test' user: 'user_1' host: 'localhost' (init_connect command failed)
+select @a;
 Got one of the listed errors
 drop table t1;
 End of 4.1 tests

=== modified file 'mysql-test/r/mysqldump.result'
--- a/mysql-test/r/mysqldump.result	2011-05-10 13:37:37 +0000
+++ b/mysql-test/r/mysqldump.result	2011-12-24 09:43:56 +0000
@@ -4718,3 +4718,107 @@ UNLOCK TABLES;
 /*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
 
 DROP TABLE t1;
+#
+# Bug#12809202 61854: MYSQLDUMP --SINGLE-TRANSACTION --FLUSH-LOG BREAKS
+#                     CONSISTENCY
+#
+DROP DATABASE IF EXISTS b12809202_db;
+CREATE DATABASE b12809202_db;
+CREATE TABLE b12809202_db.t1 (c1 INT);
+CREATE TABLE b12809202_db.t2 (c1 INT);
+INSERT INTO b12809202_db.t1 VALUES (1), (2), (3);
+INSERT INTO b12809202_db.t2 VALUES (1), (2), (3);
+# Starting mysqldump with --single-transaction & --flush-log options..
+# Note : In the following dump the transaction
+#        should start only after the logs are
+#        flushed, as 'flush logs' causes implicit
+#        commit starting 5.5.
+
+#### Dump starts here ####
+-- Connecting to localhost...
+-- main : logs flushed successfully!
+-- Starting transaction...
+-- Retrieving table structure for table t1...
+-- Sending SELECT query...
+-- Retrieving rows...
+--
+-- Host: localhost    Database: b12809202_db
+-- ------------------------------------------------------
+
+/*!40101 SET @OLD_CHARACTER_SET_CLIENT=@@CHARACTER_SET_CLIENT */;
+/*!40101 SET @OLD_CHARACTER_SET_RESULTS=@@CHARACTER_SET_RESULTS */;
+/*!40101 SET @OLD_COLLATION_CONNECTION=@@COLLATION_CONNECTION */;
+/*!40101 SET NAMES utf8 */;
+/*!40103 SET @OLD_TIME_ZONE=@@TIME_ZONE */;
+/*!40103 SET TIME_ZONE='+00:00' */;
+/*!40014 SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0 */;
+/*!40014 SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0 */;
+/*!40101 SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='NO_AUTO_VALUE_ON_ZERO' */;
+/*!40111 SET @OLD_SQL_NOTES=@@SQL_NOTES, SQL_NOTES=0 */;
+
+--
+-- Table structure for table `t1`
+--
+
+DROP TABLE IF EXISTS `t1`;
+/*!40101 SET @saved_cs_client     = @@character_set_client */;
+/*!40101 SET character_set_client = utf8 */;
+CREATE TABLE `t1` (
+  `c1` int(11) DEFAULT NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1;
+/*!40101 SET character_set_client = @saved_cs_client */;
+
+--
+-- Dumping data for table `t1`
+--
+
+LOCK TABLES `t1` WRITE;
+/*!40000 ALTER TABLE `t1` DISABLE KEYS */;
+INSERT INTO `t1` VALUES (1),(2),(3);
+-- Retrieving table structure for table t2...
+-- Sending SELECT query...
+-- Retrieving rows...
+/*!40000 ALTER TABLE `t1` ENABLE KEYS */;
+UNLOCK TABLES;
+
+--
+-- Table structure for table `t2`
+--
+
+DROP TABLE IF EXISTS `t2`;
+/*!40101 SET @saved_cs_client     = @@character_set_client */;
+/*!40101 SET character_set_client = utf8 */;
+CREATE TABLE `t2` (
+  `c1` int(11) DEFAULT NULL
+) ENGINE=MyISAM DEFAULT CHARSET=latin1;
+/*!40101 SET character_set_client = @saved_cs_client */;
+
+--
+-- Dumping data for table `t2`
+--
+
+LOCK TABLES `t2` WRITE;
+/*!40000 ALTER TABLE `t2` DISABLE KEYS */;
+INSERT INTO `t2` VALUES (1),(2),(3);
+/*!40000 ALTER TABLE `t2` ENABLE KEYS */;
+UNLOCK TABLES;
+-- Disconnecting from localhost...
+/*!40103 SET TIME_ZONE=@OLD_TIME_ZONE */;
+
+/*!40101 SET SQL_MODE=@OLD_SQL_MODE */;
+/*!40014 SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS */;
+/*!40014 SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS */;
+/*!40101 SET CHARACTER_SET_CLIENT=@OLD_CHARACTER_SET_CLIENT */;
+/*!40101 SET CHARACTER_SET_RESULTS=@OLD_CHARACTER_SET_RESULTS */;
+/*!40101 SET COLLATION_CONNECTION=@OLD_COLLATION_CONNECTION */;
+/*!40111 SET SQL_NOTES=@OLD_SQL_NOTES */;
+
+-- Dump completed
+#### Dump ends here ####
+DROP TABLE b12809202_db.t1;
+DROP TABLE b12809202_db.t2;
+DROP DATABASE b12809202_db;
+#
+# Delete all existing binary logs.
+#
+RESET MASTER;

=== modified file 'mysql-test/r/sp.result'
--- a/mysql-test/r/sp.result	2011-12-14 17:08:53 +0000
+++ b/mysql-test/r/sp.result	2011-12-22 11:12:32 +0000
@@ -2724,20 +2724,23 @@ userid
 drop procedure bug8116|
 drop table t3|
 drop procedure if exists bug6857|
-create procedure bug6857(counter int)
+create procedure bug6857()
 begin
 declare t0, t1 int;
 declare plus bool default 0;
-set t0 = current_time();
-while counter > 0 do
-set counter = counter - 1;
-end while;
-set t1 = current_time();
+set t0 = unix_timestamp();
+select sleep(1.1);
+set t1 = unix_timestamp();
 if t1 > t0 then
 set plus = 1;
 end if;
 select plus;
 end|
+call bug6857()|
+sleep(1.1)
+0
+plus
+1
 drop procedure bug6857|
 drop procedure if exists bug8757|
 create procedure bug8757()

=== modified file 'mysql-test/r/xa.result'
--- a/mysql-test/r/xa.result	2011-04-14 08:47:14 +0000
+++ b/mysql-test/r/xa.result	2011-12-22 10:38:23 +0000
@@ -206,8 +206,8 @@ DROP TABLE t1;
 #                FAILED 
 #
 DROP TABLE IF EXISTS t1, t2;
-CREATE TABLE t1 (a INT);
-CREATE TABLE t2 (a INT);
+CREATE TABLE t1 (a INT) ENGINE=InnoDB;
+CREATE TABLE t2 (a INT) ENGINE=InnoDB;
 START TRANSACTION;
 INSERT INTO t1 VALUES (1);
 # Connection con2

=== modified file 'mysql-test/suite/rpl/r/rpl_known_bugs_detection.result'
--- a/mysql-test/suite/rpl/r/rpl_known_bugs_detection.result	2011-11-19 08:08:03 +0000
+++ b/mysql-test/suite/rpl/r/rpl_known_bugs_detection.result	2011-12-26 17:21:34 +0000
@@ -3,6 +3,7 @@ Warnings:
 Note	1756	Sending passwords in plain text without SSL/TLS is extremely insecure.
 Note	1757	Storing MySQL user name or password information in the master.info repository is not secure and is therefore not recommended. Please see the MySQL Manual for more about this issue and possible alternatives.
 [connection master]
+call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT.");
 CREATE TABLE t1 (a INT NOT NULL PRIMARY KEY AUTO_INCREMENT, b INT,
 UNIQUE(b));
 INSERT INTO t1(b) VALUES(1),(1),(2) ON DUPLICATE KEY UPDATE t1.b=10;

=== modified file 'mysql-test/suite/rpl/t/rpl_known_bugs_detection.test'
--- a/mysql-test/suite/rpl/t/rpl_known_bugs_detection.test	2011-09-29 10:42:53 +0000
+++ b/mysql-test/suite/rpl/t/rpl_known_bugs_detection.test	2011-12-26 17:21:34 +0000
@@ -8,6 +8,8 @@ source include/have_debug.inc;
 source include/have_binlog_checksum_off.inc;
 source include/master-slave.inc;
 
+call mtr.add_suppression("Unsafe statement written to the binary log using statement format since BINLOG_FORMAT = STATEMENT.");
+
 # Currently only statement-based-specific bugs are here
 -- source include/have_binlog_format_statement.inc
 

=== modified file 'mysql-test/t/func_group.test'
--- a/mysql-test/t/func_group.test	2011-04-12 10:31:30 +0000
+++ b/mysql-test/t/func_group.test	2011-12-22 13:36:08 +0000
@@ -1184,3 +1184,12 @@ SELECT AVG(DISTINCT outr.col_int_nokey)
 outr.col_int_nokey = outr2.col_int_nokey;
 DROP TABLE t1;
 --echo # End of the bug#57932
+
+--echo #
+--echo # BUG#12773464 - 61925: WRONG RESULT WITH AGGREGATE + NOT BETWEEN + KEY
+--echo #
+CREATE TABLE t1 (a int, KEY (a));
+INSERT INTO t1 VALUES (1),(2),(3),(4),(5),(6),(7),(8),(9),(10);
+SELECT MAX(a) FROM t1 WHERE a NOT BETWEEN 3 AND 9;
+
+DROP TABLE t1;

=== modified file 'mysql-test/t/init_connect.test'
--- a/mysql-test/t/init_connect.test	2009-07-06 22:20:17 +0000
+++ b/mysql-test/t/init_connect.test	2012-01-02 06:25:48 +0000
@@ -36,6 +36,14 @@ connection con0;
 set GLOBAL init_connect="adsfsdfsdfs";
 connect (con5,localhost,user_1,,);
 connection con5;
+# BUG#11755281/47032: ERROR 2006 / ERROR 2013 INSTEAD OF PROPER ERROR MESSAGE
+# We now throw a proper error message here:
+--replace_regex /connection .* to/connection to/
+--error ER_NEW_ABORTING_CONNECTION
+select @a;
+# We got disconnected after receiving the above error message; any further
+# requests should fail with a notice that no one's listening to us.
+# --error CR_SERVER_GONE_ERROR,CR_SERVER_LOST
 --error 2013,2006
 select @a;
 connection con0;

=== added file 'mysql-test/t/mysql_embedded-master.opt'
--- a/mysql-test/t/mysql_embedded-master.opt	1970-01-01 00:00:00 +0000
+++ b/mysql-test/t/mysql_embedded-master.opt	2011-12-23 08:55:18 +0000
@@ -0,0 +1,3 @@
+--default-storage-engine=MyISAM 
+--default-tmp-storage-engine=MyISAM 
+--skip-innodb 

=== modified file 'mysql-test/t/mysqldump.test'
--- a/mysql-test/t/mysqldump.test	2011-05-10 13:37:37 +0000
+++ b/mysql-test/t/mysqldump.test	2011-12-24 09:43:56 +0000
@@ -2239,5 +2239,43 @@ CREATE TABLE t1 (a INT);
 --exec $MYSQL_DUMP --compatible=no_t,no_f --skip-comments test
 DROP TABLE t1;
 
+--echo #
+--echo # Bug#12809202 61854: MYSQLDUMP --SINGLE-TRANSACTION --FLUSH-LOG BREAKS
+--echo #                     CONSISTENCY
+--echo #
+
+--disable_warnings
+DROP DATABASE IF EXISTS b12809202_db;
+--enable_warnings
+
+CREATE DATABASE b12809202_db;
+CREATE TABLE b12809202_db.t1 (c1 INT);
+CREATE TABLE b12809202_db.t2 (c1 INT);
+
+INSERT INTO b12809202_db.t1 VALUES (1), (2), (3);
+INSERT INTO b12809202_db.t2 VALUES (1), (2), (3);
+
+--echo # Starting mysqldump with --single-transaction & --flush-log options..
+--echo # Note : In the following dump the transaction
+--echo #        should start only after the logs are
+--echo #        flushed, as 'flush logs' causes implicit
+--echo #        commit starting 5.5.
+--echo
+--echo #### Dump starts here ####
+--replace_regex /-- Server version.*// /-- MySQL dump .*// /-- Dump completed on .*/-- Dump completed/
+--exec $MYSQL_DUMP --verbose --single-transaction --flush-log b12809202_db 2>&1
+--echo
+--echo #### Dump ends here ####
+
+# Cleanup
+DROP TABLE b12809202_db.t1;
+DROP TABLE b12809202_db.t2;
+DROP DATABASE b12809202_db;
+
+--echo #
+--echo # Delete all existing binary logs.
+--echo #
+RESET MASTER;
+
 # Wait till we reached the initial number of concurrent sessions
 --source include/wait_until_count_sessions.inc

=== modified file 'mysql-test/t/sp.test'
--- a/mysql-test/t/sp.test	2011-12-14 17:08:53 +0000
+++ b/mysql-test/t/sp.test	2011-12-22 17:43:39 +0000
@@ -3190,30 +3190,23 @@ drop table t3|
 #
 # BUG#6857: current_time() in STORED PROCEDURES
 #
---disable_warnings ONCE
+--disable_warnings 
 drop procedure if exists bug6857|
-create procedure bug6857(counter int)
+--enable_warnings
+create procedure bug6857()
 begin
   declare t0, t1 int;
   declare plus bool default 0;
-
-  set t0 = current_time();
-  while counter > 0 do
-    set counter = counter - 1;
-  end while;
-  set t1 = current_time();
+  set t0 = unix_timestamp();
+  select sleep(1.1);
+  set t1 = unix_timestamp();
   if t1 > t0 then
     set plus = 1;
   end if;
   select plus;
 end|
 
-# QQ: This is currently disabled. Not only does it slow down a normal test
-#     run, it makes running with valgrind (or similar tools) extremely
-#     painful.
-# Make sure this takes at least one second on all machines in all builds.
-# 30000 makes it about 3 seconds on an old 1.1GHz linux.
-#call bug6857(300000)|
+call bug6857()|
 
 drop procedure bug6857|
 

=== modified file 'mysql-test/t/xa.test'
--- a/mysql-test/t/xa.test	2011-05-31 13:52:09 +0000
+++ b/mysql-test/t/xa.test	2011-12-22 09:27:58 +0000
@@ -343,8 +343,8 @@ DROP TABLE t1;
 DROP TABLE IF EXISTS t1, t2;
 --enable_warnings
 
-CREATE TABLE t1 (a INT);
-CREATE TABLE t2 (a INT);
+CREATE TABLE t1 (a INT) ENGINE=InnoDB;
+CREATE TABLE t2 (a INT) ENGINE=InnoDB;
 
 START TRANSACTION;
 INSERT INTO t1 VALUES (1);

=== modified file 'mysys/my_conio.c'
--- a/mysys/my_conio.c	2011-06-30 15:50:45 +0000
+++ b/mysys/my_conio.c	2011-12-29 11:31:18 +0000
@@ -21,9 +21,20 @@
 
 /* Windows console handling */
 
+/*
+  TODO : Find a relationship between the following
+         two macros and get rid of one.
+*/
+
 /* Maximum line length on Windows console */
 #define MAX_CONSOLE_LINE_SIZE 65535
 
+/*
+  Maximum number of characters that can be entered
+  on single line in the console (including \r\n).
+*/
+#define MAX_NUM_OF_CHARS_TO_READ 26600
+
 /**
   Determine if a file is a windows console
 
@@ -62,34 +73,44 @@ char *
 my_win_console_readline(const CHARSET_INFO *cs, char *mbbuf, size_t mbbufsize)
 {
   uint dummy_errors;
-  static wchar_t u16buf[MAX_CONSOLE_LINE_SIZE + 1], *pos;
-  size_t mblen;
+  static wchar_t u16buf[MAX_CONSOLE_LINE_SIZE + 1];
+  size_t mblen= 0;
+
   DWORD console_mode;
+  DWORD nchars;
+
   HANDLE console= GetStdHandle(STD_INPUT_HANDLE);
 
   DBUG_ASSERT(mbbufsize > 0); /* Need space for at least trailing '\0' */
   GetConsoleMode(console, &console_mode);
   SetConsoleMode(console, ENABLE_LINE_INPUT |
                           ENABLE_PROCESSED_INPUT | ENABLE_ECHO_INPUT);
-  for(pos= u16buf; pos < &u16buf[MAX_CONSOLE_LINE_SIZE] ; )
+
+  if (!ReadConsoleW(console, u16buf, MAX_NUM_OF_CHARS_TO_READ, &nchars, NULL))
   {
-    DWORD nchars;
-    if (!ReadConsoleW(console, pos, 1, &nchars, NULL) || nchars == 0)
-    {
-      SetConsoleMode(console, console_mode);
-      return NULL;
-    }
-    if (*pos == L'\r') /* We don't need '\r' in the result string, skip it */
-      continue;
-    if (*pos == L'\n')
-      break;
-    pos++;
+    SetConsoleMode(console, console_mode);
+    return NULL;
   }
+
+  /* Set length of string */
+  if (nchars >= 2 && u16buf[nchars - 2] == L'\r')
+    nchars-= 2;
+  else if ((nchars == MAX_NUM_OF_CHARS_TO_READ) &&
+           (u16buf[nchars - 1] == L'\r'))
+    /* Special case 1 - \r\n straddles the boundary */
+    nchars--;
+  else if ((nchars == 1) && (u16buf[0] == L'\n'))
+    /* Special case 2 - read a single '\n'*/
+    nchars--;
+
   SetConsoleMode(console, console_mode);
+
   /* Convert Unicode to session character set */
-  mblen= my_convert(mbbuf, mbbufsize - 1, cs,
-                    (const char *) u16buf, (pos - u16buf) * sizeof(wchar_t),
-                    &my_charset_utf16le_bin, &dummy_errors);
+  if (nchars != 0)
+    mblen= my_convert(mbbuf, mbbufsize - 1, cs,
+                      (const char *) u16buf, nchars * sizeof(wchar_t),
+                      &my_charset_utf16le_bin, &dummy_errors);
+
   DBUG_ASSERT(mblen < mbbufsize); /* Safety */
   mbbuf[mblen]= 0;
   return mbbuf;

=== modified file 'sql/field.cc'
--- a/sql/field.cc	2011-12-15 15:15:37 +0000
+++ b/sql/field.cc	2012-01-04 12:47:36 +0000
@@ -5925,7 +5925,8 @@ int
 Field_year::store_time(MYSQL_TIME *ltime,
                        uint8 dec_arg __attribute__((unused)))
 {
-  if (ltime->time_type != MYSQL_TIMESTAMP_DATETIME)
+  if (ltime->time_type != MYSQL_TIMESTAMP_DATETIME &&
+      ltime->time_type != MYSQL_TIMESTAMP_DATE)
   {
     /* Convert time to datetime, then store year of the result */
     THD *thd= table ? table->in_use : current_thd;

=== modified file 'sql/opt_sum.cc'
--- a/sql/opt_sum.cc	2011-07-21 17:51:38 +0000
+++ b/sql/opt_sum.cc	2011-12-22 13:36:08 +0000
@@ -674,6 +674,11 @@ static bool matching_cond(bool max_fl, T
     break;
   case Item_func::BETWEEN:
     between= 1;
+
+    // NOT BETWEEN is equivalent to OR and is therefore not a conjunction
+    if (((Item_func_between*)cond)->negated)
+      DBUG_RETURN(false);
+
     break;
   case Item_func::MULT_EQUAL_FUNC:
     eq_type= 1;

=== modified file 'sql/rpl_slave.cc'
--- a/sql/rpl_slave.cc	2011-12-16 15:53:16 +0000
+++ b/sql/rpl_slave.cc	2012-01-04 06:06:07 +0000
@@ -4402,6 +4402,7 @@ err:
   if (error && w)
   {
     w->end_info();
+    delete_dynamic(&w->jobs.Q);
     delete w;
     /*
       Any failure after dynarray inserted must follow with deletion
@@ -7453,10 +7454,10 @@ bool change_master(THD* thd, Master_info
   mi->rli->clear_until_condition();
 
   sql_print_information("'CHANGE MASTER TO executed'. "
-    "Previous state master_host='%s', master_port='%u', master_log_file='%s', "
-    "master_log_pos='%ld', master_bind='%s'. "
-    "New state master_host='%s', master_port='%u', master_log_file='%s', "
-    "master_log_pos='%ld', master_bind='%s'.", 
+    "Previous state master_host='%s', master_port= %u, master_log_file='%s', "
+    "master_log_pos= %ld, master_bind='%s'. "
+    "New state master_host='%s', master_port= %u, master_log_file='%s', "
+    "master_log_pos= %ld, master_bind='%s'.", 
     saved_host, saved_port, saved_log_name, (ulong) saved_log_pos,
     saved_bind_addr, mi->host, mi->port, mi->get_master_log_name(),
     (ulong) mi->get_master_log_pos(), mi->bind_addr);

=== modified file 'sql/sql_class.h'
--- a/sql/sql_class.h	2011-12-16 16:40:15 +0000
+++ b/sql/sql_class.h	2012-01-04 17:07:47 +0000
@@ -577,19 +577,6 @@ typedef struct system_status_var
   ulonglong ha_savepoint_count;
   ulonglong ha_savepoint_rollback_count;
   ulonglong ha_external_lock_count;
-
-#if 0
-  /* Tatiana thinks this may be dead now. */
-  /* KEY_CACHE parts. These are copies of the original */
-  ulonglong key_blocks_changed;
-  ulonglong key_blocks_used;
-  ulonglong key_cache_r_requests;
-  ulonglong key_cache_read;
-  ulonglong key_cache_w_requests;
-  ulonglong key_cache_write;
-  /* END OF KEY_CACHE parts */
-#endif
-
   ulonglong opened_tables;
   ulonglong opened_shares;
   ulonglong select_full_join_count;

=== modified file 'sql/sql_connect.cc'
--- a/sql/sql_connect.cc	2011-11-18 12:28:10 +0000
+++ b/sql/sql_connect.cc	2012-01-02 07:09:32 +0000
@@ -671,13 +671,38 @@ void prepare_new_connection_state(THD* t
     execute_init_command(thd, &opt_init_connect, &LOCK_sys_init_connect);
     if (thd->is_error())
     {
-      thd->killed= THD::KILL_CONNECTION;
+      ulong packet_length;
+      NET *net= &thd->net;
+
       sql_print_warning(ER(ER_NEW_ABORTING_CONNECTION),
-                        thd->thread_id,(thd->db ? thd->db : "unconnected"),
+                        thd->thread_id,
+                        thd->db ? thd->db : "unconnected",
                         sctx->user ? sctx->user : "unauthenticated",
                         sctx->host_or_ip, "init_connect command failed");
       sql_print_warning("%s", thd->get_stmt_da()->message());
+
+      thd->lex->current_select= 0;
+      my_net_set_read_timeout(net, thd->variables.net_wait_timeout);
+      thd->clear_error();
+      net_new_transaction(net);
+      packet_length= my_net_read(net);
+      /*
+        If my_net_read() failed, my_error() has been already called,
+        and the main Diagnostics Area contains an error condition.
+      */
+      if (packet_length != packet_error)
+        my_error(ER_NEW_ABORTING_CONNECTION, MYF(0),
+                 thd->thread_id,
+                 thd->db ? thd->db : "unconnected",
+                 sctx->user ? sctx->user : "unauthenticated",
+                 sctx->host_or_ip, "init_connect command failed");
+
+      thd->server_status&= ~SERVER_STATUS_CLEAR_SET;
+      thd->protocol->end_statement();
+      thd->killed = THD::KILL_CONNECTION;
+      return;
     }
+
     thd->proc_info=0;
     thd->set_time();
     thd->init_for_queries();

=== modified file 'sql/sql_parse.cc'
--- a/sql/sql_parse.cc	2011-12-09 08:59:22 +0000
+++ b/sql/sql_parse.cc	2012-01-02 05:52:38 +0000
@@ -1509,7 +1509,7 @@ bool dispatch_command(enum enum_server_c
 
     length= my_snprintf(buff, buff_len - 1,
                         "Uptime: %lu  Threads: %d  Questions: %lu  "
-                        "Slow queries: %lu  Opens: %lu  Flush tables: %lu  "
+                        "Slow queries: %llu  Opens: %llu  Flush tables: %lu  "
                         "Open tables: %u  Queries per second avg: %u.%03u",
                         uptime,
                         (int) thread_count, (ulong) thd->query_id,

=== modified file 'storage/federated/ha_federated.cc'
--- a/storage/federated/ha_federated.cc	2011-11-17 13:41:28 +0000
+++ b/storage/federated/ha_federated.cc	2011-12-23 15:00:18 +0000
@@ -1683,6 +1683,16 @@ int ha_federated::close(void)
   mysql_close(mysql);
   mysql= NULL;
 
+  /*
+    mysql_close() might return an error if a remote server's gone
+    for some reason. If that happens while removing a table from
+    the table cache, the error will be propagated to a client even
+    if the original query was not issued against the FEDERATED table.
+    So, don't propagate errors from mysql_close().
+  */
+  if (table->in_use)
+    table->in_use->clear_error();
+
   DBUG_RETURN(free_share(share));
 }
 

=== modified file 'storage/innobase/CMakeLists.txt'
--- a/storage/innobase/CMakeLists.txt	2011-11-30 10:09:12 +0000
+++ b/storage/innobase/CMakeLists.txt	2011-12-23 13:17:36 +0000
@@ -231,6 +231,7 @@ SET(INNOBASE_SOURCES
 	btr/btr0sea.cc
 	buf/buf0buddy.cc
 	buf/buf0buf.cc
+	buf/buf0dblwr.cc
 	buf/buf0checksum.cc
 	buf/buf0dump.cc
 	buf/buf0flu.cc

=== modified file 'storage/innobase/buf/buf0buf.cc'
--- a/storage/innobase/buf/buf0buf.cc	2011-12-19 08:43:28 +0000
+++ b/storage/innobase/buf/buf0buf.cc	2011-12-28 10:40:55 +0000
@@ -1041,11 +1041,8 @@ buf_chunk_init(
 	for (i = chunk->size; i--; ) {
 
 		buf_block_init(buf_pool, block, frame);
+		UNIV_MEM_INVALID(block->frame, UNIV_PAGE_SIZE);
 
-#ifdef HAVE_purify
-		/* Wipe contents of frame to eliminate a Purify warning */
-		memset(block->frame, '\0', UNIV_PAGE_SIZE);
-#endif
 		/* Add the block to the free list */
 		UT_LIST_ADD_LAST(list, buf_pool->free, (&block->page));
 
@@ -3902,7 +3899,7 @@ buf_page_io_complete(
 			frame + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
 
 		if (bpage->space == TRX_SYS_SPACE
-		    && trx_doublewrite_page_inside(bpage->offset)) {
+		    && buf_dblwr_page_inside(bpage->offset)) {
 
 			ut_print_timestamp(stderr);
 			fprintf(stderr,

=== added file 'storage/innobase/buf/buf0dblwr.cc'
--- a/storage/innobase/buf/buf0dblwr.cc	1970-01-01 00:00:00 +0000
+++ b/storage/innobase/buf/buf0dblwr.cc	2011-12-23 13:17:36 +0000
@@ -0,0 +1,1112 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file buf/buf0dblwr.cc
+Doublewrite buffer module
+
+Created 2011/12/19
+*******************************************************/
+
+#include "buf0dblwr.h"
+
+#ifdef UNIV_NONINL
+#include "buf0buf.ic"
+#endif
+
+#include "buf0buf.h"
+#include "buf0checksum.h"
+#include "srv0start.h"
+#include "srv0srv.h"
+#include "page0zip.h"
+#include "trx0sys.h"
+
+#ifndef UNIV_HOTBACKUP
+
+/** Time in microseconds (10000 us = 10 ms) that we sleep when unable to
+find a slot in the doublewrite buffer or when we have to wait for a
+running batch to end. */
+#define TRX_DOUBLEWRITE_BATCH_POLL_DELAY	10000
+
+#ifdef UNIV_PFS_MUTEX
+/* Key to register the mutex with performance schema */
+UNIV_INTERN mysql_pfs_key_t	buf_dblwr_mutex_key;
+#endif /* UNIV_PFS_MUTEX */
+
+/** The doublewrite buffer */
+UNIV_INTERN buf_dblwr_t*	buf_dblwr = NULL;
+
+/** Set to TRUE when the doublewrite buffer is being created */
+UNIV_INTERN ibool	buf_dblwr_being_created = FALSE;
+
+/****************************************************************//**
+Determines if a page number is located inside the doublewrite buffer.
+@return TRUE if the location is inside the two blocks of the
+doublewrite buffer */
+UNIV_INTERN
+ibool
+buf_dblwr_page_inside(
+/*==================*/
+	ulint	page_no)	/*!< in: page number */
+{
+	if (buf_dblwr == NULL) {
+
+		return(FALSE);
+	}
+
+	if (page_no >= buf_dblwr->block1
+	    && page_no < buf_dblwr->block1
+	    + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
+		return(TRUE);
+	}
+
+	if (page_no >= buf_dblwr->block2
+	    && page_no < buf_dblwr->block2
+	    + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
+		return(TRUE);
+	}
+
+	return(FALSE);
+}
+
+/****************************************************************//**
+Calls buf_page_get() on the TRX_SYS_PAGE and returns a pointer to the
+doublewrite buffer within it.
+@return	pointer to the doublewrite buffer within the filespace header
+page. */
+UNIV_INLINE
+byte*
+buf_dblwr_get(
+/*==========*/
+	mtr_t*	mtr)	/*!< in/out: MTR to hold the page latch */
+{
+	buf_block_t*	block;
+
+	block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
+			     RW_X_LATCH, mtr);
+	buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
+
+	return(buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE);
+}
+
+
+/****************************************************************//**
+Creates or initializes the doublewrite buffer at a database start. */
+static
+void
+buf_dblwr_init(
+/*===========*/
+	byte*	doublewrite)	/*!< in: pointer to the doublewrite buf
+				header on trx sys page */
+{
+	ulint	buf_size;
+
+	buf_dblwr = static_cast<buf_dblwr_t*>(
+		mem_zalloc(sizeof(buf_dblwr_t)));
+
+	/* There are two blocks of same size in the doublewrite
+	buffer. */
+	buf_size = 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
+
+	/* There must be at least one buffer for single page writes
+	and one buffer for batch writes. */
+	ut_a(srv_doublewrite_batch_size > 0
+	     && srv_doublewrite_batch_size < buf_size);
+
+	mutex_create(buf_dblwr_mutex_key,
+		     &buf_dblwr->mutex, SYNC_DOUBLEWRITE);
+
+	buf_dblwr->first_free = 0;
+	buf_dblwr->s_reserved = 0;
+	buf_dblwr->b_reserved = 0;
+
+	buf_dblwr->block1 = mach_read_from_4(
+		doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK1);
+	buf_dblwr->block2 = mach_read_from_4(
+		doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK2);
+
+	buf_dblwr->in_use = static_cast<ibool*>(
+		mem_zalloc(buf_size * sizeof(ibool)));
+
+	buf_dblwr->write_buf_unaligned = static_cast<byte*>(
+		ut_malloc((1 + buf_size) * UNIV_PAGE_SIZE));
+
+	buf_dblwr->write_buf = static_cast<byte*>(
+		ut_align(buf_dblwr->write_buf_unaligned,
+			 UNIV_PAGE_SIZE));
+
+	buf_dblwr->buf_block_arr = static_cast<buf_page_t**>(
+		mem_zalloc(buf_size * sizeof(void*)));
+}
+
+/****************************************************************//**
+Creates the doublewrite buffer to a new InnoDB installation. The header of the
+doublewrite buffer is placed on the trx system header page. */
+UNIV_INTERN
+void
+buf_dblwr_create(void)
+/*==================*/
+{
+	buf_block_t*	block2;
+#ifdef UNIV_SYNC_DEBUG
+	buf_block_t*	new_block;
+#endif /* UNIV_SYNC_DEBUG */
+	byte*	doublewrite;
+	byte*	fseg_header;
+	ulint	page_no;
+	ulint	prev_page_no;
+	ulint	i;
+	mtr_t	mtr;
+
+	if (buf_dblwr) {
+		/* Already inited */
+
+		return;
+	}
+
+start_again:
+	mtr_start(&mtr);
+	buf_dblwr_being_created = TRUE;
+
+	doublewrite = buf_dblwr_get(&mtr);
+
+	if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
+	    == TRX_SYS_DOUBLEWRITE_MAGIC_N) {
+		/* The doublewrite buffer has already been created:
+		just read in some numbers */
+
+		buf_dblwr_init(doublewrite);
+
+		mtr_commit(&mtr);
+		buf_dblwr_being_created = FALSE;
+		return;
+	}
+
+	ut_print_timestamp(stderr);
+	fprintf(stderr,
+		" InnoDB: Doublewrite buffer not found:"
+		" creating new\n");
+
+	if (buf_pool_get_curr_size()
+	    < ((2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
+		+ FSP_EXTENT_SIZE / 2 + 100)
+	       * UNIV_PAGE_SIZE)) {
+		fprintf(stderr,
+			"InnoDB: Cannot create doublewrite buffer:"
+			" you must\n"
+			"InnoDB: increase your buffer pool size.\n"
+			"InnoDB: Cannot continue operation.\n");
+
+		exit(1);
+	}
+
+	block2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
+			     TRX_SYS_DOUBLEWRITE
+			     + TRX_SYS_DOUBLEWRITE_FSEG, &mtr);
+
+	if (block2 == NULL) {
+		fprintf(stderr,
+			"InnoDB: Cannot create doublewrite buffer:"
+			" you must\n"
+			"InnoDB: increase your tablespace size.\n"
+			"InnoDB: Cannot continue operation.\n");
+
+		/* We exit without committing the mtr to prevent
+		its modifications to the database getting to disk */
+
+		exit(1);
+	}
+
+	/* fseg_create acquires a second latch on the page, therefore we
+	must declare it (only after block2 is known to be non-NULL): */
+
+	buf_block_dbg_add_level(block2, SYNC_NO_ORDER_CHECK);
+
+	fseg_header = doublewrite + TRX_SYS_DOUBLEWRITE_FSEG;
+	prev_page_no = 0;
+
+	for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
+		     + FSP_EXTENT_SIZE / 2; i++) {
+		page_no = fseg_alloc_free_page(fseg_header,
+					       prev_page_no + 1,
+					       FSP_UP, &mtr);
+		if (page_no == FIL_NULL) {
+			fprintf(stderr,
+				"InnoDB: Cannot create doublewrite"
+				" buffer: you must\n"
+				"InnoDB: increase your"
+				" tablespace size.\n"
+				"InnoDB: Cannot continue operation.\n"
+				);
+
+			exit(1);
+		}
+
+		/* We read the allocated pages to the buffer pool;
+		when they are written to disk in a flush, the space
+		id and page number fields are also written to the
+		pages. When we at database startup read pages
+		from the doublewrite buffer, we know that if the
+		space id and page number in them are the same as
+		the page position in the tablespace, then the page
+		has not been written to in doublewrite. */
+
+#ifdef UNIV_SYNC_DEBUG
+		new_block =
+#endif /* UNIV_SYNC_DEBUG */
+		buf_page_get(TRX_SYS_SPACE, 0, page_no,
+			     RW_X_LATCH, &mtr);
+		buf_block_dbg_add_level(new_block,
+					SYNC_NO_ORDER_CHECK);
+
+		if (i == FSP_EXTENT_SIZE / 2) {
+			ut_a(page_no == FSP_EXTENT_SIZE);
+			mlog_write_ulint(doublewrite
+					 + TRX_SYS_DOUBLEWRITE_BLOCK1,
+					 page_no, MLOG_4BYTES, &mtr);
+			mlog_write_ulint(doublewrite
+					 + TRX_SYS_DOUBLEWRITE_REPEAT
+					 + TRX_SYS_DOUBLEWRITE_BLOCK1,
+					 page_no, MLOG_4BYTES, &mtr);
+
+		} else if (i == FSP_EXTENT_SIZE / 2
+			   + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
+			ut_a(page_no == 2 * FSP_EXTENT_SIZE);
+			mlog_write_ulint(doublewrite
+					 + TRX_SYS_DOUBLEWRITE_BLOCK2,
+					 page_no, MLOG_4BYTES, &mtr);
+			mlog_write_ulint(doublewrite
+					 + TRX_SYS_DOUBLEWRITE_REPEAT
+					 + TRX_SYS_DOUBLEWRITE_BLOCK2,
+					 page_no, MLOG_4BYTES, &mtr);
+
+		} else if (i > FSP_EXTENT_SIZE / 2) {
+			ut_a(page_no == prev_page_no + 1);
+		}
+
+		if (((i + 1) & 15) == 0) {
+			/* rw_locks can only be recursively x-locked
+			2048 times. (on 32 bit platforms,
+			(lint) 0 - (X_LOCK_DECR * 2049)
+			is no longer a negative number, and thus
+			lock_word becomes like a shared lock).
+			For 4k page size this loop will
+			lock the fseg header too many times. Since
+			this code is not done while any other threads
+			are active, restart the MTR occasionally. */
+			mtr_commit(&mtr);
+			mtr_start(&mtr);
+			doublewrite = buf_dblwr_get(&mtr);
+			fseg_header = doublewrite
+				      + TRX_SYS_DOUBLEWRITE_FSEG;
+		}
+
+		prev_page_no = page_no;
+	}
+
+	mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC,
+			 TRX_SYS_DOUBLEWRITE_MAGIC_N,
+			 MLOG_4BYTES, &mtr);
+	mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC
+			 + TRX_SYS_DOUBLEWRITE_REPEAT,
+			 TRX_SYS_DOUBLEWRITE_MAGIC_N,
+			 MLOG_4BYTES, &mtr);
+
+	mlog_write_ulint(doublewrite
+			 + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
+			 TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
+			 MLOG_4BYTES, &mtr);
+	mtr_commit(&mtr);
+
+	/* Flush the modified pages to disk and make a checkpoint */
+	log_make_checkpoint_at(LSN_MAX, TRUE);
+
+	/* Remove doublewrite pages from LRU */
+	buf_pool_invalidate();
+
+	ut_print_timestamp(stderr);
+	fprintf(stderr, " InnoDB: Doublewrite buffer created\n");
+
+	goto start_again;
+}
+
+/****************************************************************//**
+At a database startup initializes the doublewrite buffer memory structure if
+we already have a doublewrite buffer created in the data files. If we are
+upgrading to an InnoDB version which supports multiple tablespaces, then this
+function performs the necessary update operations. If we are in a crash
+recovery, this function uses a possible doublewrite buffer to restore
+half-written pages in the data files. */
+UNIV_INTERN
+void
+buf_dblwr_init_or_restore_pages(
+/*============================*/
+	ibool	restore_corrupt_pages)	/*!< in: TRUE=restore pages */
+{
+	byte*	buf;
+	byte*	read_buf;
+	byte*	unaligned_read_buf;
+	ulint	block1;
+	ulint	block2;
+	byte*	page;
+	ibool	reset_space_ids = FALSE;
+	byte*	doublewrite;
+	ulint	space_id;
+	ulint	page_no;
+	ulint	i;
+
+	/* We do the file i/o past the buffer pool */
+
+	unaligned_read_buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE));
+
+	read_buf = static_cast<byte*>(
+		ut_align(unaligned_read_buf, UNIV_PAGE_SIZE));
+
+	/* Read the trx sys header to check if we are using the doublewrite
+	buffer */
+
+	fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, 0,
+	       UNIV_PAGE_SIZE, read_buf, NULL);
+	doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
+
+	if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
+	    == TRX_SYS_DOUBLEWRITE_MAGIC_N) {
+		/* The doublewrite buffer has been created */
+
+		buf_dblwr_init(doublewrite);
+
+		block1 = buf_dblwr->block1;
+		block2 = buf_dblwr->block2;
+
+		buf = buf_dblwr->write_buf;
+	} else {
+		goto leave_func;
+	}
+
+	if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED)
+	!= TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N) {
+
+		/* We are upgrading from a version < 4.1.x to a version where
+		multiple tablespaces are supported. We must reset the space id
+		field in the pages in the doublewrite buffer because starting
+		from this version the space id is stored to
+		FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. */
+
+		reset_space_ids = TRUE;
+
+		fprintf(stderr,
+			"InnoDB: Resetting space id's in the"
+			" doublewrite buffer\n");
+	}
+
+	/* Read the pages from the doublewrite buffer to memory */
+
+	fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block1, 0,
+	       TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
+	       buf, NULL);
+	fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block2, 0,
+	       TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
+	       buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
+	       NULL);
+	/* Check if any of these pages is half-written in data files, in the
+	intended position */
+
+	page = buf;
+
+	for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) {
+
+		ulint source_page_no;
+		page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
+
+		if (reset_space_ids) {
+
+			space_id = 0;
+			mach_write_to_4(page
+					+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0);
+			/* We do not need to calculate new checksums for the
+			pages because the field .._SPACE_ID does not affect
+			them. Write the page back to where we read it from. */
+
+			if (i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
+				source_page_no = block1 + i;
+			} else {
+				source_page_no = block2
+					+ i - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
+			}
+
+			fil_io(OS_FILE_WRITE, TRUE, 0, 0, source_page_no, 0,
+			       UNIV_PAGE_SIZE, page, NULL);
+		} else {
+
+			space_id = mach_read_from_4(
+				page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
+		}
+
+		if (!restore_corrupt_pages) {
+			/* The database was shut down gracefully: no need to
+			restore pages */
+
+		} else if (!fil_tablespace_exists_in_mem(space_id)) {
+			/* Maybe we have dropped the single-table tablespace
+			and this page once belonged to it: do nothing */
+
+		} else if (!fil_check_adress_in_tablespace(space_id,
+							   page_no)) {
+			fprintf(stderr,
+				"InnoDB: Warning: a page in the"
+				" doublewrite buffer is not within space\n"
+				"InnoDB: bounds; space id %lu"
+				" page number %lu, page %lu in"
+				" doublewrite buf.\n",
+				(ulong) space_id, (ulong) page_no, (ulong) i);
+
+		} else if (space_id == TRX_SYS_SPACE
+			   && ((page_no >= block1
+				&& page_no
+				< block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
+			       || (page_no >= block2
+				   && page_no
+				   < (block2
+				      + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)))) {
+
+			/* It is an unwritten doublewrite buffer page:
+			do nothing */
+		} else {
+			ulint	zip_size = fil_space_get_zip_size(space_id);
+
+			/* Read in the actual page from the file */
+			fil_io(OS_FILE_READ, TRUE, space_id, zip_size,
+			       page_no, 0,
+			       zip_size ? zip_size : UNIV_PAGE_SIZE,
+			       read_buf, NULL);
+
+			/* Check if the page is corrupt */
+
+			if (UNIV_UNLIKELY
+			    (buf_page_is_corrupted(read_buf, zip_size))) {
+
+				fprintf(stderr,
+					"InnoDB: Warning: database page"
+					" corruption or a failed\n"
+					"InnoDB: file read of"
+					" space %lu page %lu.\n"
+					"InnoDB: Trying to recover it from"
+					" the doublewrite buffer.\n",
+					(ulong) space_id, (ulong) page_no);
+
+				if (buf_page_is_corrupted(page, zip_size)) {
+					fprintf(stderr,
+						"InnoDB: Dump of the page:\n");
+					buf_page_print(read_buf, zip_size);
+					fprintf(stderr,
+						"InnoDB: Dump of"
+						" corresponding page"
+						" in doublewrite buffer:\n");
+					buf_page_print(page, zip_size);
+
+					fprintf(stderr,
+						"InnoDB: Also the page in the"
+						" doublewrite buffer"
+						" is corrupt.\n"
+						"InnoDB: Cannot continue"
+						" operation.\n"
+						"InnoDB: You can try to"
+						" recover the database"
+						" with the my.cnf\n"
+						"InnoDB: option:\n"
+						"InnoDB:"
+						" innodb_force_recovery=6\n");
+					exit(1);
+				}
+
+				/* Write the good page from the
+				doublewrite buffer to the intended
+				position */
+
+				fil_io(OS_FILE_WRITE, TRUE, space_id,
+				       zip_size, page_no, 0,
+				       zip_size ? zip_size : UNIV_PAGE_SIZE,
+				       page, NULL);
+				fprintf(stderr,
+					"InnoDB: Recovered the page from"
+					" the doublewrite buffer.\n");
+			}
+		}
+
+		page += UNIV_PAGE_SIZE;
+	}
+
+	fil_flush_file_spaces(FIL_TABLESPACE);
+
+leave_func:
+	ut_free(unaligned_read_buf);
+}
+
+/****************************************************************//**
+Frees doublewrite buffer. */
+UNIV_INTERN
+void
+buf_dblwr_free(void)
+/*================*/
+{
+	/* Free the double write data structures. */
+	ut_a(buf_dblwr != NULL);
+	ut_ad(buf_dblwr->s_reserved == 0);
+	ut_ad(buf_dblwr->b_reserved == 0);
+
+	ut_free(buf_dblwr->write_buf_unaligned);
+	buf_dblwr->write_buf_unaligned = NULL;
+
+	mem_free(buf_dblwr->buf_block_arr);
+	buf_dblwr->buf_block_arr = NULL;
+
+	mem_free(buf_dblwr->in_use);
+	buf_dblwr->in_use = NULL;
+
+	mutex_free(&buf_dblwr->mutex);
+	mem_free(buf_dblwr);
+	buf_dblwr = NULL;
+}
+
+/********************************************************************//**
+Updates the doublewrite buffer when an IO request that is part of an
+LRU or flush batch is completed. */
+UNIV_INTERN
+void
+buf_dblwr_update(void)
+/*==================*/
+{
+	if (!srv_use_doublewrite_buf || buf_dblwr == NULL) {
+		return;
+	}
+
+	mutex_enter(&buf_dblwr->mutex);
+
+	ut_ad(buf_dblwr->batch_running);
+	ut_ad(buf_dblwr->b_reserved > 0);
+
+	buf_dblwr->b_reserved--;
+	if (buf_dblwr->b_reserved == 0) {
+
+		mutex_exit(&buf_dblwr->mutex);
+		/* This will finish the batch. Sync data files
+		to the disk. */
+		fil_flush_file_spaces(FIL_TABLESPACE);
+		mutex_enter(&buf_dblwr->mutex);
+
+		/* We can now reuse the doublewrite memory buffer: */
+		buf_dblwr->first_free = 0;
+		buf_dblwr->batch_running = FALSE;
+	}
+
+	mutex_exit(&buf_dblwr->mutex);
+}
+
+/********************************************************************//**
+Flush a batch of writes to the datafiles that have already been
+written by the OS. */
+static
+void
+buf_dblwr_sync_datafiles(void)
+/*==========================*/
+{
+	/* Wake possible simulated aio thread to actually post the
+	writes to the operating system */
+	os_aio_simulated_wake_handler_threads();
+
+	/* Wait that all async writes to tablespaces have been posted to
+	the OS */
+	os_aio_wait_until_no_pending_writes();
+
+	/* Now we flush the data to disk (for example, with fsync) */
+	fil_flush_file_spaces(FIL_TABLESPACE);
+
+	return;
+}
+
+/********************************************************************//**
+Check the LSN values on the page. */
+static
+void
+buf_dblwr_check_page_lsn(
+/*=====================*/
+	const page_t*	page)		/*!< in: page to check */
+{
+	if (memcmp(page + (FIL_PAGE_LSN + 4),
+		   page + (UNIV_PAGE_SIZE
+			   - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
+		   4)) {
+
+		ut_print_timestamp(stderr);
+		fprintf(stderr,
+			" InnoDB: ERROR: The page to be written"
+			" seems corrupt!\n"
+			"InnoDB: The low 4 bytes of LSN fields do not match "
+			"(" ULINTPF " != " ULINTPF ")!"
+			" Noticed in the buffer pool.\n",
+			mach_read_from_4(
+				page + FIL_PAGE_LSN + 4),
+			mach_read_from_4(
+				page + UNIV_PAGE_SIZE
+				- FIL_PAGE_END_LSN_OLD_CHKSUM + 4));
+	}
+}
+
+/********************************************************************//**
+Asserts when a corrupt block is found during writing out data to the
+disk. */
+static
+void
+buf_dblwr_assert_on_corrupt_block(
+/*==============================*/
+	const buf_block_t*	block)	/*!< in: block to check */
+{
+	buf_page_print(block->frame, 0);
+
+	ut_print_timestamp(stderr);
+	fprintf(stderr,
+		"  InnoDB: Apparent corruption of an"
+		" index page n:o %lu in space %lu\n"
+		"InnoDB: to be written to data file."
+		" We intentionally crash server\n"
+		"InnoDB: to prevent corrupt data"
+		" from ending up in data\n"
+		"InnoDB: files.\n",
+		(ulong) buf_block_get_page_no(block),
+		(ulong) buf_block_get_space(block));
+
+	ut_error;
+}
+
+/********************************************************************//**
+Check the LSN values on the page with which this block is associated.
+Also validate the page if the option is set. */
+static
+void
+buf_dblwr_check_block(
+/*==================*/
+	const buf_block_t*	block)	/*!< in: block to check */
+{
+	if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE
+	    || block->page.zip.data) {
+		/* No simple validate for compressed pages exists. */
+		return;
+	}
+
+	buf_dblwr_check_page_lsn(block->frame);
+
+	if (!block->check_index_page_at_flush) {
+		return;
+	}
+
+	if (page_is_comp(block->frame)) {
+		if (!page_simple_validate_new(block->frame)) {
+			buf_dblwr_assert_on_corrupt_block(block);
+		}
+	} else if (!page_simple_validate_old(block->frame)) {
+
+		buf_dblwr_assert_on_corrupt_block(block);
+	}
+}
+
+/********************************************************************//**
+Writes a page that has already been written to the doublewrite buffer
+to the datafile. It is the job of the caller to sync the datafile. */
+static
+void
+buf_dblwr_write_block_to_datafile(
+/*==============================*/
+	const buf_block_t*	block)	/*!< in: block to write */
+{
+	ut_a(block);
+	ut_a(buf_page_in_file(&block->page));
+
+	if (block->page.zip.data) {
+		fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
+		       FALSE, buf_page_get_space(&block->page),
+		       buf_page_get_zip_size(&block->page),
+		       buf_page_get_page_no(&block->page), 0,
+		       buf_page_get_zip_size(&block->page),
+		       (void*) block->page.zip.data,
+		       (void*) block);
+
+		goto exit;
+	}
+
+	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
+	buf_dblwr_check_page_lsn(block->frame);
+
+	fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
+	       FALSE, buf_block_get_space(block), 0,
+	       buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE,
+	       (void*) block->frame, (void*) block);
+
+exit:
+	/* Increment the counter of I/O operations used
+	for selecting LRU policy. */
+	buf_LRU_stat_inc_io();
+}
+
+/********************************************************************//**
+Flushes possible buffered writes from the doublewrite memory buffer to disk,
+and also wakes up the aio thread if simulated aio is used. It is very
+important to call this function after a batch of writes has been posted,
+and also when we may have to wait for a page latch! Otherwise a deadlock
+of threads can occur. */
+UNIV_INTERN
+void
+buf_dblwr_flush_buffered_writes(void)
+/*=================================*/
+{
+	byte*		write_buf;
+	ulint		len;
+	ulint		len2;
+	ulint		i;
+
+	if (!srv_use_doublewrite_buf || buf_dblwr == NULL) {
+		/* Sync the writes to the disk. */
+		buf_dblwr_sync_datafiles();
+		return;
+	}
+
+try_again:
+	mutex_enter(&(buf_dblwr->mutex));
+
+	/* Write first to doublewrite buffer blocks. We use synchronous
+	aio and thus know that file write has been completed when the
+	control returns. */
+
+	if (buf_dblwr->first_free == 0) {
+
+		mutex_exit(&(buf_dblwr->mutex));
+
+		return;
+	}
+
+	if (buf_dblwr->batch_running) {
+		mutex_exit(&buf_dblwr->mutex);
+
+		/* Another thread is running the batch right now. Wait
+		for it to finish. */
+		os_thread_sleep(TRX_DOUBLEWRITE_BATCH_POLL_DELAY);
+		goto try_again;
+	}
+
+	ut_a(!buf_dblwr->batch_running);
+
+	/* Disallow anyone else to post to doublewrite buffer or to
+	start another batch of flushing. */
+	buf_dblwr->batch_running = TRUE;
+
+	/* Now safe to release the mutex. Note that though no other
+	thread is allowed to post to the doublewrite batch flushing
+	but any threads working on single page flushes are allowed
+	to proceed. */
+	mutex_exit(&buf_dblwr->mutex);
+
+	write_buf = buf_dblwr->write_buf;
+
+	for (len2 = 0, i = 0;
+	     i < buf_dblwr->first_free;
+	     len2 += UNIV_PAGE_SIZE, i++) {
+
+		const buf_block_t*	block;
+
+		block = (buf_block_t*) buf_dblwr->buf_block_arr[i];
+
+		if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE
+		    || block->page.zip.data) {
+			/* No simple validate for compressed
+			pages exists. */
+			continue;
+		}
+
+		/* Check that the actual page in the buffer pool is
+		not corrupt and the LSN values are sane. */
+		buf_dblwr_check_block(block);
+
+		/* Check that the page as written to the doublewrite
+		buffer has sane LSN values. */
+		buf_dblwr_check_page_lsn(write_buf + len2);
+	}
+
+	/* Write out the first block of the doublewrite buffer */
+	len = ut_min(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE,
+		     buf_dblwr->first_free) * UNIV_PAGE_SIZE;
+
+	fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
+	       buf_dblwr->block1, 0, len,
+	       (void*) write_buf, NULL);
+
+	if (buf_dblwr->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
+		/* No unwritten pages in the second block. */
+		goto flush;
+	}
+
+	/* Write out the second block of the doublewrite buffer. */
+	len = (buf_dblwr->first_free - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
+	       * UNIV_PAGE_SIZE;
+
+	write_buf = buf_dblwr->write_buf
+		    + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
+
+	fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
+	       buf_dblwr->block2, 0, len,
+	       (void*) write_buf, NULL);
+
+flush:
+	/* increment the doublewrite flushed pages counter */
+	srv_dblwr_pages_written += buf_dblwr->first_free;
+	srv_dblwr_writes++;
+
+	/* Now flush the doublewrite buffer data to disk */
+	fil_flush(TRX_SYS_SPACE);
+
+	/* We know that the writes have been flushed to disk now
+	and in recovery we will find them in the doublewrite buffer
+	blocks. Next do the writes to the intended positions. */
+
+	for (i = 0; i < buf_dblwr->first_free; i++) {
+		const buf_block_t* block = (buf_block_t*)
+			buf_dblwr->buf_block_arr[i];
+
+		buf_dblwr_write_block_to_datafile(block);
+	}
+
+	/* Wake possible simulated aio thread to actually post the
+	writes to the operating system. We don't flush the files
+	at this point. We leave it to the IO helper thread to flush
+	datafiles when the whole batch has been processed. */
+	os_aio_simulated_wake_handler_threads();
+}
+
+/********************************************************************//**
+Posts a buffer page for writing. If the doublewrite memory buffer is
+full, calls buf_dblwr_flush_buffered_writes and waits for free
+space to appear. */
+UNIV_INTERN
+void
+buf_dblwr_add_to_batch(
+/*====================*/
+	buf_page_t*	bpage)	/*!< in: buffer block to write */
+{
+	ulint	zip_size;
+
+	ut_a(buf_page_in_file(bpage));
+
+try_again:
+	mutex_enter(&(buf_dblwr->mutex));
+
+	ut_a(buf_dblwr->first_free <= srv_doublewrite_batch_size);
+
+	if (buf_dblwr->batch_running) {
+		mutex_exit(&buf_dblwr->mutex);
+
+		/* This not nearly as bad as it looks. There is only
+		page_cleaner thread which does background flushing
+		in batches therefore it is unlikely to be a contention
+		point. The only exception is when a user thread is
+		forced to do a flush batch because of a sync
+		checkpoint. */
+		os_thread_sleep(TRX_DOUBLEWRITE_BATCH_POLL_DELAY);
+		goto try_again;
+	}
+
+	if (buf_dblwr->first_free == srv_doublewrite_batch_size) {
+		mutex_exit(&(buf_dblwr->mutex));
+
+		buf_dblwr_flush_buffered_writes();
+
+		goto try_again;
+	}
+
+	zip_size = buf_page_get_zip_size(bpage);
+
+	if (zip_size) {
+		UNIV_MEM_ASSERT_RW(bpage->zip.data, zip_size);
+		/* Copy the compressed page and clear the rest. */
+		memcpy(buf_dblwr->write_buf
+		       + UNIV_PAGE_SIZE * buf_dblwr->first_free,
+		       bpage->zip.data, zip_size);
+		memset(buf_dblwr->write_buf
+		       + UNIV_PAGE_SIZE * buf_dblwr->first_free
+		       + zip_size, 0, UNIV_PAGE_SIZE - zip_size);
+	} else {
+		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
+		UNIV_MEM_ASSERT_RW(((buf_block_t*) bpage)->frame,
+				   UNIV_PAGE_SIZE);
+
+		memcpy(buf_dblwr->write_buf
+		       + UNIV_PAGE_SIZE * buf_dblwr->first_free,
+		       ((buf_block_t*) bpage)->frame, UNIV_PAGE_SIZE);
+	}
+
+	buf_dblwr->buf_block_arr[buf_dblwr->first_free] = bpage;
+
+	buf_dblwr->first_free++;
+	buf_dblwr->b_reserved++;
+
+	ut_ad(buf_dblwr->b_reserved <= srv_doublewrite_batch_size);
+
+	if (buf_dblwr->first_free == srv_doublewrite_batch_size) {
+		mutex_exit(&(buf_dblwr->mutex));
+
+		buf_dblwr_flush_buffered_writes();
+
+		return;
+	}
+
+	mutex_exit(&(buf_dblwr->mutex));
+}
+
+/********************************************************************//**
+Writes a page to the doublewrite buffer on disk, sync it, then write
+the page to the datafile and sync the datafile. This function is used
+for single page flushes. If all the buffers allocated for single page
+flushes in the doublewrite buffer are in use we wait here for one to
+become free. We are guaranteed that a slot will become free because any
+thread that is using a slot must also release the slot before leaving
+this function. */
+UNIV_INTERN
+void
+buf_dblwr_write_single_page(
+/*========================*/
+	buf_page_t*	bpage)	/*!< in: buffer block to write */
+{
+	ulint		n_slots;
+	ulint		size;
+	ulint		zip_size;
+	ulint		offset;
+	ulint		i;
+
+	ut_a(buf_page_in_file(bpage));
+	ut_a(srv_use_doublewrite_buf);
+	ut_a(buf_dblwr != NULL);
+
+	/* total number of slots available for single page flushes
+	starts from srv_doublewrite_batch_size to the end of the
+	buffer. */
+	size = 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
+	ut_a(size > srv_doublewrite_batch_size);
+	n_slots = size - srv_doublewrite_batch_size;
+
+	if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
+
+		/* Check that the actual page in the buffer pool is
+		not corrupt and the LSN values are sane. */
+		buf_dblwr_check_block((buf_block_t*) bpage);
+
+		/* Check that the page as written to the doublewrite
+		buffer has sane LSN values. */
+		if (!bpage->zip.data) {
+			buf_dblwr_check_page_lsn(
+				((buf_block_t*) bpage)->frame);
+		}
+	}
+
+retry:
+	mutex_enter(&buf_dblwr->mutex);
+	if (buf_dblwr->s_reserved == n_slots) {
+
+		mutex_exit(&buf_dblwr->mutex);
+		/* All slots are reserved. Since it involves two IOs
+		during the processing a sleep of 10ms should be
+		enough. */
+		os_thread_sleep(TRX_DOUBLEWRITE_BATCH_POLL_DELAY);
+		goto retry;
+	}
+
+	for (i = srv_doublewrite_batch_size; i < size; ++i) {
+
+		if (!buf_dblwr->in_use[i]) {
+			break;
+		}
+	}
+
+	/* We are guaranteed to find a slot. */
+	ut_a(i < size);
+	buf_dblwr->in_use[i] = TRUE;
+	buf_dblwr->s_reserved++;
+	buf_dblwr->buf_block_arr[i] = bpage;
+	mutex_exit(&buf_dblwr->mutex);
+
+	/* Lets see if we are going to write in the first or second
+	block of the doublewrite buffer. */
+	if (i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
+		offset = buf_dblwr->block1 + i;
+	} else {
+		offset = buf_dblwr->block2 + i
+			 - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
+	}
+
+	/* We deal with compressed and uncompressed pages a little
+	differently here. In case of uncompressed pages we can
+	directly write the block to the allocated slot in the
+	doublewrite buffer in the system tablespace and then after
+	syncing the system table space we can proceed to write the page
+	in the datafile.
+	In case of compressed page we first do a memcpy of the block
+	to the in-memory buffer of doublewrite before proceeding to
+	write it. This is so because we want to pad the remaining
+	bytes in the doublewrite page with zeros. */
+
+	zip_size = buf_page_get_zip_size(bpage);
+	if (zip_size) {
+		memcpy(buf_dblwr->write_buf + UNIV_PAGE_SIZE * i,
+		       bpage->zip.data, zip_size);
+		memset(buf_dblwr->write_buf + UNIV_PAGE_SIZE * i
+		       + zip_size, 0, UNIV_PAGE_SIZE - zip_size);
+
+		fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
+		       offset, 0, UNIV_PAGE_SIZE,
+		       (void*) (buf_dblwr->write_buf
+				+ UNIV_PAGE_SIZE * i), NULL);
+	} else {
+		/* It is a regular page. Write it directly to the
+		doublewrite buffer */
+		fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
+		       offset, 0, UNIV_PAGE_SIZE,
+		       (void*) ((buf_block_t*) bpage)->frame,
+		       NULL);
+	}
+
+	/* Now flush the doublewrite buffer data to disk */
+	fil_flush(TRX_SYS_SPACE);
+
+	/* We know that the write has been flushed to disk now
+	and during recovery we will find it in the doublewrite buffer
+	blocks. Next do the write to the intended position. */
+	buf_dblwr_write_block_to_datafile((buf_block_t*) bpage);
+
+	/* Sync the writes to the disk. */
+	buf_dblwr_sync_datafiles();
+
+	mutex_enter(&buf_dblwr->mutex);
+
+	buf_dblwr->s_reserved--;
+	buf_dblwr->buf_block_arr[i] = NULL;
+	buf_dblwr->in_use[i] = FALSE;
+
+	/* count the one page written here, not the batch fill level */
+	srv_dblwr_pages_written++;
+	srv_dblwr_writes++;
+
+	mutex_exit(&(buf_dblwr->mutex));
+
+}
+#endif /* !UNIV_HOTBACKUP */

=== modified file 'storage/innobase/buf/buf0flu.cc'
--- a/storage/innobase/buf/buf0flu.cc	2011-12-21 03:24:58 +0000
+++ b/storage/innobase/buf/buf0flu.cc	2011-12-23 13:17:36 +0000
@@ -62,11 +62,6 @@ Each interval is 1 second, defined by th
 srv_error_monitor_thread() calls buf_flush_stat_update(). */
 #define BUF_FLUSH_STAT_N_INTERVAL 20
 
-/** Time in milliseconds that we sleep when unable to find a slot in
-the doublewrite buffer or when we have to wait for a running batch
-to end. */
-#define TRX_DOUBLEWRITE_BATCH_POLL_DELAY	10000
-
 /** Sampled values buf_flush_stat_cur.
 Not protected by any mutex.  Updated by buf_flush_stat_update(). */
 static buf_flush_stat_t	buf_flush_stat_arr[BUF_FLUSH_STAT_N_INTERVAL];
@@ -670,40 +665,6 @@ buf_flush_relocate_on_flush_list(
 }
 
 /********************************************************************//**
-Updates the doublewrite buffer when an IO request that is part of an
-LRU or flush batch is completed. */
-UNIV_INLINE
-void
-buf_flush_update_doublewrite(void)
-/*==============================*/
-{
-	if (!srv_use_doublewrite_buf || trx_doublewrite == NULL) {
-		return;
-	}
-
-	mutex_enter(&trx_doublewrite->mutex);
-
-	ut_ad(trx_doublewrite->batch_running);
-	ut_ad(trx_doublewrite->b_reserved > 0);
-
-	trx_doublewrite->b_reserved--;
-	if (trx_doublewrite->b_reserved == 0) {
-
-		mutex_exit(&trx_doublewrite->mutex);
-		/* This will finish the batch. Sync data files
-		to the disk. */
-		fil_flush_file_spaces(FIL_TABLESPACE);
-		mutex_enter(&trx_doublewrite->mutex);
-
-		/* We can now reuse the doublewrite memory buffer: */
-		trx_doublewrite->first_free = 0;
-		trx_doublewrite->batch_running = FALSE;
-	}
-
-	mutex_exit(&trx_doublewrite->mutex);
-}
-
-/********************************************************************//**
 Updates the flush system data structures when a write is completed. */
 UNIV_INTERN
 void
@@ -735,7 +696,7 @@ buf_flush_write_complete(
 	switch (flush_type) {
 	case BUF_FLUSH_LIST:
 	case BUF_FLUSH_LRU:
-		buf_flush_update_doublewrite();
+		buf_dblwr_update();
 		break;
 	case BUF_FLUSH_SINGLE_PAGE:
 		/* Single page flushes are synchronous. No need
@@ -745,499 +706,6 @@ buf_flush_write_complete(
 		ut_error;
 	}
 }
-
-/********************************************************************//**
-Flush a batch of writes to the datafiles that have already been
-written by the OS. */
-static
-void
-buf_flush_sync_datafiles(void)
-/*==========================*/
-{
-	/* Wake possible simulated aio thread to actually post the
-	writes to the operating system */
-	os_aio_simulated_wake_handler_threads();
-
-	/* Wait that all async writes to tablespaces have been posted to
-	the OS */
-	os_aio_wait_until_no_pending_writes();
-
-	/* Now we flush the data to disk (for example, with fsync) */
-	fil_flush_file_spaces(FIL_TABLESPACE);
-
-	return;
-}
-
-/********************************************************************//**
-Check the LSN values on the page. */
-static
-void
-buf_flush_doublewrite_check_page_lsn(
-/*=================================*/
-	const page_t*	page)		/*!< in: page to check */
-{
-	if (memcmp(page + (FIL_PAGE_LSN + 4),
-		   page + (UNIV_PAGE_SIZE
-			   - FIL_PAGE_END_LSN_OLD_CHKSUM + 4),
-		   4)) {
-
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: ERROR: The page to be written"
-			" seems corrupt!\n"
-			"InnoDB: The low 4 bytes of LSN fields do not match "
-			"(" ULINTPF " != " ULINTPF ")!"
-			" Noticed in the buffer pool.\n",
-			mach_read_from_4(
-				page + FIL_PAGE_LSN + 4),
-			mach_read_from_4(
-				page + UNIV_PAGE_SIZE
-				- FIL_PAGE_END_LSN_OLD_CHKSUM + 4));
-	}
-}
-
-/********************************************************************//**
-Asserts when a corrupt block is find during writing out data to the
-disk. */
-static
-void
-buf_flush_doublewrite_assert_on_corrupt_block(
-/*==========================================*/
-	const buf_block_t*	block)	/*!< in: block to check */
-{
-	buf_page_print(block->frame, 0);
-
-	ut_print_timestamp(stderr);
-	fprintf(stderr,
-		"  InnoDB: Apparent corruption of an"
-		" index page n:o %lu in space %lu\n"
-		"InnoDB: to be written to data file."
-		" We intentionally crash server\n"
-		"InnoDB: to prevent corrupt data"
-		" from ending up in data\n"
-		"InnoDB: files.\n",
-		(ulong) buf_block_get_page_no(block),
-		(ulong) buf_block_get_space(block));
-
-	ut_error;
-}
-
-/********************************************************************//**
-Check the LSN values on the page with which this block is associated.
-Also validate the page if the option is set. */
-static
-void
-buf_flush_doublewrite_check_block(
-/*==============================*/
-	const buf_block_t*	block)	/*!< in: block to check */
-{
-	if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE
-	    || block->page.zip.data) {
-		/* No simple validate for compressed pages exists. */
-		return;
-	}
-
-	buf_flush_doublewrite_check_page_lsn(block->frame);
-
-	if (!block->check_index_page_at_flush) {
-		return;
-	}
-
-	if (page_is_comp(block->frame)) {
-		if (!page_simple_validate_new(block->frame)) {
-			buf_flush_doublewrite_assert_on_corrupt_block(block);
-		}
-	} else if (!page_simple_validate_old(block->frame)) {
-
-		buf_flush_doublewrite_assert_on_corrupt_block(block);
-	}
-}
-
-/********************************************************************//**
-Writes a page that has already been written to the doublewrite buffer
-to the datafile. It is the job of the caller to sync the datafile. */
-static
-void
-buf_flush_write_block_to_datafile(
-/*==============================*/
-	const buf_block_t*	block)	/*!< in: block to write */
-{
-	ut_a(block);
-	ut_a(buf_page_in_file(&block->page));
-
-	if (block->page.zip.data) {
-		fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
-		       FALSE, buf_page_get_space(&block->page),
-		       buf_page_get_zip_size(&block->page),
-		       buf_page_get_page_no(&block->page), 0,
-		       buf_page_get_zip_size(&block->page),
-		       (void*) block->page.zip.data,
-		       (void*) block);
-
-		goto exit;
-	}
-
-	ut_a(buf_block_get_state(block) == BUF_BLOCK_FILE_PAGE);
-	buf_flush_doublewrite_check_page_lsn(block->frame);
-
-	fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
-	       FALSE, buf_block_get_space(block), 0,
-	       buf_block_get_page_no(block), 0, UNIV_PAGE_SIZE,
-	       (void*) block->frame, (void*) block);
-
-exit:
-	/* Increment the counter of I/O operations used
-	for selecting LRU policy. */
-	buf_LRU_stat_inc_io();
-}
-
-/********************************************************************//**
-Flushes possible buffered writes from the doublewrite memory buffer to disk,
-and also wakes up the aio thread if simulated aio is used. It is very
-important to call this function after a batch of writes has been posted,
-and also when we may have to wait for a page latch! Otherwise a deadlock
-of threads can occur. */
-static
-void
-buf_flush_buffered_writes(void)
-/*===========================*/
-{
-	byte*		write_buf;
-	ulint		len;
-	ulint		len2;
-	ulint		i;
-
-	if (!srv_use_doublewrite_buf || trx_doublewrite == NULL) {
-		/* Sync the writes to the disk. */
-		buf_flush_sync_datafiles();
-		return;
-	}
-
-try_again:
-	mutex_enter(&(trx_doublewrite->mutex));
-
-	/* Write first to doublewrite buffer blocks. We use synchronous
-	aio and thus know that file write has been completed when the
-	control returns. */
-
-	if (trx_doublewrite->first_free == 0) {
-
-		mutex_exit(&(trx_doublewrite->mutex));
-
-		return;
-	}
-
-	if (trx_doublewrite->batch_running) {
-		mutex_exit(&trx_doublewrite->mutex);
-
-		/* Another thread is running the batch right now. Wait
-		for it to finish. */
-		os_thread_sleep(TRX_DOUBLEWRITE_BATCH_POLL_DELAY);
-		goto try_again;
-	}
-
-	ut_a(!trx_doublewrite->batch_running);
-
-	/* Disallow anyone else to post to doublewrite buffer or to
-	start another batch of flushing. */
-	trx_doublewrite->batch_running = TRUE;
-
-	/* Now safe to release the mutex. Note that though no other
-	thread is allowed to post to the doublewrite batch flushing
-	but any threads working on single page flushes are allowed
-	to proceed. */
-	mutex_exit(&trx_doublewrite->mutex);
-
-	write_buf = trx_doublewrite->write_buf;
-
-	for (len2 = 0, i = 0;
-	     i < trx_doublewrite->first_free;
-	     len2 += UNIV_PAGE_SIZE, i++) {
-
-		const buf_block_t*	block;
-
-		block = (buf_block_t*) trx_doublewrite->buf_block_arr[i];
-
-		if (buf_block_get_state(block) != BUF_BLOCK_FILE_PAGE
-		    || block->page.zip.data) {
-			/* No simple validate for compressed
-			pages exists. */
-			continue;
-		}
-
-		/* Check that the actual page in the buffer pool is
-		not corrupt and the LSN values are sane. */
-		buf_flush_doublewrite_check_block(block);
-
-		/* Check that the page as written to the doublewrite
-		buffer has sane LSN values. */
-		buf_flush_doublewrite_check_page_lsn(write_buf + len2);
-	}
-
-	/* Write out the first block of the doublewrite buffer */
-	len = ut_min(TRX_SYS_DOUBLEWRITE_BLOCK_SIZE,
-		     trx_doublewrite->first_free) * UNIV_PAGE_SIZE;
-
-	fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
-	       trx_doublewrite->block1, 0, len,
-	       (void*) write_buf, NULL);
-
-	if (trx_doublewrite->first_free <= TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
-		/* No unwritten pages in the second block. */
-		goto flush;
-	}
-
-	/* Write out the second block of the doublewrite buffer. */
-	len = (trx_doublewrite->first_free - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
-	       * UNIV_PAGE_SIZE;
-
-	write_buf = trx_doublewrite->write_buf
-		    + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE;
-
-	fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
-	       trx_doublewrite->block2, 0, len,
-	       (void*) write_buf, NULL);
-
-flush:
-	/* increment the doublewrite flushed pages counter */
-	srv_dblwr_pages_written += trx_doublewrite->first_free;
-	srv_dblwr_writes++;
-
-	/* Now flush the doublewrite buffer data to disk */
-	fil_flush(TRX_SYS_SPACE);
-
-	/* We know that the writes have been flushed to disk now
-	and in recovery we will find them in the doublewrite buffer
-	blocks. Next do the writes to the intended positions. */
-
-	for (i = 0; i < trx_doublewrite->first_free; i++) {
-		const buf_block_t* block = (buf_block_t*)
-			trx_doublewrite->buf_block_arr[i];
-
-		buf_flush_write_block_to_datafile(block);
-	}
-
-	/* Wake possible simulated aio thread to actually post the
-	writes to the operating system. We don't flush the files
-	at this point. We leave it to the IO helper thread to flush
-	datafiles when the whole batch has been processed. */
-	os_aio_simulated_wake_handler_threads();
-}
-
-/********************************************************************//**
-Posts a buffer page for writing. If the doublewrite memory buffer is
-full, calls buf_flush_buffered_writes and waits for for free space to
-appear. */
-static
-void
-buf_flush_post_to_doublewrite_buf(
-/*==============================*/
-	buf_page_t*	bpage)	/*!< in: buffer block to write */
-{
-	ulint	zip_size;
-
-	ut_a(buf_page_in_file(bpage));
-
-try_again:
-	mutex_enter(&(trx_doublewrite->mutex));
-
-	ut_a(trx_doublewrite->first_free <= srv_doublewrite_batch_size);
-
-	if (trx_doublewrite->batch_running) {
-		mutex_exit(&trx_doublewrite->mutex);
-
-		/* This not nearly as bad as it looks. There is only
-		page_cleaner thread which does background flushing
-		in batches therefore it is unlikely to be a contention
-		point. The only exception is when a user thread is
-		forced to do a flush batch because of a sync
-		checkpoint. */
-		os_thread_sleep(TRX_DOUBLEWRITE_BATCH_POLL_DELAY);
-		goto try_again;
-	}
-
-	if (trx_doublewrite->first_free == srv_doublewrite_batch_size) {
-		mutex_exit(&(trx_doublewrite->mutex));
-
-		buf_flush_buffered_writes();
-
-		goto try_again;
-	}
-
-	zip_size = buf_page_get_zip_size(bpage);
-
-	if (zip_size) {
-		UNIV_MEM_ASSERT_RW(bpage->zip.data, zip_size);
-		/* Copy the compressed page and clear the rest. */
-		memcpy(trx_doublewrite->write_buf
-		       + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
-		       bpage->zip.data, zip_size);
-		memset(trx_doublewrite->write_buf
-		       + UNIV_PAGE_SIZE * trx_doublewrite->first_free
-		       + zip_size, 0, UNIV_PAGE_SIZE - zip_size);
-	} else {
-		ut_a(buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE);
-		UNIV_MEM_ASSERT_RW(((buf_block_t*) bpage)->frame,
-				   UNIV_PAGE_SIZE);
-
-		memcpy(trx_doublewrite->write_buf
-		       + UNIV_PAGE_SIZE * trx_doublewrite->first_free,
-		       ((buf_block_t*) bpage)->frame, UNIV_PAGE_SIZE);
-	}
-
-	trx_doublewrite->buf_block_arr[trx_doublewrite->first_free] = bpage;
-
-	trx_doublewrite->first_free++;
-	trx_doublewrite->b_reserved++;
-
-	ut_ad(trx_doublewrite->b_reserved <= srv_doublewrite_batch_size);
-
-	if (trx_doublewrite->first_free == srv_doublewrite_batch_size) {
-		mutex_exit(&(trx_doublewrite->mutex));
-
-		buf_flush_buffered_writes();
-
-		return;
-	}
-
-	mutex_exit(&(trx_doublewrite->mutex));
-}
-
-/********************************************************************//**
-Writes a page to the doublewrite buffer on disk, sync it, then write
-the page to the datafile and sync the datafile. This function is used
-for single page flushes. If all the buffers allocated for single page
-flushes in the doublewrite buffer are in use we wait here for one to
-become free. We are guaranteed that a slot will become free because any
-thread that is using a slot must also release the slot before leaving
-this function. */
-static
-void
-buf_flush_write_to_dblwr_and_datafile(
-/*==================================*/
-	buf_page_t*	bpage)	/*!< in: buffer block to write */
-{
-	ulint		n_slots;
-	ulint		size;
-	ulint		zip_size;
-	ulint		offset;
-	ulint		i;
-
-	ut_a(buf_page_in_file(bpage));
-	ut_a(srv_use_doublewrite_buf);
-	ut_a(trx_doublewrite != NULL);
-
-	/* total number of slots available for single page flushes
-	starts from srv_doublewrite_batch_size to the end of the
-	buffer. */
-	size = 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
-	ut_a(size > srv_doublewrite_batch_size);
-	n_slots = size - srv_doublewrite_batch_size;
-
-	if (buf_page_get_state(bpage) == BUF_BLOCK_FILE_PAGE) {
-
-		/* Check that the actual page in the buffer pool is
-		not corrupt and the LSN values are sane. */
-		buf_flush_doublewrite_check_block((buf_block_t*) bpage);
-
-		/* Check that the page as written to the doublewrite
-		buffer has sane LSN values. */
-		if (!bpage->zip.data) {
-			buf_flush_doublewrite_check_page_lsn(
-				((buf_block_t*) bpage)->frame);
-		}
-	}
-
-retry:
-	mutex_enter(&trx_doublewrite->mutex);
-	if (trx_doublewrite->s_reserved == n_slots) {
-
-		mutex_exit(&trx_doublewrite->mutex);
-		/* All slots are reserved. Since it involves two IOs
-		during the processing a sleep of 10ms should be
-		enough. */
-		os_thread_sleep(TRX_DOUBLEWRITE_BATCH_POLL_DELAY);
-		goto retry;
-	}
-
-	for (i = srv_doublewrite_batch_size; i < size; ++i) {
-
-		if (!trx_doublewrite->in_use[i]) {
-			break;
-		}
-	}
-
-	/* We are guaranteed to find a slot. */
-	ut_a(i < size);
-	trx_doublewrite->in_use[i] = TRUE;
-	trx_doublewrite->s_reserved++;
-	trx_doublewrite->buf_block_arr[i] = bpage;
-	mutex_exit(&trx_doublewrite->mutex);
-
-	/* Lets see if we are going to write in the first or second
-	block of the doublewrite buffer. */
-	if (i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
-		offset = trx_doublewrite->block1 + i;
-	} else {
-		offset = trx_doublewrite->block2 + i
-			 - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
-	}
-
-	/* We deal with compressed and uncompressed pages a little
-	differently here. In case of uncompressed pages we can
-	directly write the block to the allocated slot in the
-	doublewrite buffer in the system tablespace and then after
-	syncing the system table space we can proceed to write the page
-	in the datafile.
-	In case of compressed page we first do a memcpy of the block
-	to the in-memory buffer of doublewrite before proceeding to
-	write it. This is so because we want to pad the remaining
-	bytes in the doublewrite page with zeros. */
-
-	zip_size = buf_page_get_zip_size(bpage);
-	if (zip_size) {
-		memcpy(trx_doublewrite->write_buf + UNIV_PAGE_SIZE * i,
-		       bpage->zip.data, zip_size);
-		memset(trx_doublewrite->write_buf + UNIV_PAGE_SIZE * i
-		       + zip_size, 0, UNIV_PAGE_SIZE - zip_size);
-
-		fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
-		       offset, 0, UNIV_PAGE_SIZE,
-		       (void*) (trx_doublewrite->write_buf
-				+ UNIV_PAGE_SIZE * i), NULL);
-	} else {
-		/* It is a regular page. Write it directly to the
-		doublewrite buffer */
-		fil_io(OS_FILE_WRITE, TRUE, TRX_SYS_SPACE, 0,
-		       offset, 0, UNIV_PAGE_SIZE,
-		       (void*) ((buf_block_t*) bpage)->frame,
-		       NULL);
-	}
-
-	/* Now flush the doublewrite buffer data to disk */
-	fil_flush(TRX_SYS_SPACE);
-
-	/* We know that the write has been flushed to disk now
-	and during recovery we will find it in the doublewrite buffer
-	blocks. Next do the write to the intended position. */
-	buf_flush_write_block_to_datafile((buf_block_t*) bpage);
-
-	/* Sync the writes to the disk. */
-	buf_flush_sync_datafiles();
-
-	mutex_enter(&trx_doublewrite->mutex);
-
-	trx_doublewrite->s_reserved--;
-	trx_doublewrite->buf_block_arr[i] = NULL;
-	trx_doublewrite->in_use[i] = FALSE;
-
-	/* increment the doublewrite flushed pages counter */
-	srv_dblwr_pages_written += trx_doublewrite->first_free;
-	srv_dblwr_writes++;
-
-	mutex_exit(&(trx_doublewrite->mutex));
-
-}
 #endif /* !UNIV_HOTBACKUP */
 
 /********************************************************************//**
@@ -1356,7 +824,8 @@ buf_flush_init_for_writing(
 /********************************************************************//**
 Does an asynchronous write of a buffer page. NOTE: in simulated aio and
 also when the doublewrite buffer is used, we must call
-buf_flush_buffered_writes after we have posted a batch of writes! */
+buf_dblwr_flush_buffered_writes after we have posted a batch of
+writes! */
 static
 void
 buf_flush_write_block_low(
@@ -1437,16 +906,16 @@ buf_flush_write_block_low(
 		break;
 	}
 
-	if (!srv_use_doublewrite_buf || !trx_doublewrite) {
+	if (!srv_use_doublewrite_buf || !buf_dblwr) {
 		fil_io(OS_FILE_WRITE | OS_AIO_SIMULATED_WAKE_LATER,
 		       FALSE, buf_page_get_space(bpage), zip_size,
 		       buf_page_get_page_no(bpage), 0,
 		       zip_size ? zip_size : UNIV_PAGE_SIZE,
 		       frame, bpage);
 	} else if (flush_type == BUF_FLUSH_SINGLE_PAGE) {
-		buf_flush_write_to_dblwr_and_datafile(bpage);
+		buf_dblwr_write_single_page(bpage);
 	} else {
-		buf_flush_post_to_doublewrite_buf(bpage);
+		buf_dblwr_add_to_batch(bpage);
 	}
 }
 
@@ -1514,7 +983,7 @@ buf_flush_page(
 		flush_list or LRU_list. */
 
 		if (!is_s_latched) {
-			buf_flush_buffered_writes();
+			buf_dblwr_flush_buffered_writes();
 
 			if (is_uncompressed) {
 				rw_lock_s_lock_gen(&((buf_block_t*) bpage)
@@ -2128,7 +1597,7 @@ buf_flush_batch(
 
 	buf_pool_mutex_exit(buf_pool);
 
-	buf_flush_buffered_writes();
+	buf_dblwr_flush_buffered_writes();
 
 #ifdef UNIV_DEBUG
 	if (buf_debug_prints && count > 0) {
@@ -2153,7 +1622,7 @@ buf_flush_common(
 	enum buf_flush	flush_type,	/*!< in: type of flush */
 	ulint		page_count)	/*!< in: number of pages flushed */
 {
-	buf_flush_buffered_writes();
+	buf_dblwr_flush_buffered_writes();
 
 	ut_a(flush_type == BUF_FLUSH_LRU || flush_type == BUF_FLUSH_LIST);
 

=== modified file 'storage/innobase/buf/buf0lru.cc'
--- a/storage/innobase/buf/buf0lru.cc	2011-12-19 08:43:28 +0000
+++ b/storage/innobase/buf/buf0lru.cc	2011-12-23 13:17:36 +0000
@@ -41,6 +41,7 @@ Created 11/5/1995 Heikki Tuuri
 #include "btr0btr.h"
 #include "buf0buddy.h"
 #include "buf0buf.h"
+#include "buf0dblwr.h"
 #include "buf0flu.h"
 #include "buf0rea.h"
 #include "btr0sea.h"
@@ -866,7 +867,7 @@ loop:
 
 	if (buf_pool->init_flush[BUF_FLUSH_LRU]
 	    && srv_use_doublewrite_buf
-	    && trx_doublewrite != NULL) {
+	    && buf_dblwr != NULL) {
 
 		/* If there is an LRU flush happening in the background
 		then we wait for it to end instead of trying a single

=== modified file 'storage/innobase/buf/buf0rea.cc'
--- a/storage/innobase/buf/buf0rea.cc	2011-11-30 10:27:10 +0000
+++ b/storage/innobase/buf/buf0rea.cc	2011-12-23 13:17:36 +0000
@@ -31,6 +31,7 @@ Created 11/5/1995 Heikki Tuuri
 #include "buf0buf.h"
 #include "buf0flu.h"
 #include "buf0lru.h"
+#include "buf0dblwr.h"
 #include "ibuf0ibuf.h"
 #include "log0recv.h"
 #include "trx0sys.h"
@@ -93,13 +94,7 @@ buf_read_page_low(
 	ignore_nonexistent_pages = mode & BUF_READ_IGNORE_NONEXISTENT_PAGES;
 	mode &= ~BUF_READ_IGNORE_NONEXISTENT_PAGES;
 
-	if (trx_doublewrite && space == TRX_SYS_SPACE
-	    && (   (offset >= trx_doublewrite->block1
-		    && offset < trx_doublewrite->block1
-		    + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
-		   || (offset >= trx_doublewrite->block2
-		       && offset < trx_doublewrite->block2
-		       + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE))) {
+	if (space == TRX_SYS_SPACE && buf_dblwr_page_inside(offset)) {
 		ut_print_timestamp(stderr);
 		fprintf(stderr,
 			"  InnoDB: Warning: trying to read"

=== modified file 'storage/innobase/handler/ha_innodb.cc'
--- a/storage/innobase/handler/ha_innodb.cc	2011-12-21 08:49:47 +0000
+++ b/storage/innobase/handler/ha_innodb.cc	2012-01-04 17:07:47 +0000
@@ -49,6 +49,7 @@ this program; if not, write to the Free
 #include "buf0dump.h"
 #include "buf0lru.h"
 #include "buf0flu.h"
+#include "buf0dblwr.h"
 #include "btr0sea.h"
 #include "os0file.h"
 #include "os0thread.h"
@@ -324,7 +325,7 @@ static PSI_mutex_info all_innodb_mutexes
 #  ifdef UNIV_SYNC_DEBUG
 	{&sync_thread_mutex_key, "sync_thread_mutex", 0},
 #  endif /* UNIV_SYNC_DEBUG */
-	{&trx_doublewrite_mutex_key, "trx_doublewrite_mutex", 0},
+	{&buf_dblwr_mutex_key, "buf_dblwr_mutex", 0},
 	{&trx_undo_mutex_key, "trx_undo_mutex", 0},
 	{&srv_sys_mutex_key, "srv_sys_mutex", 0},
 	{&lock_sys_mutex_key, "lock_mutex", 0},
@@ -568,6 +569,8 @@ static SHOW_VAR innodb_status_variables[
   (char*) &export_vars.innodb_num_open_files,		  SHOW_LONG},
   {"truncated_status_writes",
   (char*) &export_vars.innodb_truncated_status_writes,	  SHOW_LONG},
+  {"available_undo_logs",
+  (char*) &export_vars.innodb_available_undo_logs,        SHOW_LONG},
   {NullS, NullS, SHOW_LONG}
 };
 
@@ -13313,6 +13316,39 @@ innodb_change_buffer_max_size_update(
 	ibuf_max_size_update(innobase_change_buffer_max_size);
 }
 
+/********************************************************************//**
+Check callback for innodb_undo_logs, registered with MySQL: stores in *save
+the requested value, raised to at least 1 and capped at srv_available_undo_logs.
+@return	0 always; out-of-range input is clamped rather than rejected
+@see mysql_var_check_func */
+static
+int
+innodb_undo_logs_validate(
+/*======================*/
+	THD*				thd,	/*!< in: thread handle */
+	struct st_mysql_sys_var*	var,	/*!< in: ptr to sys var */
+	void*				save,	/*!< out: immediate result
+						for update function */
+	struct st_mysql_value*		value)	/*!< in: incoming string */
+{
+	long long	rsegs;
+
+	DBUG_ENTER("innodb_undo_logs_validate");
+	DBUG_ASSERT(save != NULL);
+	DBUG_ASSERT(value != NULL);
+	DBUG_ASSERT(srv_available_undo_logs <= TRX_SYS_N_RSEGS);
+
+	value->val_int(value, &rsegs);
+	/* Values below 1 would store 0 or wrap in the unsigned cast below. */
+	if (rsegs < 1) {
+		rsegs = 1;
+	} else if (rsegs > (long long) srv_available_undo_logs) {
+		rsegs = srv_available_undo_logs;
+	}
+	*reinterpret_cast<ulint*>(save) = static_cast<ulint>(rsegs);
+	DBUG_RETURN(0);
+}
+
 /*************************************************************//**
 Find the corresponding ibuf_use_t value that indexes into
 innobase_change_buffering_values[] array for the input
@@ -14662,7 +14698,7 @@ static MYSQL_SYSVAR_ULONG(undo_tablespac
 static MYSQL_SYSVAR_ULONG(undo_logs, srv_undo_logs,
   PLUGIN_VAR_OPCMDARG,
   "Number of undo logs to use.",
-  NULL, NULL,
+  innodb_undo_logs_validate, NULL,
   TRX_SYS_N_RSEGS,	/* Default setting */
   1,			/* Minimum value */
   TRX_SYS_N_RSEGS, 0);	/* Maximum value */
@@ -15179,3 +15215,5 @@ ha_innobase::idx_cond_push(
 	/* We will evaluate the condition entirely */
 	DBUG_RETURN(NULL);
 }
+
+

=== modified file 'storage/innobase/ibuf/ibuf0ibuf.cc'
--- a/storage/innobase/ibuf/ibuf0ibuf.cc	2011-11-30 10:27:10 +0000
+++ b/storage/innobase/ibuf/ibuf0ibuf.cc	2011-12-23 13:17:36 +0000
@@ -1255,17 +1255,9 @@ ibuf_rec_get_page_no_func(
 
 	field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len);
 
-	if (len == 1) {
-		/* This is of the >= 4.1.x record format */
-		ut_a(trx_sys_multiple_tablespace_format);
-
-		field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_PAGE, &len);
-	} else {
-		ut_a(trx_doublewrite_must_reset_space_ids);
-		ut_a(!trx_sys_multiple_tablespace_format);
+	ut_a(len == 1);
 
-		field = rec_get_nth_field_old(rec, 0, &len);
-	}
+	field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_PAGE, &len);
 
 	ut_a(len == 4);
 
@@ -1301,20 +1293,13 @@ ibuf_rec_get_space_func(
 
 	field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len);
 
-	if (len == 1) {
-		/* This is of the >= 4.1.x record format */
-
-		ut_a(trx_sys_multiple_tablespace_format);
-		field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_SPACE, &len);
-		ut_a(len == 4);
+	ut_a(len == 1);
 
-		return(mach_read_from_4(field));
-	}
+	field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_SPACE, &len);
 
-	ut_a(trx_doublewrite_must_reset_space_ids);
-	ut_a(!trx_sys_multiple_tablespace_format);
+	ut_a(len == 4);
 
-	return(0);
+	return(mach_read_from_4(field));
 }
 
 #ifdef UNIV_DEBUG
@@ -1584,58 +1569,6 @@ ibuf_dummy_index_free(
 	dict_mem_table_free(table);
 }
 
-/*********************************************************************//**
-Builds the entry to insert into a non-clustered index when we have the
-corresponding record in an ibuf index.
-
-NOTE that as we copy pointers to fields in ibuf_rec, the caller must
-hold a latch to the ibuf_rec page as long as the entry is used!
-
-@return own: entry to insert to a non-clustered index */
-UNIV_INLINE
-dtuple_t*
-ibuf_build_entry_pre_4_1_x(
-/*=======================*/
-	const rec_t*	ibuf_rec,	/*!< in: record in an insert buffer */
-	mem_heap_t*	heap,		/*!< in: heap where built */
-	dict_index_t**	pindex)		/*!< out, own: dummy index that
-					describes the entry */
-{
-	ulint		i;
-	ulint		len;
-	const byte*	types;
-	dtuple_t*	tuple;
-	ulint		n_fields;
-
-	ut_a(trx_doublewrite_must_reset_space_ids);
-	ut_a(!trx_sys_multiple_tablespace_format);
-
-	n_fields = rec_get_n_fields_old(ibuf_rec) - 2;
-	tuple = dtuple_create(heap, n_fields);
-	types = rec_get_nth_field_old(ibuf_rec, 1, &len);
-
-	ut_a(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
-
-	for (i = 0; i < n_fields; i++) {
-		const byte*	data;
-		dfield_t*	field;
-
-		field = dtuple_get_nth_field(tuple, i);
-
-		data = rec_get_nth_field_old(ibuf_rec, i + 2, &len);
-
-		dfield_set_data(field, data, len);
-
-		dtype_read_for_order_and_null_size(
-			dfield_get_type(field),
-			types + i * DATA_ORDER_NULL_TYPE_BUF_SIZE);
-	}
-
-	*pindex = ibuf_dummy_index_create(n_fields, FALSE);
-
-	return(tuple);
-}
-
 #ifdef UNIV_DEBUG
 # define ibuf_build_entry_from_ibuf_rec(mtr,ibuf_rec,heap,pindex)	\
 	ibuf_build_entry_from_ibuf_rec_func(mtr,ibuf_rec,heap,pindex)
@@ -1689,15 +1622,7 @@ ibuf_build_entry_from_ibuf_rec_func(
 
 	data = rec_get_nth_field_old(ibuf_rec, IBUF_REC_FIELD_MARKER, &len);
 
-	if (len > 1) {
-		/* This a < 4.1.x format record */
-
-		return(ibuf_build_entry_pre_4_1_x(ibuf_rec, heap, pindex));
-	}
-
-	/* This a >= 4.1.x format record */
-
-	ut_a(trx_sys_multiple_tablespace_format);
+	ut_a(len == 1);
 	ut_a(*data == 0);
 	ut_a(rec_get_n_fields_old(ibuf_rec) > IBUF_REC_FIELD_USER);
 
@@ -1753,8 +1678,6 @@ ibuf_rec_get_size(
 	const rec_t*	rec,			/*!< in: ibuf record */
 	const byte*	types,			/*!< in: fields */
 	ulint		n_fields,		/*!< in: number of fields */
-	ibool		pre_4_1,		/*!< in: TRUE=pre-4.1 format,
-						FALSE=newer */
 	ulint		comp)			/*!< in: 0=ROW_FORMAT=REDUNDANT,
 						nonzero=ROW_FORMAT=COMPACT */
 {
@@ -1763,13 +1686,8 @@ ibuf_rec_get_size(
 	ulint	types_offset;
 	ulint	size = 0;
 
-	if (pre_4_1) {
-		field_offset = 2;
-		types_offset = DATA_ORDER_NULL_TYPE_BUF_SIZE;
-	} else {
-		field_offset = IBUF_REC_FIELD_USER;
-		types_offset = DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
-	}
+	field_offset = IBUF_REC_FIELD_USER;
+	types_offset = DATA_NEW_ORDER_NULL_TYPE_BUF_SIZE;
 
 	for (i = 0; i < n_fields; i++) {
 		ulint		len;
@@ -1779,10 +1697,6 @@ ibuf_rec_get_size(
 
 		if (len != UNIV_SQL_NULL) {
 			size += len;
-		} else if (pre_4_1) {
-			dtype_read_for_order_and_null_size(&dtype, types);
-
-			size += dtype_get_sql_null_size(&dtype, comp);
 		} else {
 			dtype_new_read_for_order_and_null_size(&dtype, types);
 
@@ -1820,8 +1734,9 @@ ibuf_rec_get_volume_func(
 	const byte*	types;
 	ulint		n_fields;
 	ulint		data_size;
-	ibool		pre_4_1;
 	ulint		comp;
+	ibuf_op_t	op;
+	ulint		info_len;
 
 	ut_ad(mtr_memo_contains_page(mtr, ibuf_rec, MTR_MEMO_PAGE_X_FIX)
 	      || mtr_memo_contains_page(mtr, ibuf_rec, MTR_MEMO_PAGE_S_FIX));
@@ -1829,64 +1744,44 @@ ibuf_rec_get_volume_func(
 	ut_ad(rec_get_n_fields_old(ibuf_rec) > 2);
 
 	data = rec_get_nth_field_old(ibuf_rec, IBUF_REC_FIELD_MARKER, &len);
-	pre_4_1 = (len > 1);
-
-	if (pre_4_1) {
-		/* < 4.1.x format record */
-
-		ut_a(trx_doublewrite_must_reset_space_ids);
-		ut_a(!trx_sys_multiple_tablespace_format);
-
-		n_fields = rec_get_n_fields_old(ibuf_rec) - 2;
-
-		types = rec_get_nth_field_old(ibuf_rec, 1, &len);
-
-		ut_ad(len == n_fields * DATA_ORDER_NULL_TYPE_BUF_SIZE);
-		comp = 0;
-	} else {
-		/* >= 4.1.x format record */
-		ibuf_op_t	op;
-		ulint		info_len;
-
-		ut_a(trx_sys_multiple_tablespace_format);
-		ut_a(*data == 0);
+	ut_a(len == 1);
+	ut_a(*data == 0);
 
-		types = rec_get_nth_field_old(
-			ibuf_rec, IBUF_REC_FIELD_METADATA, &len);
+	types = rec_get_nth_field_old(
+		ibuf_rec, IBUF_REC_FIELD_METADATA, &len);
 
-		ibuf_rec_get_info(mtr, ibuf_rec, &op, &comp, &info_len, NULL);
-
-		if (op == IBUF_OP_DELETE_MARK || op == IBUF_OP_DELETE) {
-			/* Delete-marking a record doesn't take any
-			additional space, and while deleting a record
-			actually frees up space, we have to play it safe and
-			pretend it takes no additional space (the record
-			might not exist, etc.).  */
+	ibuf_rec_get_info(mtr, ibuf_rec, &op, &comp, &info_len, NULL);
 
-			return(0);
-		} else if (comp) {
-			dtuple_t*	entry;
-			ulint		volume;
-			dict_index_t*	dummy_index;
-			mem_heap_t*	heap = mem_heap_create(500);
+	if (op == IBUF_OP_DELETE_MARK || op == IBUF_OP_DELETE) {
+		/* Delete-marking a record doesn't take any
+		additional space, and while deleting a record
+		actually frees up space, we have to play it safe and
+		pretend it takes no additional space (the record
+		might not exist, etc.).  */
 
-			entry = ibuf_build_entry_from_ibuf_rec(
-				mtr, ibuf_rec, heap, &dummy_index);
+		return(0);
+	} else if (comp) {
+		dtuple_t*	entry;
+		ulint		volume;
+		dict_index_t*	dummy_index;
+		mem_heap_t*	heap = mem_heap_create(500);
 
-			volume = rec_get_converted_size(dummy_index, entry, 0);
+		entry = ibuf_build_entry_from_ibuf_rec(mtr, ibuf_rec,
+			heap, &dummy_index);
 
-			ibuf_dummy_index_free(dummy_index);
-			mem_heap_free(heap);
+		volume = rec_get_converted_size(dummy_index, entry, 0);
 
-			return(volume + page_dir_calc_reserved_space(1));
-		}
+		ibuf_dummy_index_free(dummy_index);
+		mem_heap_free(heap);
 
-		types += info_len;
-		n_fields = rec_get_n_fields_old(ibuf_rec)
-			- IBUF_REC_FIELD_USER;
+		return(volume + page_dir_calc_reserved_space(1));
 	}
 
-	data_size = ibuf_rec_get_size(ibuf_rec, types, n_fields, pre_4_1, comp);
+	types += info_len;
+	n_fields = rec_get_n_fields_old(ibuf_rec)
+		- IBUF_REC_FIELD_USER;
+
+	data_size = ibuf_rec_get_size(ibuf_rec, types, n_fields, comp);
 
 	return(data_size + rec_get_converted_extra_size(data_size, n_fields, 0)
 	       + page_dir_calc_reserved_space(1));
@@ -2060,7 +1955,7 @@ ibuf_entry_build(
 
 /*********************************************************************//**
 Builds a search tuple used to search buffered inserts for an index page.
-This is for < 4.1.x format records
+This is for >= 4.1.x format records.
 @return	own: search tuple */
 static
 dtuple_t*
@@ -2074,45 +1969,6 @@ ibuf_search_tuple_build(
 	dfield_t*	field;
 	byte*		buf;
 
-	ut_a(space == 0);
-	ut_a(trx_doublewrite_must_reset_space_ids);
-	ut_a(!trx_sys_multiple_tablespace_format);
-
-	tuple = dtuple_create(heap, 1);
-
-	/* Store the page number in tuple */
-
-	field = dtuple_get_nth_field(tuple, 0);
-
-	buf = static_cast<byte*>(mem_heap_alloc(heap, 4));
-
-	mach_write_to_4(buf, page_no);
-
-	dfield_set_data(field, buf, 4);
-
-	dtuple_set_types_binary(tuple, 1);
-
-	return(tuple);
-}
-
-/*********************************************************************//**
-Builds a search tuple used to search buffered inserts for an index page.
-This is for >= 4.1.x format records.
-@return	own: search tuple */
-static
-dtuple_t*
-ibuf_new_search_tuple_build(
-/*========================*/
-	ulint		space,	/*!< in: space id */
-	ulint		page_no,/*!< in: index page number */
-	mem_heap_t*	heap)	/*!< in: heap into which to build */
-{
-	dtuple_t*	tuple;
-	dfield_t*	field;
-	byte*		buf;
-
-	ut_a(trx_sys_multiple_tablespace_format);
-
 	tuple = dtuple_create(heap, IBUF_REC_FIELD_METADATA);
 
 	/* Store the space id in tuple */
@@ -2834,8 +2690,7 @@ ibuf_get_volume_buffered_hash(
 
 	len = ibuf_rec_get_size(
 		rec, types,
-		rec_get_n_fields_old(rec) - IBUF_REC_FIELD_USER,
-		FALSE, comp);
+		rec_get_n_fields_old(rec) - IBUF_REC_FIELD_USER, comp);
 	fold = ut_fold_binary(data, len);
 
 	hash += (fold / (CHAR_BIT * sizeof *hash)) % size;
@@ -2895,7 +2750,6 @@ ibuf_get_volume_buffered_count_func(
 	operations.  All pre-4.1 records should have been merged
 	when the database was started up. */
 	ut_a(len == 1);
-	ut_ad(trx_sys_multiple_tablespace_format);
 
 	types = rec_get_nth_field_old(rec, IBUF_REC_FIELD_METADATA, &len);
 
@@ -2909,7 +2763,7 @@ ibuf_get_volume_buffered_count_func(
 		because deletes cannot be buffered if there are
 		old-style inserts buffered for the page. */
 
-		len = ibuf_rec_get_size(rec, types, n_fields, FALSE, 0);
+		len = ibuf_rec_get_size(rec, types, n_fields, 0);
 
 		return(len
 		       + rec_get_converted_extra_size(len, n_fields, 0)
@@ -3014,8 +2868,6 @@ ibuf_get_volume_buffered(
 	/* bitmap of buffered recs */
 	ulint		hash_bitmap[128 / sizeof(ulint)];
 
-	ut_a(trx_sys_multiple_tablespace_format);
-
 	ut_ad((pcur->latch_mode == BTR_MODIFY_PREV)
 	      || (pcur->latch_mode == BTR_MODIFY_TREE));
 
@@ -3259,18 +3111,11 @@ ibuf_get_entry_counter_low_func(
 
 	field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_MARKER, &len);
 
-	if (UNIV_UNLIKELY(len != 1)) {
-		/* pre-4.1 format */
-		ut_a(trx_doublewrite_must_reset_space_ids);
-		ut_a(!trx_sys_multiple_tablespace_format);
-
-		return(ULINT_UNDEFINED);
-	}
-
-	ut_a(trx_sys_multiple_tablespace_format);
+	ut_a(len == 1);
 
 	/* Check the tablespace identifier. */
 	field = rec_get_nth_field_old(rec, IBUF_REC_FIELD_SPACE, &len);
+
 	ut_a(len == 4);
 
 	if (mach_read_from_4(field) != space) {
@@ -3411,8 +3256,6 @@ ibuf_insert_low(
 	ut_ad(!no_counter || op == IBUF_OP_INSERT);
 	ut_a(op < IBUF_OP_COUNT);
 
-	ut_a(trx_sys_multiple_tablespace_format);
-
 	do_merge = FALSE;
 
 	/* Perform dirty reads of ibuf->size and ibuf->max_size, to
@@ -3712,7 +3555,6 @@ ibuf_insert(
 	this function, so that we will have a consistent view of it. */
 	ibuf_use_t	use		= ibuf_use;
 
-	ut_a(trx_sys_multiple_tablespace_format);
 	ut_ad(dtuple_check_typed(entry));
 	ut_ad(ut_is_2pow(zip_size));
 
@@ -4465,13 +4307,7 @@ ibuf_merge_or_delete_for_page(
 
 	heap = mem_heap_create(512);
 
-	if (UNIV_UNLIKELY(!trx_sys_multiple_tablespace_format)) {
-		ut_a(trx_doublewrite_must_reset_space_ids);
-		search_tuple = ibuf_search_tuple_build(space, page_no, heap);
-	} else {
-		search_tuple = ibuf_new_search_tuple_build(space, page_no,
-							   heap);
-	}
+	search_tuple = ibuf_search_tuple_build(space, page_no, heap);
 
 	if (block) {
 		/* Move the ownership of the x-latch on the page to this OS
@@ -4780,7 +4616,7 @@ ibuf_delete_for_discarded_space(
 	/* Use page number 0 to build the search tuple so that we get the
 	cursor positioned at the first entry for this space id */
 
-	search_tuple = ibuf_new_search_tuple_build(space, 0, heap);
+	search_tuple = ibuf_search_tuple_build(space, 0, heap);
 
 	memset(dops, 0, sizeof(dops));
 loop:

=== added file 'storage/innobase/include/buf0dblwr.h'
--- a/storage/innobase/include/buf0dblwr.h	1970-01-01 00:00:00 +0000
+++ b/storage/innobase/include/buf0dblwr.h	2011-12-23 13:17:36 +0000
@@ -0,0 +1,147 @@
+/*****************************************************************************
+
+Copyright (c) 1995, 2011, Oracle and/or its affiliates. All Rights Reserved.
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation; version 2 of the License.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License along with
+this program; if not, write to the Free Software Foundation, Inc.,
+51 Franklin Street, Suite 500, Boston, MA 02110-1335 USA
+
+*****************************************************************************/
+
+/**************************************************//**
+@file include/buf0dblwr.h
+Doublewrite buffer module
+
+Created 2011/12/19 Inaam Rana
+*******************************************************/
+
+#ifndef buf0dblwr_h
+#define buf0dblwr_h
+
+#include "univ.i"
+#include "ut0byte.h"
+#include "log0log.h"
+
+#ifndef UNIV_HOTBACKUP
+
+/** Doublewrite system */
+extern buf_dblwr_t*	buf_dblwr;
+/** Set to TRUE when the doublewrite buffer is being created */
+extern ibool		buf_dblwr_being_created;
+
+/****************************************************************//**
+Creates the doublewrite buffer in a new InnoDB installation. The header of the
+doublewrite buffer is placed on the trx system header page. */
+UNIV_INTERN
+void
+buf_dblwr_create(void);
+/*==================*/
+/****************************************************************//**
+At a database startup initializes the doublewrite buffer memory structure if
+we already have a doublewrite buffer created in the data files. If we are
+upgrading to an InnoDB version which supports multiple tablespaces, then this
+function performs the necessary update operations. If we are in a crash
+recovery, this function uses a possible doublewrite buffer to restore
+half-written pages in the data files. */
+UNIV_INTERN
+void
+buf_dblwr_init_or_restore_pages(
+/*============================*/
+	ibool	restore_corrupt_pages);	/*!< in: TRUE=restore pages */
+/****************************************************************//**
+Frees the doublewrite buffer. */
+UNIV_INTERN
+void
+buf_dblwr_free(void);
+/*================*/
+/********************************************************************//**
+Updates the doublewrite buffer when an IO request that is part of an
+LRU or flush batch is completed. */
+UNIV_INTERN
+void
+buf_dblwr_update(void);
+/*==================*/
+/****************************************************************//**
+Determines if a page number is located inside the doublewrite buffer.
+@return TRUE if the location is inside the two blocks of the
+doublewrite buffer */
+UNIV_INTERN
+ibool
+buf_dblwr_page_inside(
+/*==================*/
+	ulint	page_no);	/*!< in: page number */
+/********************************************************************//**
+Posts a buffer page for writing. If the doublewrite memory buffer is
+full, calls buf_dblwr_flush_buffered_writes and waits for free
+space to appear. */
+UNIV_INTERN
+void
+buf_dblwr_add_to_batch(
+/*====================*/
+	buf_page_t*	bpage);	/*!< in: buffer block to write */
+/********************************************************************//**
+Flushes possible buffered writes from the doublewrite memory buffer to disk,
+and also wakes up the aio thread if simulated aio is used. It is very
+important to call this function after a batch of writes has been posted,
+and also when we may have to wait for a page latch! Otherwise a deadlock
+of threads can occur. */
+UNIV_INTERN
+void
+buf_dblwr_flush_buffered_writes(void);
+/*=================================*/
+/********************************************************************//**
+Writes a page to the doublewrite buffer on disk, sync it, then write
+the page to the datafile and sync the datafile. This function is used
+for single page flushes. If all the buffers allocated for single page
+flushes in the doublewrite buffer are in use we wait here for one to
+become free. We are guaranteed that a slot will become free because any
+thread that is using a slot must also release the slot before leaving
+this function. */
+UNIV_INTERN
+void
+buf_dblwr_write_single_page(
+/*========================*/
+	buf_page_t*	bpage);	/*!< in: buffer block to write */
+
+/** Doublewrite control struct */
+struct buf_dblwr_struct{
+	mutex_t	mutex;		/*!< mutex protecting the first_free field and
+				write_buf */
+	ulint	block1;		/*!< the page number of the first
+				doublewrite block (64 pages) */
+	ulint	block2;		/*!< page number of the second block */
+	ulint	first_free;	/*!< first free position in write_buf measured
+				in units of UNIV_PAGE_SIZE */
+	ulint	s_reserved;	/*!< number of slots currently reserved
+				for single page flushes. */
+	ulint	b_reserved;	/*!< number of slots currently reserved
+				for batch flush. */
+	ibool*	in_use;		/*!< flag used to indicate if a slot is
+				in use. Only used for single page
+				flushes. */
+	ibool	batch_running;	/*!< set to TRUE if currently a batch
+				is being written from the doublewrite
+				buffer. */
+	byte*	write_buf;	/*!< write buffer used in writing to the
+				doublewrite buffer, aligned to an
+				address divisible by UNIV_PAGE_SIZE
+				(which is required by Windows aio) */
+	byte*	write_buf_unaligned;
+				/*!< pointer to write_buf, but unaligned */
+	buf_page_t**
+		buf_block_arr;	/*!< array to store pointers to the buffer
+				blocks which have been cached to write_buf */
+};
+
+
+#endif /* UNIV_HOTBACKUP */
+
+#endif

=== modified file 'storage/innobase/include/buf0types.h'
--- a/storage/innobase/include/buf0types.h	2011-12-07 15:21:53 +0000
+++ b/storage/innobase/include/buf0types.h	2011-12-23 13:17:36 +0000
@@ -38,6 +38,8 @@ typedef	struct buf_pool_struct		buf_pool
 typedef	struct buf_pool_stat_struct	buf_pool_stat_t;
 /** Buffer pool buddy statistics struct */
 typedef	struct buf_buddy_stat_struct	buf_buddy_stat_t;
+/** Doublewrite memory struct */
+typedef struct buf_dblwr_struct		buf_dblwr_t;
 
 /** A buffer frame. @see page_t */
 typedef	byte	buf_frame_t;

=== modified file 'storage/innobase/include/mem0mem.ic'
--- a/storage/innobase/include/mem0mem.ic	2011-12-19 08:43:28 +0000
+++ b/storage/innobase/include/mem0mem.ic	2011-12-28 10:40:55 +0000
@@ -209,10 +209,6 @@ mem_heap_alloc(
 	buf = (byte*) buf + MEM_FIELD_HEADER_SIZE;
 
 #endif
-#ifdef UNIV_SET_MEM_TO_ZERO
-	UNIV_MEM_ALLOC(buf, n);
-	memset(buf, '\0', n);
-#endif
 	UNIV_MEM_ALLOC(buf, n);
 	return(buf);
 }

=== modified file 'storage/innobase/include/mtr0log.ic'
--- a/storage/innobase/include/mtr0log.ic	2011-11-08 10:32:23 +0000
+++ b/storage/innobase/include/mtr0log.ic	2011-12-23 13:17:36 +0000
@@ -26,6 +26,7 @@ Created 12/7/1995 Heikki Tuuri
 #include "mach0data.h"
 #include "ut0lst.h"
 #include "buf0buf.h"
+#include "buf0dblwr.h"
 #include "fsp0types.h"
 #include "trx0sys.h"
 
@@ -203,7 +204,7 @@ mlog_write_initial_log_record_fast(
 	system tablespace */
 	if (space == TRX_SYS_SPACE
 	    && offset >= FSP_EXTENT_SIZE && offset < 3 * FSP_EXTENT_SIZE) {
-		if (trx_doublewrite_buf_is_being_created) {
+		if (buf_dblwr_being_created) {
 			/* Do nothing: we only come to this branch in an
 			InnoDB database creation. We do not redo log
 			anything for the doublewrite buffer pages. */

=== modified file 'storage/innobase/include/os0file.h'
--- a/storage/innobase/include/os0file.h	2011-11-24 07:15:35 +0000
+++ b/storage/innobase/include/os0file.h	2011-12-21 19:35:09 +0000
@@ -46,9 +46,6 @@ Created 10/21/1995 Heikki Tuuri
 /** File node of a tablespace or the log data space */
 typedef	struct fil_node_struct	fil_node_t;
 
-#ifdef UNIV_DO_FLUSH
-extern ibool	os_do_not_call_flush_at_each_write;
-#endif /* UNIV_DO_FLUSH */
 extern ibool	os_has_said_disk_full;
 /** Flag: enable debug printout for asynchronous i/o */
 extern ibool	os_aio_print_debug;

=== modified file 'storage/innobase/include/srv0srv.h'
--- a/storage/innobase/include/srv0srv.h	2011-11-30 10:09:12 +0000
+++ b/storage/innobase/include/srv0srv.h	2011-12-28 08:37:08 +0000
@@ -289,6 +289,7 @@ extern ibool	srv_priority_boost;
 extern ulint	srv_n_lock_wait_count;
 
 extern ulint	srv_truncated_status_writes;
+extern ulint	srv_available_undo_logs;
 
 extern	ulint	srv_mem_pool_size;
 extern	ulint	srv_lock_table_size;
@@ -791,6 +792,7 @@ struct export_var_struct{
 	ulint innodb_rows_deleted;		/*!< srv_n_rows_deleted */
 	ulint innodb_num_open_files;		/*!< fil_n_file_opened */
 	ulint innodb_truncated_status_writes;	/*!< srv_truncated_status_writes */
+	ulint innodb_available_undo_logs;       /*!< srv_available_undo_logs */
 };
 
 /** Thread slot in the thread table.  */

=== modified file 'storage/innobase/include/sync0rw.h'
--- a/storage/innobase/include/sync0rw.h	2011-11-11 08:46:18 +0000
+++ b/storage/innobase/include/sync0rw.h	2011-12-27 12:20:05 +0000
@@ -406,22 +406,6 @@ rw_lock_x_lock_move_ownership(
 	rw_lock_t*	lock);	/*!< in: lock which was x-locked in the
 				buffer read */
 /******************************************************************//**
-Releases a shared mode lock when we know there are no waiters and none
-else will access the lock during the time this function is executed. */
-UNIV_INLINE
-void
-rw_lock_s_unlock_direct(
-/*====================*/
-	rw_lock_t*	lock);	/*!< in/out: rw-lock */
-/******************************************************************//**
-Releases an exclusive mode lock when we know there are no waiters, and
-none else will access the lock durint the time this function is executed. */
-UNIV_INLINE
-void
-rw_lock_x_unlock_direct(
-/*====================*/
-	rw_lock_t*	lock);	/*!< in/out: rw-lock */
-/******************************************************************//**
 Returns the value of writer_count for the lock. Does not reserve the lock
 mutex, so the caller must be sure it is not changed during the call.
 @return	value of writer_count */
@@ -676,9 +660,6 @@ rw_lock_s_lock_gen()
 rw_lock_s_lock_nowait()
 rw_lock_s_unlock_gen()
 rw_lock_free()
-
-Two function APIs rw_lock_x_unlock_direct() and rw_lock_s_unlock_direct()
-do not have any caller/user, they are not instrumented.
 */
 
 #ifdef UNIV_PFS_RWLOCK

=== modified file 'storage/innobase/include/sync0rw.ic'
--- a/storage/innobase/include/sync0rw.ic	2011-11-09 02:40:33 +0000
+++ b/storage/innobase/include/sync0rw.ic	2011-12-27 12:20:05 +0000
@@ -519,31 +519,6 @@ rw_lock_s_unlock_func(
 }
 
 /******************************************************************//**
-Releases a shared mode lock when we know there are no waiters and none
-else will access the lock during the time this function is executed. */
-UNIV_INLINE
-void
-rw_lock_s_unlock_direct(
-/*====================*/
-	rw_lock_t*	lock)	/*!< in/out: rw-lock */
-{
-	ut_ad(lock->lock_word < X_LOCK_DECR);
-
-#ifdef UNIV_SYNC_DEBUG
-	rw_lock_remove_debug_info(lock, 0, RW_LOCK_SHARED);
-#endif
-
-	/* Decrease reader count by incrementing lock_word */
-	lock->lock_word++;
-
-	ut_ad(!lock->waiters);
-	ut_ad(rw_lock_validate(lock));
-#ifdef UNIV_SYNC_PERF_STAT
-	rw_s_exit_count++;
-#endif
-}
-
-/******************************************************************//**
 Releases an exclusive mode lock. */
 UNIV_INLINE
 void
@@ -588,40 +563,6 @@ rw_lock_x_unlock_func(
 	ut_ad(rw_lock_validate(lock));
 
 #ifdef UNIV_SYNC_PERF_STAT
-	rw_x_exit_count++;
-#endif
-}
-
-/******************************************************************//**
-Releases an exclusive mode lock when we know there are no waiters, and
-none else will access the lock during the time this function is executed. */
-UNIV_INLINE
-void
-rw_lock_x_unlock_direct(
-/*====================*/
-	rw_lock_t*	lock)	/*!< in/out: rw-lock */
-{
-	/* Reset the exclusive lock if this thread no longer has an x-mode
-	lock */
-
-	ut_ad((lock->lock_word % X_LOCK_DECR) == 0);
-
-	if (lock->lock_word == 0) {
-		lock->recursive = FALSE;
-		UNIV_MEM_INVALID(&lock->writer_thread,
-				 sizeof lock->writer_thread);
-	}
-
-#ifdef UNIV_SYNC_DEBUG
-	rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX);
-#endif
-
-	lock->lock_word += X_LOCK_DECR;
-
-	ut_ad(!lock->waiters);
-	ut_ad(rw_lock_validate(lock));
-
-#ifdef UNIV_SYNC_PERF_STAT
 	rw_x_exit_count++;
 #endif
 }

=== modified file 'storage/innobase/include/sync0sync.h'
--- a/storage/innobase/include/sync0sync.h	2011-12-19 08:43:28 +0000
+++ b/storage/innobase/include/sync0sync.h	2011-12-23 13:17:36 +0000
@@ -109,7 +109,7 @@ extern mysql_pfs_key_t	srv_monitor_file_
 # ifdef UNIV_SYNC_DEBUG
 extern mysql_pfs_key_t	sync_thread_mutex_key;
 # endif /* UNIV_SYNC_DEBUG */
-extern mysql_pfs_key_t	trx_doublewrite_mutex_key;
+extern mysql_pfs_key_t	buf_dblwr_mutex_key;
 extern mysql_pfs_key_t	trx_undo_mutex_key;
 extern mysql_pfs_key_t	trx_mutex_key;
 extern mysql_pfs_key_t	lock_sys_mutex_key;

=== modified file 'storage/innobase/include/trx0sys.h'
--- a/storage/innobase/include/trx0sys.h	2011-12-21 03:24:58 +0000
+++ b/storage/innobase/include/trx0sys.h	2011-12-23 13:17:36 +0000
@@ -69,53 +69,6 @@ extern ib_int64_t	trx_sys_mysql_bin_log_
 /** The transaction system */
 extern trx_sys_t*	trx_sys;
 
-/** Doublewrite system */
-extern trx_doublewrite_t*	trx_doublewrite;
-/** The following is set to TRUE when we are upgrading from pre-4.1
-format data files to the multiple tablespaces format data files */
-extern ibool			trx_doublewrite_must_reset_space_ids;
-/** Set to TRUE when the doublewrite buffer is being created */
-extern ibool			trx_doublewrite_buf_is_being_created;
-/** The following is TRUE when we are using the database in the
-post-4.1 format, i.e., we have successfully upgraded, or have created
-a new database installation */
-extern ibool			trx_sys_multiple_tablespace_format;
-
-/****************************************************************//**
-Creates the doublewrite buffer to a new InnoDB installation. The header of the
-doublewrite buffer is placed on the trx system header page. */
-UNIV_INTERN
-void
-trx_sys_create_doublewrite_buf(void);
-/*================================*/
-/****************************************************************//**
-At a database startup initializes the doublewrite buffer memory structure if
-we already have a doublewrite buffer created in the data files. If we are
-upgrading to an InnoDB version which supports multiple tablespaces, then this
-function performs the necessary update operations. If we are in a crash
-recovery, this function uses a possible doublewrite buffer to restore
-half-written pages in the data files. */
-UNIV_INTERN
-void
-trx_sys_doublewrite_init_or_restore_pages(
-/*======================================*/
-	ibool	restore_corrupt_pages);	/*!< in: TRUE=restore pages */
-/****************************************************************//**
-Marks the trx sys header when we have successfully upgraded to the >= 4.1.x
-multiple tablespace format. */
-UNIV_INTERN
-void
-trx_sys_mark_upgraded_to_multiple_tablespaces(void);
-/*===============================================*/
-/****************************************************************//**
-Determines if a page number is located inside the doublewrite buffer.
-@return TRUE if the location is inside the two blocks of the
-doublewrite buffer */
-UNIV_INTERN
-ibool
-trx_doublewrite_page_inside(
-/*========================*/
-	ulint	page_no);	/*!< in: page number */
 /***************************************************************//**
 Checks if a page address is the trx sys header page.
 @return	TRUE if trx sys header page */
@@ -642,36 +595,6 @@ identifier is added to this 64-bit const
 /* @} */
 
 #ifndef UNIV_HOTBACKUP
-/** Doublewrite control struct */
-struct trx_doublewrite_struct{
-	mutex_t	mutex;		/*!< mutex protecting the first_free field and
-				write_buf */
-	ulint	block1;		/*!< the page number of the first
-				doublewrite block (64 pages) */
-	ulint	block2;		/*!< page number of the second block */
-	ulint	first_free;	/*!< first free position in write_buf measured
-				in units of UNIV_PAGE_SIZE */
-	ulint	s_reserved;	/*!< number of slots currently reserved
-				for single page flushes. */
-	ulint	b_reserved;	/*!< number of slots currently reserved
-				for batch flush. */
-	ibool*	in_use;		/*!< flag used to indicate if a slot is
-				in use. Only used for single page
-				flushes. */
-	ibool	batch_running;	/*!< set to TRUE if currently a batch
-				is being written from the doublewrite
-				buffer. */
-	byte*	write_buf;	/*!< write buffer used in writing to the
-				doublewrite buffer, aligned to an
-				address divisible by UNIV_PAGE_SIZE
-				(which is required by Windows aio) */
-	byte*	write_buf_unaligned;
-				/*!< pointer to write_buf, but unaligned */
-	buf_page_t**
-		buf_block_arr;	/*!< array to store pointers to the buffer
-				blocks which have been cached to write_buf */
-};
-
 /** The transaction system central memory data structure. */
 struct trx_sys_struct{
 

=== modified file 'storage/innobase/include/trx0types.h'
--- a/storage/innobase/include/trx0types.h	2011-11-08 10:32:23 +0000
+++ b/storage/innobase/include/trx0types.h	2011-12-23 13:17:36 +0000
@@ -60,8 +60,6 @@ typedef struct trx_struct	trx_t;
 typedef struct trx_lock_struct	trx_lock_t;
 /** Transaction system */
 typedef struct trx_sys_struct	trx_sys_t;
-/** Doublewrite information */
-typedef struct trx_doublewrite_struct	trx_doublewrite_t;
 /** Signal */
 typedef struct trx_sig_struct	trx_sig_t;
 /** Rollback segment */

=== modified file 'storage/innobase/include/univ.i'
--- a/storage/innobase/include/univ.i	2011-11-29 15:32:32 +0000
+++ b/storage/innobase/include/univ.i	2011-12-28 10:40:55 +0000
@@ -161,14 +161,6 @@ be excluded from instrumentation. */
 /*			DEBUG VERSION CONTROL
 			===================== */
 
-/* The following flag will make InnoDB to initialize
-all memory it allocates to zero. It hides Purify
-warnings about reading unallocated memory unless
-memory is read outside the allocated blocks. */
-/*
-#define UNIV_INIT_MEM_TO_ZERO
-*/
-
 /* When this macro is defined then additional test functions will be
 compiled. These functions live at the end of each relevant source file
 and have "test_" prefix. These functions are not called from anywhere in
@@ -244,15 +236,6 @@ operations (very slow); also UNIV_DEBUG
 #define UNIV_BTR_DEBUG				/* check B-tree links */
 #define UNIV_LIGHT_MEM_DEBUG			/* light memory debugging */
 
-#ifdef HAVE_purify
-/* The following sets all new allocated memory to zero before use:
-this can be used to eliminate unnecessary Purify warnings, but note that
-it also masks many bugs Purify could detect. For detailed Purify analysis it
-is best to remove the define below and look through the warnings one
-by one. */
-#define UNIV_SET_MEM_TO_ZERO
-#endif
-
 /*
 #define UNIV_SQL_DEBUG
 #define UNIV_LOG_DEBUG

=== modified file 'storage/innobase/include/ut0mem.h'
--- a/storage/innobase/include/ut0mem.h	2011-11-20 20:17:41 +0000
+++ b/storage/innobase/include/ut0mem.h	2011-12-28 10:40:55 +0000
@@ -78,40 +78,19 @@ ut_mem_init(void);
 /*=============*/
 
 /**********************************************************************//**
-Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is
-defined and set_to_zero is TRUE.
+Allocates memory.
 @return	own: allocated memory */
 UNIV_INTERN
 void*
 ut_malloc_low(
 /*==========*/
 	ulint	n,			/*!< in: number of bytes to allocate */
-	ibool	set_to_zero,		/*!< in: TRUE if allocated memory
-					should be set to zero if
-					UNIV_SET_MEM_TO_ZERO is defined */
-	ibool	assert_on_error);	/*!< in: if TRUE, we crash mysqld if
+	ibool	assert_on_error)	/*!< in: if TRUE, we crash mysqld if
 					the memory cannot be allocated */
+	__attribute__((malloc));
 /**********************************************************************//**
-Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is
-defined.
-@return	own: allocated memory */
-UNIV_INTERN
-void*
-ut_malloc(
-/*======*/
-	ulint	n);	/*!< in: number of bytes to allocate */
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Tests if malloc of n bytes would succeed. ut_malloc() asserts if memory runs
-out. It cannot be used if we want to return an error message. Prints to
-stderr a message if fails.
-@return	TRUE if succeeded */
-UNIV_INTERN
-ibool
-ut_test_malloc(
-/*===========*/
-	ulint	n);	/*!< in: try to allocate this many bytes */
-#endif /* !UNIV_HOTBACKUP */
+Allocates memory. */
+#define ut_malloc(n) ut_malloc_low(n, TRUE)
 /**********************************************************************//**
 Frees a memory block allocated with ut_malloc. Freeing a NULL pointer is
 a nop. */

=== modified file 'storage/innobase/include/ut0rnd.ic'
--- a/storage/innobase/include/ut0rnd.ic	2011-11-21 04:58:23 +0000
+++ b/storage/innobase/include/ut0rnd.ic	2011-12-22 02:47:18 +0000
@@ -117,7 +117,7 @@ ut_rnd_interval(
 
 	rnd = ut_rnd_gen_ulint();
 
-	return(low + (rnd % (high - low + 1)));
+	return(low + (rnd % (high - low)));
 }
 
 /*********************************************************//**

=== modified file 'storage/innobase/log/log0recv.cc'
--- a/storage/innobase/log/log0recv.cc	2011-12-19 08:43:28 +0000
+++ b/storage/innobase/log/log0recv.cc	2011-12-23 13:17:36 +0000
@@ -2866,7 +2866,7 @@ recv_init_crash_recovery(void)
 			" half-written data pages from"
 			" the doublewrite\n"
 			"InnoDB: buffer...\n");
-		trx_sys_doublewrite_init_or_restore_pages(TRUE);
+		buf_dblwr_init_or_restore_pages(TRUE);
 	}
 }
 
@@ -3139,7 +3139,7 @@ recv_recovery_from_checkpoint_start_func
 
 		if (!recv_needed_recovery) {
 			/* Init the doublewrite buffer memory structure */
-			trx_sys_doublewrite_init_or_restore_pages(FALSE);
+			buf_dblwr_init_or_restore_pages(FALSE);
 		}
 	}
 

=== modified file 'storage/innobase/mem/mem0pool.cc'
--- a/storage/innobase/mem/mem0pool.cc	2011-12-19 08:43:28 +0000
+++ b/storage/innobase/mem/mem0pool.cc	2011-12-28 10:40:55 +0000
@@ -228,11 +228,7 @@ mem_pool_create(
 
 	pool = static_cast<mem_pool_t*>(ut_malloc(sizeof(mem_pool_t)));
 
-	/* We do not set the memory to zero (FALSE) in the pool,
-	but only when allocated at a higher level in mem0mem.cc.
-	This is to avoid masking useful Purify warnings. */
-
-	pool->buf = static_cast<byte*>(ut_malloc_low(size, FALSE, TRUE));
+	pool->buf = static_cast<byte*>(ut_malloc_low(size, TRUE));
 	pool->size = size;
 
 	mutex_create(mem_pool_mutex_key, &pool->mutex, SYNC_MEM_POOL);

=== modified file 'storage/innobase/os/os0file.cc'
--- a/storage/innobase/os/os0file.cc	2011-12-01 10:04:44 +0000
+++ b/storage/innobase/os/os0file.cc	2011-12-21 19:35:09 +0000
@@ -73,14 +73,6 @@ UNIV_INTERN ulint	os_innodb_umask
 UNIV_INTERN ulint	os_innodb_umask		= 0;
 #endif
 
-#ifdef UNIV_DO_FLUSH
-/* If the following is set to TRUE, we do not call os_file_flush in every
-os_file_write. We can set this TRUE when the doublewrite buffer is used. */
-UNIV_INTERN ibool	os_do_not_call_flush_at_each_write	= FALSE;
-#else
-/* We do not call os_file_flush in every os_file_write. */
-#endif /* UNIV_DO_FLUSH */
-
 #ifndef UNIV_HOTBACKUP
 /* We use these mutexes to protect lseek + file i/o operation, if the
 OS does not provide an atomic pread or pwrite, or similar */
@@ -2355,19 +2347,6 @@ os_file_pwrite(
 	MONITOR_ATOMIC_DEC(MONITOR_OS_PENDING_WRITES);
 #endif /* !HAVE_ATOMIC_BUILTINS || UNIV_WORD < 8 */
 
-# ifdef UNIV_DO_FLUSH
-	if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
-	    && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
-	    && !os_do_not_call_flush_at_each_write) {
-
-		/* Always do fsync to reduce the probability that when
-		the OS crashes, a database page is only partially
-		physically written to disk. */
-
-		ut_a(TRUE == os_file_flush(file));
-	}
-# endif /* UNIV_DO_FLUSH */
-
 	return(ret);
 #else
 	{
@@ -2398,19 +2377,6 @@ os_file_pwrite(
 
 		ret = write(file, buf, (ssize_t) n);
 
-# ifdef UNIV_DO_FLUSH
-		if (srv_unix_file_flush_method != SRV_UNIX_LITTLESYNC
-		    && srv_unix_file_flush_method != SRV_UNIX_NOSYNC
-		    && !os_do_not_call_flush_at_each_write) {
-
-			/* Always do fsync to reduce the probability that when
-			the OS crashes, a database page is only partially
-			physically written to disk. */
-
-			ut_a(TRUE == os_file_flush(file));
-		}
-# endif /* UNIV_DO_FLUSH */
-
 func_exit:
 # ifndef UNIV_HOTBACKUP
 		os_mutex_exit(os_file_seek_mutexes[i]);
@@ -2773,15 +2739,6 @@ retry:
 
 	ret = WriteFile(file, buf, (DWORD) n, &len, NULL);
 
-	/* Always do fsync to reduce the probability that when the OS crashes,
-	a database page is only partially physically written to disk. */
-
-# ifdef UNIV_DO_FLUSH
-	if (!os_do_not_call_flush_at_each_write) {
-		ut_a(TRUE == os_file_flush(file));
-	}
-# endif /* UNIV_DO_FLUSH */
-
 #ifndef UNIV_HOTBACKUP
 	os_mutex_exit(os_file_seek_mutexes[i]);
 #endif /* !UNIV_HOTBACKUP */
@@ -4312,16 +4269,8 @@ os_aio_windows_handle(
 	*type = slot->type;
 
 	if (ret && len == slot->len) {
-		ret_val = TRUE;
 
-#ifdef UNIV_DO_FLUSH
-		if (slot->type == OS_FILE_WRITE
-		    && !os_do_not_call_flush_at_each_write) {
-			if (!os_file_flush(slot->file)) {
-				ut_error;
-			}
-		}
-#endif /* UNIV_DO_FLUSH */
+		ret_val = TRUE;
 	} else if (os_file_handle_error(slot->name, "Windows aio")) {
 
 		retry = TRUE;
@@ -4613,15 +4562,8 @@ found:
 	*type = slot->type;
 
 	if ((slot->ret == 0) && (slot->n_bytes == (long) slot->len)) {
-		ret = TRUE;
 
-#ifdef UNIV_DO_FLUSH
-		if (slot->type == OS_FILE_WRITE
-		    && !os_do_not_call_flush_at_each_write)
-		    && !os_file_flush(slot->file) {
-			ut_error;
-		}
-#endif /* UNIV_DO_FLUSH */
+		ret = TRUE;
 	} else {
 		errno = -slot->ret;
 

=== modified file 'storage/innobase/os/os0proc.cc'
--- a/storage/innobase/os/os0proc.cc	2011-11-30 10:34:38 +0000
+++ b/storage/innobase/os/os0proc.cc	2011-12-28 10:40:55 +0000
@@ -111,9 +111,6 @@ os_mem_alloc_large(
 		os_fast_mutex_lock(&ut_list_mutex);
 		ut_total_allocated_memory += size;
 		os_fast_mutex_unlock(&ut_list_mutex);
-# ifdef UNIV_SET_MEM_TO_ZERO
-		memset(ptr, '\0', size);
-# endif
 		UNIV_MEM_ALLOC(ptr, size);
 		return(ptr);
 	}

=== modified file 'storage/innobase/row/row0sel.cc'
--- a/storage/innobase/row/row0sel.cc	2011-12-05 08:30:46 +0000
+++ b/storage/innobase/row/row0sel.cc	2011-12-29 14:32:49 +0000
@@ -4208,10 +4208,12 @@ rec_loop:
 #ifdef UNIV_SEARCH_DEBUG
 	/*
 	fputs("Using ", stderr);
-	dict_index_name_print(stderr, index);
+	dict_index_name_print(stderr, trx, index);
 	fprintf(stderr, " cnt %lu ; Page no %lu\n", cnt,
 	page_get_page_no(page_align(rec)));
-	rec_print(rec);
+	rec_print(stderr, rec, index);
+	printf("delete-mark: %lu\n",
+	       rec_get_deleted_flag(rec, page_rec_is_comp(rec)));
 	*/
 #endif /* UNIV_SEARCH_DEBUG */
 
@@ -4392,8 +4394,10 @@ wrong_offs:
 			btr_pcur_store_position(pcur, &mtr);
 
 			err = DB_RECORD_NOT_FOUND;
-			/* ut_print_name(stderr, index->name);
-			fputs(" record not found 3\n", stderr); */
+#if 0
+			ut_print_name(stderr, trx, FALSE, index->name);
+			fputs(" record not found 3\n", stderr);
+#endif
 
 			goto normal_return;
 		}
@@ -4431,8 +4435,10 @@ wrong_offs:
 			btr_pcur_store_position(pcur, &mtr);
 
 			err = DB_RECORD_NOT_FOUND;
-			/* ut_print_name(stderr, index->name);
-			fputs(" record not found 4\n", stderr); */
+#if 0
+			ut_print_name(stderr, trx, FALSE, index->name);
+			fputs(" record not found 4\n", stderr);
+#endif
 
 			goto normal_return;
 		}

=== modified file 'storage/innobase/srv/srv0srv.cc'
--- a/storage/innobase/srv/srv0srv.cc	2011-12-19 08:43:28 +0000
+++ b/storage/innobase/srv/srv0srv.cc	2011-12-28 08:37:08 +0000
@@ -386,6 +386,7 @@ static ulint		srv_n_rows_deleted_old		=
 static ulint		srv_n_rows_read_old		= 0;
 
 UNIV_INTERN ulint	srv_truncated_status_writes	= 0;
+UNIV_INTERN ulint	srv_available_undo_logs         = 0;
 
 /* Set the following to 0 if you want InnoDB to write messages on
 stderr on startup/shutdown. */
@@ -1368,6 +1369,7 @@ srv_export_innodb_status(void)
 	export_vars.innodb_rows_deleted = srv_n_rows_deleted;
 	export_vars.innodb_num_open_files = fil_n_file_opened;
 	export_vars.innodb_truncated_status_writes = srv_truncated_status_writes;
+	export_vars.innodb_available_undo_logs = srv_available_undo_logs;
 
 	mutex_exit(&srv_innodb_monitor_mutex);
 }

=== modified file 'storage/innobase/srv/srv0start.cc'
--- a/storage/innobase/srv/srv0start.cc	2011-12-08 20:20:49 +0000
+++ b/storage/innobase/srv/srv0start.cc	2011-12-28 08:37:08 +0000
@@ -2148,10 +2148,10 @@ innobase_start_or_create_for_mysql(void)
 	/* fprintf(stderr, "Max allowed record size %lu\n",
 	page_get_free_space_of_empty() / 2); */
 
-	if (trx_doublewrite == NULL) {
+	if (buf_dblwr == NULL) {
 		/* Create the doublewrite buffer to a new tablespace */
 
-		trx_sys_create_doublewrite_buf();
+		buf_dblwr_create();
 	}
 
 	/* Here the double write buffer has already been created and so
@@ -2170,12 +2170,11 @@ innobase_start_or_create_for_mysql(void)
 	ut_a(srv_undo_logs > 0);
 	ut_a(srv_undo_logs <= TRX_SYS_N_RSEGS);
 
-	/* Note: We set the config variable here to the number of rollback
-	segments that are actually active. This allows the user to discover
-	the currently configured number of undo segments in an existing
-	instance. */
+	/* The number of rsegs that exist in InnoDB is given by status
+	variable srv_available_undo_logs. The number of rsegs to use can
+	be set using the dynamic global variable srv_undo_logs. */
 
-	srv_undo_logs = trx_sys_create_rsegs(
+	srv_available_undo_logs = trx_sys_create_rsegs(
 		srv_undo_tablespaces, srv_undo_logs);
 
 	/* Create the thread which watches the timeouts for lock waits */
@@ -2388,67 +2387,6 @@ innobase_start_or_create_for_mysql(void)
 
 	fflush(stderr);
 
-	if (trx_doublewrite_must_reset_space_ids) {
-		/* Actually, we did not change the undo log format between
-		4.0 and 4.1.1, and we would not need to run purge to
-		completion. Note also that the purge algorithm in 4.1.1
-		can process the history list again even after a full
-		purge, because our algorithm does not cut the end of the
-		history list in all cases so that it would become empty
-		after a full purge. That mean that we may purge 4.0 type
-		undo log even after this phase.
-
-		The insert buffer record format changed between 4.0 and
-		4.1.1. It is essential that the insert buffer is emptied
-		here! */
-
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: You are upgrading to an"
-			" InnoDB version which allows multiple\n");
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: tablespaces. Wait that purge"
-			" and insert buffer merge run to\n");
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: completion...\n");
-		for (;;) {
-			os_thread_sleep(1000000);
-
-			if (0 == strcmp(srv_main_thread_op_info,
-					"waiting for server activity")) {
-
-				ut_a(ibuf_is_empty());
-
-				break;
-			}
-		}
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: Full purge and insert buffer merge"
-			" completed.\n");
-
-		trx_sys_mark_upgraded_to_multiple_tablespaces();
-
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: You have now successfully upgraded"
-			" to the multiple tablespaces\n");
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: format. You should NOT DOWNGRADE"
-			" to an earlier version of\n");
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: InnoDB! But if you absolutely need to"
-			" downgrade, see\n");
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: " REFMAN "multiple-tablespaces.html\n"
-			" InnoDB: for instructions.\n");
-	}
-
 	if (srv_force_recovery == 0) {
 		/* In the insert buffer we may have even bigger tablespace
 		id's, because we may have dropped those tablespaces, but

=== modified file 'storage/innobase/sync/sync0rw.cc'
--- a/storage/innobase/sync/sync0rw.cc	2011-12-19 08:43:28 +0000
+++ b/storage/innobase/sync/sync0rw.cc	2011-12-27 15:39:56 +0000
@@ -565,7 +565,7 @@ rw_lock_x_lock_wait(
 
 /******************************************************************//**
 Low-level function for acquiring an exclusive lock.
-@return	RW_LOCK_NOT_LOCKED if did not succeed, RW_LOCK_EX if success. */
+@return	FALSE if did not succeed, TRUE if success. */
 UNIV_INLINE
 ibool
 rw_lock_x_lock_low(

=== modified file 'storage/innobase/trx/trx0sys.cc'
--- a/storage/innobase/trx/trx0sys.cc	2011-12-21 03:24:58 +0000
+++ b/storage/innobase/trx/trx0sys.cc	2011-12-23 13:17:36 +0000
@@ -58,19 +58,6 @@ typedef struct file_format_struct	file_f
 
 /** The transaction system */
 UNIV_INTERN trx_sys_t*		trx_sys		= NULL;
-/** The doublewrite buffer */
-UNIV_INTERN trx_doublewrite_t*	trx_doublewrite = NULL;
-
-/** The following is set to TRUE when we are upgrading from pre-4.1
-format data files to the multiple tablespaces format data files */
-UNIV_INTERN ibool	trx_doublewrite_must_reset_space_ids	= FALSE;
-/** Set to TRUE when the doublewrite buffer is being created */
-UNIV_INTERN ibool	trx_doublewrite_buf_is_being_created = FALSE;
-
-/** The following is TRUE when we are using the database in the
-post-4.1 format, i.e., we have successfully upgraded, or have created
-a new database installation */
-UNIV_INTERN ibool	trx_sys_multiple_tablespace_format	= FALSE;
 
 /** In a MySQL replication slave, in crash recovery we store the master log
 file name and position here. */
@@ -130,7 +117,6 @@ static const ulint	FILE_FORMAT_NAME_N
 
 #ifdef UNIV_PFS_MUTEX
 /* Key to register the mutex with performance schema */
-UNIV_INTERN mysql_pfs_key_t	trx_doublewrite_mutex_key;
 UNIV_INTERN mysql_pfs_key_t	file_format_max_mutex_key;
 UNIV_INTERN mysql_pfs_key_t	trx_sys_mutex_key;
 #endif /* UNIV_PFS_RWLOCK */
@@ -141,547 +127,6 @@ updated via SET GLOBAL innodb_file_forma
 or create a table. */
 static	file_format_t	file_format_max;
 
-/****************************************************************//**
-Determines if a page number is located inside the doublewrite buffer.
-@return TRUE if the location is inside the two blocks of the
-doublewrite buffer */
-UNIV_INTERN
-ibool
-trx_doublewrite_page_inside(
-/*========================*/
-	ulint	page_no)	/*!< in: page number */
-{
-	if (trx_doublewrite == NULL) {
-
-		return(FALSE);
-	}
-
-	if (page_no >= trx_doublewrite->block1
-	    && page_no < trx_doublewrite->block1
-	    + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
-		return(TRUE);
-	}
-
-	if (page_no >= trx_doublewrite->block2
-	    && page_no < trx_doublewrite->block2
-	    + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
-		return(TRUE);
-	}
-
-	return(FALSE);
-}
-
-/****************************************************************//**
-Creates or initialializes the doublewrite buffer at a database start. */
-static
-void
-trx_doublewrite_init(
-/*=================*/
-	byte*	doublewrite)	/*!< in: pointer to the doublewrite buf
-				header on trx sys page */
-{
-	ulint	buf_size;
-
-	trx_doublewrite = static_cast<trx_doublewrite_t*>(
-		mem_zalloc(sizeof(trx_doublewrite_t)));
-
-	/* There are two blocks of same size in the doublewrite
-	buffer. */
-	buf_size = 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
-
-	/* There must be atleast one buffer for single page writes
-	and one buffer for batch writes. */
-	ut_a(srv_doublewrite_batch_size > 0
-	     && srv_doublewrite_batch_size < buf_size);
-
-	/* Since we now start to use the doublewrite buffer, no need to call
-	fsync() after every write to a data file */
-#ifdef UNIV_DO_FLUSH
-	os_do_not_call_flush_at_each_write = TRUE;
-#endif /* UNIV_DO_FLUSH */
-
-	mutex_create(trx_doublewrite_mutex_key,
-		     &trx_doublewrite->mutex, SYNC_DOUBLEWRITE);
-
-	trx_doublewrite->first_free = 0;
-	trx_doublewrite->s_reserved = 0;
-	trx_doublewrite->b_reserved = 0;
-
-	trx_doublewrite->block1 = mach_read_from_4(
-		doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK1);
-	trx_doublewrite->block2 = mach_read_from_4(
-		doublewrite + TRX_SYS_DOUBLEWRITE_BLOCK2);
-
-	trx_doublewrite->in_use = static_cast<ibool*>(
-		mem_zalloc(buf_size * sizeof(ibool)));
-
-	trx_doublewrite->write_buf_unaligned = static_cast<byte*>(
-		ut_malloc((1 + buf_size) * UNIV_PAGE_SIZE));
-
-	trx_doublewrite->write_buf = static_cast<byte*>(
-		ut_align(trx_doublewrite->write_buf_unaligned,
-			 UNIV_PAGE_SIZE));
-
-	trx_doublewrite->buf_block_arr = static_cast<buf_page_t**>(
-		mem_zalloc(buf_size * sizeof(void*)));
-}
-
-/****************************************************************//**
-Calls buf_page_get() on the TRX_SYS_PAGE and returns a pointer to the
-doublewrite buffer within it.
-@return	pointer to the doublewrite buffer within the filespace header
-page. */
-UNIV_INLINE
-byte*
-trx_sys_doublewrite_get(
-/*====================*/
-	mtr_t*	mtr)	/*!< in/out: MTR to hold the page latch */
-{
-	buf_block_t*	block;
-
-	block = buf_page_get(TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO,
-			     RW_X_LATCH, mtr);
-	buf_block_dbg_add_level(block, SYNC_NO_ORDER_CHECK);
-
-	return(buf_block_get_frame(block) + TRX_SYS_DOUBLEWRITE);
-}
-
-/****************************************************************//**
-Marks the trx sys header when we have successfully upgraded to the >= 4.1.x
-multiple tablespace format. */
-UNIV_INTERN
-void
-trx_sys_mark_upgraded_to_multiple_tablespaces(void)
-/*===============================================*/
-{
-	byte*		doublewrite;
-	mtr_t		mtr;
-
-	/* We upgraded to 4.1.x and reset the space id fields in the
-	doublewrite buffer. Let us mark to the trx_sys header that the upgrade
-	has been done. */
-
-	mtr_start(&mtr);
-
-	doublewrite = trx_sys_doublewrite_get(&mtr);
-
-	mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
-			 TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
-			 MLOG_4BYTES, &mtr);
-	mtr_commit(&mtr);
-
-	/* Flush the modified pages to disk and make a checkpoint */
-	log_make_checkpoint_at(LSN_MAX, TRUE);
-
-	trx_sys_multiple_tablespace_format = TRUE;
-}
-
-/****************************************************************//**
-Creates the doublewrite buffer to a new InnoDB installation. The header of the
-doublewrite buffer is placed on the trx system header page. */
-UNIV_INTERN
-void
-trx_sys_create_doublewrite_buf(void)
-/*================================*/
-{
-	buf_block_t*	block2;
-#ifdef UNIV_SYNC_DEBUG
-	buf_block_t*	new_block;
-#endif /* UNIV_SYNC_DEBUG */
-	byte*	doublewrite;
-	byte*	fseg_header;
-	ulint	page_no;
-	ulint	prev_page_no;
-	ulint	i;
-	mtr_t	mtr;
-
-	if (trx_doublewrite) {
-		/* Already inited */
-
-		return;
-	}
-
-start_again:
-	mtr_start(&mtr);
-	trx_doublewrite_buf_is_being_created = TRUE;
-
-	doublewrite = trx_sys_doublewrite_get(&mtr);
-
-	if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
-	    == TRX_SYS_DOUBLEWRITE_MAGIC_N) {
-		/* The doublewrite buffer has already been created:
-		just read in some numbers */
-
-		trx_doublewrite_init(doublewrite);
-
-		mtr_commit(&mtr);
-		trx_doublewrite_buf_is_being_created = FALSE;
-	} else {
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			" InnoDB: Doublewrite buffer not found:"
-			" creating new\n");
-
-		if (buf_pool_get_curr_size()
-		    < ((2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
-			+ FSP_EXTENT_SIZE / 2 + 100)
-		       * UNIV_PAGE_SIZE)) {
-			fprintf(stderr,
-				"InnoDB: Cannot create doublewrite buffer:"
-				" you must\n"
-				"InnoDB: increase your buffer pool size.\n"
-				"InnoDB: Cannot continue operation.\n");
-
-			exit(1);
-		}
-
-		block2 = fseg_create(TRX_SYS_SPACE, TRX_SYS_PAGE_NO,
-				     TRX_SYS_DOUBLEWRITE
-				     + TRX_SYS_DOUBLEWRITE_FSEG, &mtr);
-
-		/* fseg_create acquires a second latch on the page,
-		therefore we must declare it: */
-
-		buf_block_dbg_add_level(block2, SYNC_NO_ORDER_CHECK);
-
-		if (block2 == NULL) {
-			fprintf(stderr,
-				"InnoDB: Cannot create doublewrite buffer:"
-				" you must\n"
-				"InnoDB: increase your tablespace size.\n"
-				"InnoDB: Cannot continue operation.\n");
-
-			/* We exit without committing the mtr to prevent
-			its modifications to the database getting to disk */
-
-			exit(1);
-		}
-
-		fseg_header = doublewrite + TRX_SYS_DOUBLEWRITE_FSEG;
-		prev_page_no = 0;
-
-		for (i = 0; i < 2 * TRX_SYS_DOUBLEWRITE_BLOCK_SIZE
-			     + FSP_EXTENT_SIZE / 2; i++) {
-			page_no = fseg_alloc_free_page(fseg_header,
-						       prev_page_no + 1,
-						       FSP_UP, &mtr);
-			if (page_no == FIL_NULL) {
-				fprintf(stderr,
-					"InnoDB: Cannot create doublewrite"
-					" buffer: you must\n"
-					"InnoDB: increase your"
-					" tablespace size.\n"
-					"InnoDB: Cannot continue operation.\n"
-					);
-
-				exit(1);
-			}
-
-			/* We read the allocated pages to the buffer pool;
-			when they are written to disk in a flush, the space
-			id and page number fields are also written to the
-			pages. When we at database startup read pages
-			from the doublewrite buffer, we know that if the
-			space id and page number in them are the same as
-			the page position in the tablespace, then the page
-			has not been written to in doublewrite. */
-
-#ifdef UNIV_SYNC_DEBUG
-			new_block =
-#endif /* UNIV_SYNC_DEBUG */
-			buf_page_get(TRX_SYS_SPACE, 0, page_no,
-				     RW_X_LATCH, &mtr);
-			buf_block_dbg_add_level(new_block,
-						SYNC_NO_ORDER_CHECK);
-
-			if (i == FSP_EXTENT_SIZE / 2) {
-				ut_a(page_no == FSP_EXTENT_SIZE);
-				mlog_write_ulint(doublewrite
-						 + TRX_SYS_DOUBLEWRITE_BLOCK1,
-						 page_no, MLOG_4BYTES, &mtr);
-				mlog_write_ulint(doublewrite
-						 + TRX_SYS_DOUBLEWRITE_REPEAT
-						 + TRX_SYS_DOUBLEWRITE_BLOCK1,
-						 page_no, MLOG_4BYTES, &mtr);
-
-			} else if (i == FSP_EXTENT_SIZE / 2
-				   + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
-				ut_a(page_no == 2 * FSP_EXTENT_SIZE);
-				mlog_write_ulint(doublewrite
-						 + TRX_SYS_DOUBLEWRITE_BLOCK2,
-						 page_no, MLOG_4BYTES, &mtr);
-				mlog_write_ulint(doublewrite
-						 + TRX_SYS_DOUBLEWRITE_REPEAT
-						 + TRX_SYS_DOUBLEWRITE_BLOCK2,
-						 page_no, MLOG_4BYTES, &mtr);
-
-			} else if (i > FSP_EXTENT_SIZE / 2) {
-				ut_a(page_no == prev_page_no + 1);
-			}
-
-			if (((i + 1) & 15) == 0) {
-				/* rw_locks can only be recursively x-locked
-				2048 times. (on 32 bit platforms,
-				(lint) 0 - (X_LOCK_DECR * 2049)
-				is no longer a negative number, and thus
-				lock_word becomes like a shared lock).
-				For 4k page size this loop will
-				lock the fseg header too many times. Since
-				this code is not done while any other threads
-				are active, restart the MTR occasionally. */
-				mtr_commit(&mtr);
-				mtr_start(&mtr);
-				doublewrite = trx_sys_doublewrite_get(&mtr);
-				fseg_header = doublewrite
-					      + TRX_SYS_DOUBLEWRITE_FSEG;
-			}
-
-			prev_page_no = page_no;
-		}
-
-		mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC,
-				 TRX_SYS_DOUBLEWRITE_MAGIC_N,
-				 MLOG_4BYTES, &mtr);
-		mlog_write_ulint(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC
-				 + TRX_SYS_DOUBLEWRITE_REPEAT,
-				 TRX_SYS_DOUBLEWRITE_MAGIC_N,
-				 MLOG_4BYTES, &mtr);
-
-		mlog_write_ulint(doublewrite
-				 + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED,
-				 TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N,
-				 MLOG_4BYTES, &mtr);
-		mtr_commit(&mtr);
-
-		/* Flush the modified pages to disk and make a checkpoint */
-		log_make_checkpoint_at(LSN_MAX, TRUE);
-
-		/* Remove doublewrite pages from LRU */
-		buf_pool_invalidate();
-
-		ut_print_timestamp(stderr);
-		fprintf(stderr, " InnoDB: Doublewrite buffer created\n");
-
-		trx_sys_multiple_tablespace_format = TRUE;
-
-		goto start_again;
-	}
-}
-
-/****************************************************************//**
-At a database startup initializes the doublewrite buffer memory structure if
-we already have a doublewrite buffer created in the data files. If we are
-upgrading to an InnoDB version which supports multiple tablespaces, then this
-function performs the necessary update operations. If we are in a crash
-recovery, this function uses a possible doublewrite buffer to restore
-half-written pages in the data files. */
-UNIV_INTERN
-void
-trx_sys_doublewrite_init_or_restore_pages(
-/*======================================*/
-	ibool	restore_corrupt_pages)	/*!< in: TRUE=restore pages */
-{
-	byte*	buf;
-	byte*	read_buf;
-	byte*	unaligned_read_buf;
-	ulint	block1;
-	ulint	block2;
-	ulint	source_page_no;
-	byte*	page;
-	byte*	doublewrite;
-	ulint	space_id;
-	ulint	page_no;
-	ulint	i;
-
-	/* We do the file i/o past the buffer pool */
-
-	unaligned_read_buf = static_cast<byte*>(ut_malloc(2 * UNIV_PAGE_SIZE));
-
-	read_buf = static_cast<byte*>(
-		ut_align(unaligned_read_buf, UNIV_PAGE_SIZE));
-
-	/* Read the trx sys header to check if we are using the doublewrite
-	buffer */
-
-	fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, TRX_SYS_PAGE_NO, 0,
-	       UNIV_PAGE_SIZE, read_buf, NULL);
-	doublewrite = read_buf + TRX_SYS_DOUBLEWRITE;
-
-	if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_MAGIC)
-	    == TRX_SYS_DOUBLEWRITE_MAGIC_N) {
-		/* The doublewrite buffer has been created */
-
-		trx_doublewrite_init(doublewrite);
-
-		block1 = trx_doublewrite->block1;
-		block2 = trx_doublewrite->block2;
-
-		buf = trx_doublewrite->write_buf;
-	} else {
-		goto leave_func;
-	}
-
-	if (mach_read_from_4(doublewrite + TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED)
-	    != TRX_SYS_DOUBLEWRITE_SPACE_ID_STORED_N) {
-
-		/* We are upgrading from a version < 4.1.x to a version where
-		multiple tablespaces are supported. We must reset the space id
-		field in the pages in the doublewrite buffer because starting
-		from this version the space id is stored to
-		FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID. */
-
-		trx_doublewrite_must_reset_space_ids = TRUE;
-
-		fprintf(stderr,
-			"InnoDB: Resetting space id's in the"
-			" doublewrite buffer\n");
-	} else {
-		trx_sys_multiple_tablespace_format = TRUE;
-	}
-
-	/* Read the pages from the doublewrite buffer to memory */
-
-	fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block1, 0,
-	       TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
-	       buf, NULL);
-	fil_io(OS_FILE_READ, TRUE, TRX_SYS_SPACE, 0, block2, 0,
-	       TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
-	       buf + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * UNIV_PAGE_SIZE,
-	       NULL);
-	/* Check if any of these pages is half-written in data files, in the
-	intended position */
-
-	page = buf;
-
-	for (i = 0; i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE * 2; i++) {
-
-		page_no = mach_read_from_4(page + FIL_PAGE_OFFSET);
-
-		if (trx_doublewrite_must_reset_space_ids) {
-
-			space_id = 0;
-			mach_write_to_4(page
-					+ FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID, 0);
-			/* We do not need to calculate new checksums for the
-			pages because the field .._SPACE_ID does not affect
-			them. Write the page back to where we read it from. */
-
-			if (i < TRX_SYS_DOUBLEWRITE_BLOCK_SIZE) {
-				source_page_no = block1 + i;
-			} else {
-				source_page_no = block2
-					+ i - TRX_SYS_DOUBLEWRITE_BLOCK_SIZE;
-			}
-
-			fil_io(OS_FILE_WRITE, TRUE, 0, 0, source_page_no, 0,
-			       UNIV_PAGE_SIZE, page, NULL);
-			/* printf("Resetting space id in page %lu\n",
-			source_page_no); */
-		} else {
-			space_id = mach_read_from_4(
-				page + FIL_PAGE_ARCH_LOG_NO_OR_SPACE_ID);
-		}
-
-		if (!restore_corrupt_pages) {
-			/* The database was shut down gracefully: no need to
-			restore pages */
-
-		} else if (!fil_tablespace_exists_in_mem(space_id)) {
-			/* Maybe we have dropped the single-table tablespace
-			and this page once belonged to it: do nothing */
-
-		} else if (!fil_check_adress_in_tablespace(space_id,
-							   page_no)) {
-			fprintf(stderr,
-				"InnoDB: Warning: a page in the"
-				" doublewrite buffer is not within space\n"
-				"InnoDB: bounds; space id %lu"
-				" page number %lu, page %lu in"
-				" doublewrite buf.\n",
-				(ulong) space_id, (ulong) page_no, (ulong) i);
-
-		} else if (space_id == TRX_SYS_SPACE
-			   && ((page_no >= block1
-				&& page_no
-				< block1 + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)
-			       || (page_no >= block2
-				   && page_no
-				   < (block2
-				      + TRX_SYS_DOUBLEWRITE_BLOCK_SIZE)))) {
-
-			/* It is an unwritten doublewrite buffer page:
-			do nothing */
-		} else {
-			ulint	zip_size = fil_space_get_zip_size(space_id);
-
-			/* Read in the actual page from the file */
-			fil_io(OS_FILE_READ, TRUE, space_id, zip_size,
-			       page_no, 0,
-			       zip_size ? zip_size : UNIV_PAGE_SIZE,
-			       read_buf, NULL);
-
-			/* Check if the page is corrupt */
-
-			if (UNIV_UNLIKELY
-			    (buf_page_is_corrupted(read_buf, zip_size))) {
-
-				fprintf(stderr,
-					"InnoDB: Warning: database page"
-					" corruption or a failed\n"
-					"InnoDB: file read of"
-					" space %lu page %lu.\n"
-					"InnoDB: Trying to recover it from"
-					" the doublewrite buffer.\n",
-					(ulong) space_id, (ulong) page_no);
-
-				if (buf_page_is_corrupted(page, zip_size)) {
-					fprintf(stderr,
-						"InnoDB: Dump of the page:\n");
-					buf_page_print(read_buf, zip_size);
-					fprintf(stderr,
-						"InnoDB: Dump of"
-						" corresponding page"
-						" in doublewrite buffer:\n");
-					buf_page_print(page, zip_size);
-
-					fprintf(stderr,
-						"InnoDB: Also the page in the"
-						" doublewrite buffer"
-						" is corrupt.\n"
-						"InnoDB: Cannot continue"
-						" operation.\n"
-						"InnoDB: You can try to"
-						" recover the database"
-						" with the my.cnf\n"
-						"InnoDB: option:\n"
-						"InnoDB:"
-						" innodb_force_recovery=6\n");
-					exit(1);
-				}
-
-				/* Write the good page from the
-				doublewrite buffer to the intended
-				position */
-
-				fil_io(OS_FILE_WRITE, TRUE, space_id,
-				       zip_size, page_no, 0,
-				       zip_size ? zip_size : UNIV_PAGE_SIZE,
-				       page, NULL);
-				fprintf(stderr,
-					"InnoDB: Recovered the page from"
-					" the doublewrite buffer.\n");
-			}
-		}
-
-		page += UNIV_PAGE_SIZE;
-	}
-
-	fil_flush_file_spaces(FIL_TABLESPACE);
-
-leave_func:
-	ut_free(unaligned_read_buf);
-}
-
 #ifdef UNIV_DEBUG
 /****************************************************************//**
 Checks whether a trx is in one of rw_trx_list or ro_trx_list.
@@ -1736,22 +1181,7 @@ trx_sys_close(void)
 	trx_purge_sys_close();
 
 	/* Free the double write data structures. */
-	ut_a(trx_doublewrite != NULL);
-	ut_ad(trx_doublewrite->s_reserved == 0);
-	ut_ad(trx_doublewrite->b_reserved == 0);
-
-	ut_free(trx_doublewrite->write_buf_unaligned);
-	trx_doublewrite->write_buf_unaligned = NULL;
-
-	mem_free(trx_doublewrite->buf_block_arr);
-	trx_doublewrite->buf_block_arr = NULL;
-
-	mem_free(trx_doublewrite->in_use);
-	trx_doublewrite->in_use = NULL;
-
-	mutex_free(&trx_doublewrite->mutex);
-	mem_free(trx_doublewrite);
-	trx_doublewrite = NULL;
+	buf_dblwr_free();
 
 	mutex_enter(&trx_sys->mutex);
 

=== modified file 'storage/innobase/ut/ut0mem.cc'
--- a/storage/innobase/ut/ut0mem.cc	2011-12-19 08:43:28 +0000
+++ b/storage/innobase/ut/ut0mem.cc	2011-12-28 10:40:55 +0000
@@ -89,17 +89,13 @@ ut_mem_init(void)
 #endif /* !UNIV_HOTBACKUP */
 
 /**********************************************************************//**
-Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is
-defined and set_to_zero is TRUE.
+Allocates memory.
 @return	own: allocated memory */
 UNIV_INTERN
 void*
 ut_malloc_low(
 /*==========*/
 	ulint	n,		/*!< in: number of bytes to allocate */
-	ibool	set_to_zero,	/*!< in: TRUE if allocated memory should be
-				set to zero if UNIV_SET_MEM_TO_ZERO is
-				defined */
 	ibool	assert_on_error)/*!< in: if TRUE, we crash mysqld if the
 				memory cannot be allocated */
 {
@@ -111,12 +107,6 @@ ut_malloc_low(
 		ret = malloc(n);
 		ut_a(ret || !assert_on_error);
 
-#ifdef UNIV_SET_MEM_TO_ZERO
-		if (set_to_zero) {
-			memset(ret, '\0', n);
-			UNIV_MEM_ALLOC(ret, n);
-		}
-#endif
 		return(ret);
 	}
 
@@ -198,12 +188,6 @@ retry:
 		}
 	}
 
-	if (set_to_zero) {
-#ifdef UNIV_SET_MEM_TO_ZERO
-		memset(ret, '\0', n + sizeof(ut_mem_block_t));
-#endif
-	}
-
 	UNIV_MEM_ALLOC(ret, n + sizeof(ut_mem_block_t));
 
 	((ut_mem_block_t*) ret)->size = n + sizeof(ut_mem_block_t);
@@ -220,76 +204,11 @@ retry:
 	void*	ret = malloc(n);
 	ut_a(ret || !assert_on_error);
 
-# ifdef UNIV_SET_MEM_TO_ZERO
-	if (set_to_zero) {
-		memset(ret, '\0', n);
-		UNIV_MEM_ALLOC(ret, n);
-	}
-# endif
 	return(ret);
 #endif /* !UNIV_HOTBACKUP */
 }
 
 /**********************************************************************//**
-Allocates memory. Sets it also to zero if UNIV_SET_MEM_TO_ZERO is
-defined.
-@return	own: allocated memory */
-UNIV_INTERN
-void*
-ut_malloc(
-/*======*/
-	ulint	n)	/*!< in: number of bytes to allocate */
-{
-#ifndef UNIV_HOTBACKUP
-	return(ut_malloc_low(n, TRUE, TRUE));
-#else /* !UNIV_HOTBACKUP */
-	return(malloc(n));
-#endif /* !UNIV_HOTBACKUP */
-}
-
-#ifndef UNIV_HOTBACKUP
-/**********************************************************************//**
-Tests if malloc of n bytes would succeed. ut_malloc() asserts if memory runs
-out. It cannot be used if we want to return an error message. Prints to
-stderr a message if fails.
-@return	TRUE if succeeded */
-UNIV_INTERN
-ibool
-ut_test_malloc(
-/*===========*/
-	ulint	n)	/*!< in: try to allocate this many bytes */
-{
-	void*	ret;
-
-	ret = malloc(n);
-
-	if (ret == NULL) {
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			"  InnoDB: Error: cannot allocate"
-			" %lu bytes of memory for\n"
-			"InnoDB: a BLOB with malloc! Total allocated memory\n"
-			"InnoDB: by InnoDB %lu bytes."
-			" Operating system errno: %d\n"
-			"InnoDB: Check if you should increase"
-			" the swap file or\n"
-			"InnoDB: ulimits of your operating system.\n"
-			"InnoDB: On FreeBSD check you have"
-			" compiled the OS with\n"
-			"InnoDB: a big enough maximum process size.\n",
-			(ulong) n,
-			(ulong) ut_total_allocated_memory,
-			(int) errno);
-		return(FALSE);
-	}
-
-	free(ret);
-
-	return(TRUE);
-}
-#endif /* !UNIV_HOTBACKUP */
-
-/**********************************************************************//**
 Frees a memory block allocated with ut_malloc. Freeing a NULL pointer is
 a nop. */
 UNIV_INTERN

No bundle (reason: useless for push emails).
Thread
bzr push into mysql-trunk-wl5534 branch (jon.hauglid:3459 to 3460) Jon Olav Hauglid9 Jan