List:Commits« Previous MessageNext Message »
From:mcbrown Date:November 29 2007 12:23pm
Subject:svn commit - mysqldoc@docsrva: r8973 - trunk/refman-6.0
View as plain text  
Author: mcbrown
Date: 2007-11-29 12:23:14 +0100 (Thu, 29 Nov 2007)
New Revision: 8973

Log:
Adding InnoDB summary table



Removed:
   trunk/refman-6.0/se-innodb.xml
Renamed/Moved:
   trunk/refman-6.0/se-innodb-core.xml (from rev 8971, trunk/refman-6.0/se-innodb.xml)
Modified:
   trunk/refman-6.0/Makefile.depends
   trunk/refman-6.0/se-falcon-core.xml
   trunk/refman-6.0/storage-engines.xml


Modified: trunk/refman-6.0/Makefile.depends
===================================================================
--- trunk/refman-6.0/Makefile.depends	2007-11-29 11:20:29 UTC (rev 8972)
+++ trunk/refman-6.0/Makefile.depends	2007-11-29 11:23:14 UTC (rev 8973)
Changed blocks: 5, Lines Added: 42, Lines Deleted: 25; 4504 bytes

@@ -751,6 +751,32 @@
 dynxml-local-se-falcon-manprepped.xml: $(dynxml_local_se_falcon_SOURCES) $(dynxml_local_se_falcon_IDMAPS)
 dynxml-local-se-falcon-remprepped.xml: $(dynxml_local_se_falcon_SOURCES) $(dynxml_local_se_falcon_IDMAPS)
 dynxml-local-se-falcon.xml: $(dynxml_local_se_falcon_INCLUDES)
+dynxml_local_se_innodb_INCLUDES = \
+	../common/fixedchars.ent \
+	../common/phrases.ent \
+	../dynamic-docs/command-optvars/mysqld.xml \
+	../refman-common/urls.ent \
+	all-entities.ent \
+	se-innodb-core.xml \
+	versions.ent
+dynxml_local_se_innodb_IMAGES =
+dynxml_local_se_innodb_SOURCES = dynxml-local-se-innodb.xml $(dynxml_local_se_innodb_INCLUDES)
+dynxml_local_se_innodb_IDMAPS = \
+	metadata/dba-core.idmap \
+	metadata/errors-problems.idmap \
+	metadata/installing-core.idmap \
+	metadata/se-innodb.idmap \
+	metadata/sql-syntax.idmap \
+	metadata/using-mysql-programs.idmap
+dynxml-local-se-innodb.validpure: $(dynxml_local_se_innodb_SOURCES)
+dynxml-local-se-innodb.titles: $(dynxml_local_se_innodb_SOURCES)
+dynxml-local-se-innodb.useless: $(dynxml_local_se_innodb_SOURCES)
+dynxml-local-se-innodb.valid: $(dynxml_local_se_innodb_SOURCES) $(dynxml_local_se_innodb_IDMAPS)
+dynxml-local-se-innodb.validwarn: $(dynxml_local_se_innodb_SOURCES) $(dynxml_local_se_innodb_IDMAPS)
+dynxml-local-se-innodb-prepped.xml: $(dynxml_local_se_innodb_SOURCES) $(dynxml_local_se_innodb_IDMAPS)
+dynxml-local-se-innodb-manprepped.xml: $(dynxml_local_se_innodb_SOURCES) $(dynxml_local_se_innodb_IDMAPS)
+dynxml-local-se-innodb-remprepped.xml: $(dynxml_local_se_innodb_SOURCES) $(dynxml_local_se_innodb_IDMAPS)
+dynxml-local-se-innodb.xml: $(dynxml_local_se_innodb_INCLUDES)
 errmsgs_client_INCLUDES =
 errmsgs_client_IMAGES =
 errmsgs_client_SOURCES = errmsgs-client.xml $(errmsgs_client_INCLUDES)

@@ -1278,6 +1304,7 @@
 	dynxml-local-news-5.2.xml \
 	dynxml-local-news-6.0.xml \
 	dynxml-local-se-falcon.xml \
+	dynxml-local-se-innodb.xml \
 	errmsgs-client.xml \
 	errmsgs-server.xml \
 	errors-problems.xml \

@@ -1328,7 +1355,7 @@
 	se-example.xml \
 	se-falcon-core.xml \
 	se-federated.xml \
-	se-innodb.xml \
+	se-innodb-core.xml \
 	se-memory.xml \
 	se-merge.xml \
 	se-myisam.xml \

@@ -2437,29 +2464,18 @@
 se-federated-manprepped.xml: $(se_federated_SOURCES) $(se_federated_IDMAPS)
 se-federated-remprepped.xml: $(se_federated_SOURCES) $(se_federated_IDMAPS)
 
-se_innodb_INCLUDES = \
-	../common/fixedchars.ent \
-	../common/phrases.ent \
-	../refman-common/urls.ent \
-	all-entities.ent \
-	versions.ent
-se_innodb_IMAGES =
-se_innodb_SOURCES = se-innodb.xml $(se_innodb_INCLUDES)
-se_innodb_IDMAPS = \
-	metadata/dba-core.idmap \
-	metadata/errors-problems.idmap \
-	metadata/installing-core.idmap \
-	metadata/se-innodb.idmap \
-	metadata/sql-syntax.idmap \
-	metadata/using-mysql-programs.idmap
-se-innodb.validpure: $(se_innodb_SOURCES)
-se-innodb.titles: $(se_innodb_SOURCES)
-se-innodb.useless: $(se_innodb_SOURCES)
-se-innodb.valid: $(se_innodb_SOURCES) $(se_innodb_IDMAPS)
-se-innodb.validwarn: $(se_innodb_SOURCES) $(se_innodb_IDMAPS)
-se-innodb-prepped.xml: $(se_innodb_SOURCES) $(se_innodb_IDMAPS)
-se-innodb-manprepped.xml: $(se_innodb_SOURCES) $(se_innodb_IDMAPS)
-se-innodb-remprepped.xml: $(se_innodb_SOURCES) $(se_innodb_IDMAPS)
+se_innodb_core_INCLUDES =
+se_innodb_core_IMAGES =
+se_innodb_core_SOURCES = se-innodb-core.xml $(se_innodb_core_INCLUDES)
+se_innodb_core_IDMAPS =
+se-innodb-core.validpure: $(se_innodb_core_SOURCES)
+se-innodb-core.titles: $(se_innodb_core_SOURCES)
+se-innodb-core.useless: $(se_innodb_core_SOURCES)
+se-innodb-core.valid: $(se_innodb_core_SOURCES) $(se_innodb_core_IDMAPS)
+se-innodb-core.validwarn: $(se_innodb_core_SOURCES) $(se_innodb_core_IDMAPS)
+se-innodb-core-prepped.xml: $(se_innodb_core_SOURCES) $(se_innodb_core_IDMAPS)
+se-innodb-core-manprepped.xml: $(se_innodb_core_SOURCES) $(se_innodb_core_IDMAPS)
+se-innodb-core-remprepped.xml: $(se_innodb_core_SOURCES) $(se_innodb_core_IDMAPS)
 
 se_memory_INCLUDES = \
 	../common/fixedchars.ent \

@@ -2617,13 +2633,14 @@
 	../refman-common/urls.ent \
 	all-entities.ent \
 	dynxml-local-se-falcon.xml \
+	dynxml-local-se-innodb.xml \
 	se-archive.xml \
 	se-blackhole.xml \
 	se-csv.xml \
 	se-example.xml \
 	se-falcon-core.xml \
 	se-federated.xml \
-	se-innodb.xml \
+	se-innodb-core.xml \
 	se-memory.xml \
 	se-merge.xml \
 	se-myisam.xml \


Modified: trunk/refman-6.0/se-falcon-core.xml
===================================================================
--- trunk/refman-6.0/se-innodb-core.xml	                        (rev 0)
+++ trunk/refman-6.0/se-innodb-core.xml	2007-11-29 11:23:14 UTC (rev 8973)
Changed blocks: 2, Lines Added: 8355, Lines Deleted: 1; 309634 bytes

@@ -9,6 +9,15 @@
 
   <remark role="dynamic-dependency-list"/>
 
+  <indexterm>
+    <primary>Falcon storage engine</primary>
+  </indexterm>
+
+  <indexterm>
+    <primary>tables</primary>
+    <secondary>Falcon</secondary>
+  </indexterm>
+
   <para>
     The Falcon Storage Engine has been designed with modern database
     requirements in mind, and particularly for use within high-volume

Copied: trunk/refman-6.0/se-innodb-core.xml (from rev 8971, trunk/refman-6.0/se-innodb.xml)
===================================================================
--- trunk/refman-6.0/se-innodb-core.xml	                        (rev 0)
+++ trunk/refman-6.0/se-innodb-core.xml	2007-11-29 11:23:14 UTC (rev 8973)

@@ -0,0 +1,8345 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE section PUBLIC "-//OASIS//DTD DocBook XML V4.3//EN" "http://www.oasis-open.org/docbook/xml/4.3/docbookx.dtd" [
+<!ENTITY % all.entities SYSTEM "all-entities.ent">
+  %all.entities;
+]>
+<section id="innodb">
+
+  <title>The <literal>InnoDB</literal> Storage Engine</title>
+
+  <remark role="dynamic-dependency-list"/>
+
+  <indexterm>
+    <primary>InnoDB storage engine</primary>
+  </indexterm>
+
+  <indexterm>
+    <primary>tables</primary>
+    <secondary>InnoDB</secondary>
+  </indexterm>
+
+  <remark role="todo">
+    A number of node names in this chapter are very generic (e.g.,
+    "Backing up") and could apply in non-InnoDB contexts. Might be best
+    to rename a bunch of them.
+  </remark>
+
+  <section id="innodb-overview">
+
+    <title><literal>InnoDB</literal> Overview</title>
+
+    <indexterm>
+      <primary>InnoDB</primary>
+    </indexterm>
+
+    <indexterm>
+      <primary>transactions</primary>
+      <secondary>support</secondary>
+    </indexterm>
+
+    <indexterm>
+      <primary>transaction-safe tables</primary>
+    </indexterm>
+
+    <indexterm>
+      <primary>ACID</primary>
+    </indexterm>
+
+    <para>
+      <literal>InnoDB</literal> provides MySQL with a transaction-safe
+      (<literal>ACID</literal> compliant) storage engine that has
+      commit, rollback, and crash recovery capabilities.
+      <literal>InnoDB</literal> does locking on the row level and also
+      provides an Oracle-style consistent non-locking read in
+      <literal>SELECT</literal> statements. These features increase
+      multi-user concurrency and performance. There is no need for lock
+      escalation in <literal>InnoDB</literal> because row-level locks
+      fit in very little space. <literal>InnoDB</literal> also supports
+      <literal>FOREIGN KEY</literal> constraints. You can freely mix
+      <literal>InnoDB</literal> tables with tables from other MySQL
+      storage engines, even within the same statement.
+    </para>
+
+    <para>
+      <literal>InnoDB</literal> has been designed for maximum
+      performance when processing large data volumes. Its CPU efficiency
+      is probably not matched by any other disk-based relational
+      database engine.
+    </para>
+
+    <para>
+      Fully integrated with MySQL Server, the <literal>InnoDB</literal>
+      storage engine maintains its own buffer pool for caching data and
+      indexes in main memory. <literal>InnoDB</literal> stores its
+      tables and indexes in a tablespace, which may consist of several
+      files (or raw disk partitions). This is different from, for
+      example, <literal>MyISAM</literal> tables where each table is
+      stored using separate files. <literal>InnoDB</literal> tables can
+      be of any size even on operating systems where file size is
+      limited to 2GB.
+    </para>
+
+    <para>
+      <literal>InnoDB</literal> is included in binary distributions by
+      default. The Windows Essentials installer makes
+      <literal>InnoDB</literal> the MySQL default storage engine on
+      Windows.
+    </para>
+
+    <para>
+      <literal>InnoDB</literal> is used in production at numerous large
+      database sites requiring high performance. The famous Internet
+      news site Slashdot.org runs on <literal>InnoDB</literal>. Mytrix,
+      Inc. stores over 1TB of data in <literal>InnoDB</literal>, and
+      another site handles an average load of 800 inserts/updates per
+      second in <literal>InnoDB</literal>.
+    </para>
+
+    <para>
+      <literal>InnoDB</literal> is published under the same GNU GPL
+      License Version 2 (of June 1991) as MySQL. For more information on
+      MySQL licensing, see
+      <ulink url="http://www.mysql.com/company/legal/licensing/"/>.
+    </para>
+
+    <para>
+      <emphasis role="bold">Additional resources</emphasis>
+    </para>
+
+    <itemizedlist>
+
+      <listitem>
+        <para>
+          A forum dedicated to the <literal>InnoDB</literal> storage
+          engine is available at
+          <ulink url="&base-url-forum-list;?22"/>.
+        </para>
+      </listitem>
+
+    </itemizedlist>
+
+  </section>
+
+  <section id="innodb-contact-information">
+
+    <title><literal>InnoDB</literal> Contact Information</title>
+
+    <para>
+      Contact information for Innobase Oy, producer of the
+      <literal>InnoDB</literal> engine:
+    </para>
+
+<programlisting>
+Web site: <ulink url="http://www.innodb.com/"/>
+Email: <email>sales@stripped</email>
+Phone: +358-9-6969 3250 (office)
+       +358-40-5617367 (mobile)
+
+Innobase Oy Inc.
+World Trade Center Helsinki
+Aleksanterinkatu 17
+P.O.Box 800
+00101 Helsinki
+Finland
+</programlisting>
+
+  </section>
+
+  <section id="innodb-configuration">
+
+    <title><literal>InnoDB</literal> Configuration</title>
+
+    <para>
+      The <literal>InnoDB</literal> storage engine is enabled by
+      default. If you don't want to use <literal>InnoDB</literal>
+      tables, you can add the <literal>skip-innodb</literal> option to
+      your MySQL option file.
+    </para>
+
+    <note>
+      <para>
+        <literal>InnoDB</literal> provides MySQL with a transaction-safe
+        (<literal>ACID</literal> compliant) storage engine that has
+        commit, rollback, and crash recovery capabilities.
+        <emphasis role="bold">However, it cannot do so</emphasis> if the
+        underlying operating system or hardware does not work as
+        advertised. Many operating systems or disk subsystems may delay
+        or reorder write operations to improve performance. On some
+        operating systems, the very system call that should wait until
+        all unwritten data for a file has been flushed &mdash;
+        <literal>fsync()</literal> &mdash; might actually return before
+        the data has been flushed to stable storage. Because of this, an
+        operating system crash or a power outage may destroy recently
+        committed data, or in the worst case, even corrupt the database
+        because of write operations having been reordered. If data
+        integrity is important to you, you should perform some
+        <quote>pull-the-plug</quote> tests before using anything in
+        production. On Mac OS X 10.3 and up, <literal>InnoDB</literal>
+        uses a special <literal>fcntl()</literal> file flush method.
+        Under Linux, it is advisable to <emphasis role="bold">disable
+        the write-back cache</emphasis>.
+      </para>
+    </note>
+
+    <para>
+      On ATAPI hard disks, a command such <literal>hdparm -W0
+      /dev/hda</literal> may work to disable the write-back cache.
+      <emphasis role="bold">Beware that some drives or disk controllers
+      may be unable to disable the write-back cache.</emphasis>
+    </para>
+
+    <para>
+      Two important disk-based resources managed by the
+      <literal>InnoDB</literal> storage engine are its tablespace data
+      files and its log files.
+    </para>
+
+    <note>
+      <para>
+        If you specify no <literal>InnoDB</literal> configuration
+        options, MySQL creates an auto-extending 10MB data file named
+        <filename>ibdata1</filename> and two 5MB log files named
+        <filename>ib_logfile0</filename> and
+        <filename>ib_logfile1</filename> in the MySQL data directory. To
+        get good performance, you should explicitly provide
+        <literal>InnoDB</literal> parameters as discussed in the
+        following examples. Naturally, you should edit the settings to
+        suit your hardware and requirements.
+      </para>
+    </note>
+
+    <indexterm>
+      <primary>NFS</primary>
+      <secondary>InnoDB</secondary>
+    </indexterm>
+
+    <indexterm>
+      <primary>InnoDB</primary>
+      <secondary>NFS</secondary>
+    </indexterm>
+
+    <note>
+      <para>
+        It is not a good idea to configure <literal>InnoDB</literal> to
+        use datafiles or logfiles on NFS volumes. Otherwise, the files
+        might be locked by other processes and become unavailable for
+        use by MySQL.
+      </para>
+    </note>
+
+    <formalpara role="mnmas">
+
+      <title>MySQL Enterprise</title>
+
+      <para>
+        For advice on settings suitable to your specific circumstances,
+        subscribe to the MySQL Enterprise Monitor. For more information
+        see <ulink url="&base-url-enterprise;advisors.html"/>.
+      </para>
+
+    </formalpara>
+
+    <para>
+      The examples shown here are representative. See
+      <xref linkend="innodb-parameters"/> for additional information
+      about <literal>InnoDB</literal>-related configuration parameters.
+    </para>
+
+    <para>
+      To set up the <literal>InnoDB</literal> tablespace files, use the
+      <literal>innodb_data_file_path</literal> option in the
+      <literal>[mysqld]</literal> section of the
+      <filename>my.cnf</filename> option file. On Windows, you can use
+      <filename>my.ini</filename> instead. The value of
+      <literal>innodb_data_file_path</literal> should be a list of one
+      or more data file specifications. If you name more than one data
+      file, separate them by semicolon
+      (<quote><literal>;</literal></quote>) characters:
+    </para>
+
+<programlisting>
+innodb_data_file_path=<replaceable>datafile_spec1</replaceable>[;<replaceable>datafile_spec2</replaceable>]...
+</programlisting>
+
+    <para>
+      For example, a setting that explicitly creates a tablespace having
+      the same characteristics as the default is as follows:
+    </para>
+
+<programlisting>
+[mysqld]
+innodb_data_file_path=ibdata1:10M:autoextend
+</programlisting>
+
+    <para>
+      This setting configures a single 10MB data file named
+      <filename>ibdata1</filename> that is auto-extending. No location
+      for the file is given, so by default, <literal>InnoDB</literal>
+      creates it in the MySQL data directory.
+    </para>
+
+    <para>
+      Sizes are specified using <literal>M</literal> or
+      <literal>G</literal> suffix letters to indicate units of MB or GB.
+    </para>
+
+    <para>
+      A tablespace containing a fixed-size 50MB data file named
+      <filename>ibdata1</filename> and a 50MB auto-extending file named
+      <filename>ibdata2</filename> in the data directory can be
+      configured like this:
+    </para>
+
+<programlisting>
+[mysqld]
+innodb_data_file_path=ibdata1:50M;ibdata2:50M:autoextend
+</programlisting>
+
+    <para>
+      The full syntax for a data file specification includes the
+      filename, its size, and several optional attributes:
+    </para>
+
+<programlisting>
+<replaceable>file_name</replaceable>:<replaceable>file_size</replaceable>[:autoextend[:max:<replaceable>max_file_size</replaceable>]]
+</programlisting>
+
+    <para>
+      The <literal>autoextend</literal> attribute and those following
+      can be used only for the last data file in the
+      <literal>innodb_data_file_path</literal> line.
+    </para>
+
+    <para>
+      If you specify the <literal>autoextend</literal> option for the
+      last data file, <literal>InnoDB</literal> extends the data file if
+      it runs out of free space in the tablespace. The increment is 8MB
+      at a time by default. It can be modified by changing the
+      <literal>innodb_autoextend_increment</literal> system variable.
+    </para>
+
+    <para>
+      If the disk becomes full, you might want to add another data file
+      on another disk. Instructions for reconfiguring an existing
+      tablespace are given in <xref linkend="adding-and-removing"/>.
+    </para>
+
+    <para>
+      <literal>InnoDB</literal> is not aware of the filesystem maximum
+      file size, so be cautious on filesystems where the maximum file
+      size is a small value such as 2GB. To specify a maximum size for
+      an auto-extending data file, use the <literal>max</literal>
+      attribute. The following configuration allows
+      <filename>ibdata1</filename> to grow up to a limit of 500MB:
+    </para>
+
+<programlisting>
+[mysqld]
+innodb_data_file_path=ibdata1:10M:autoextend:max:500M
+</programlisting>
+
+    <para>
+      <literal>InnoDB</literal> creates tablespace files in the MySQL
+      data directory by default. To specify a location explicitly, use
+      the <literal>innodb_data_home_dir</literal> option. For example,
+      to use two files named <filename>ibdata1</filename> and
+      <filename>ibdata2</filename> but create them in the
+      <filename>/ibdata</filename> directory, configure
+      <literal>InnoDB</literal> like this:
+    </para>
+
+<programlisting>
+[mysqld]
+innodb_data_home_dir = /ibdata
+innodb_data_file_path=ibdata1:50M;ibdata2:50M:autoextend
+</programlisting>
+
+    <note>
+      <para>
+        <literal>InnoDB</literal> does not create directories, so make
+        sure that the <filename>/ibdata</filename> directory exists
+        before you start the server. This is also true of any log file
+        directories that you configure. Use the Unix or DOS
+        <literal>mkdir</literal> command to create any necessary
+        directories.
+      </para>
+    </note>
+
+    <para>
+      <literal>InnoDB</literal> forms the directory path for each data
+      file by textually concatenating the value of
+      <literal>innodb_data_home_dir</literal> to the data file name,
+      adding a pathname separator (slash or backslash) between values if
+      necessary. If the <literal>innodb_data_home_dir</literal> option
+      is not mentioned in <filename>my.cnf</filename> at all, the
+      default value is the <quote>dot</quote> directory
+      <filename>./</filename>, which means the MySQL data directory.
+      (The MySQL server changes its current working directory to its
+      data directory when it begins executing.)
+    </para>
+
+    <para>
+      If you specify <literal>innodb_data_home_dir</literal> as an empty
+      string, you can specify absolute paths for the data files listed
+      in the <literal>innodb_data_file_path</literal> value. The
+      following example is equivalent to the preceding one:
+    </para>
+
+<programlisting>
+[mysqld]
+innodb_data_home_dir =
+innodb_data_file_path=/ibdata/ibdata1:50M;/ibdata/ibdata2:50M:autoextend
+</programlisting>
+
+    <para>
+      <emphasis role="bold">A simple <filename>my.cnf</filename>
+      example.</emphasis> Suppose that you have a computer with 128MB
+      RAM and one hard disk. The following example shows possible
+      configuration parameters in <filename>my.cnf</filename> or
+      <filename>my.ini</filename> for <literal>InnoDB</literal>,
+      including the <literal>autoextend</literal> attribute. The example
+      suits most users, both on Unix and Windows, who do not want to
+      distribute <literal>InnoDB</literal> data files and log files onto
+      several disks. It creates an auto-extending data file
+      <filename>ibdata1</filename> and two <literal>InnoDB</literal> log
+      files <filename>ib_logfile0</filename> and
+      <filename>ib_logfile1</filename> in the MySQL data directory.
+    </para>
+
+<programlisting>
+[mysqld]
+# You can write your other MySQL server options here
+# ...
+# Data files must be able to hold your data and indexes.
+# Make sure that you have enough free disk space.
+innodb_data_file_path = ibdata1:10M:autoextend
+#
+# Set buffer pool size to 50-80% of your computer's memory
+innodb_buffer_pool_size=70M
+innodb_additional_mem_pool_size=10M
+#
+# Set the log file size to about 25% of the buffer pool size
+innodb_log_file_size=20M
+innodb_log_buffer_size=8M
+#
+innodb_flush_log_at_trx_commit=1
+</programlisting>
+
+    <para>
+      Make sure that the MySQL server has the proper access rights to
+      create files in the data directory. More generally, the server
+      must have access rights in any directory where it needs to create
+      data files or log files.
+    </para>
+
+    <para>
+      Note that data files must be less than 2GB in some filesystems.
+      The combined size of the log files must be less than 4GB. The
+      combined size of data files must be at least 10MB.
+    </para>
+
+    <para>
+      When you create an <literal>InnoDB</literal> tablespace for the
+      first time, it is best that you start the MySQL server from the
+      command prompt. <literal>InnoDB</literal> then prints the
+      information about the database creation to the screen, so you can
+      see what is happening. For example, on Windows, if
+      <command>mysqld</command> is located in <filename>C:\Program
+      Files\MySQL\MySQL Server &current-series;\bin</filename>, you can
+      start it like this:
+    </para>
+
+<programlisting>
+C:\&gt; <userinput>"C:\Program Files\MySQL\MySQL Server &current-series;\bin\mysqld" --console</userinput>
+</programlisting>
+
+    <para>
+      If you do not send server output to the screen, check the server's
+      error log to see what <literal>InnoDB</literal> prints during the
+      startup process.
+    </para>
+
+    <para>
+      See <xref linkend="innodb-init"/>, for an example of what the
+      information displayed by <literal>InnoDB</literal> should look
+      like.
+    </para>
+
+    <para>
+      You can place <literal>InnoDB</literal> options in the
+      <literal>[mysqld]</literal> group of any option file that your
+      server reads when it starts. The locations for option files are
+      described in <xref linkend="option-files"/>.
+    </para>
+
+    <para>
+      If you installed MySQL on Windows using the installation and
+      configuration wizards, the option file will be the
+      <filename>my.ini</filename> file located in your MySQL
+      installation directory. See
+      <xref linkend="mysql-config-wizard-file-location"/>.
+    </para>
+
+    <para>
+      If your PC uses a boot loader where the <filename>C:</filename>
+      drive is not the boot drive, your only option is to use the
+      <filename>my.ini</filename> file in your Windows directory
+      (typically <filename>C:\WINDOWS</filename>). You can use the
+      <literal>SET</literal> command at the command prompt in a console
+      window to print the value of <literal>WINDIR</literal>:
+    </para>
+
+<programlisting>
+C:\&gt; <userinput>SET WINDIR</userinput>
+windir=C:\WINDOWS
+</programlisting>
+
+    <para>
+      If you want to make sure that <command>mysqld</command> reads
+      options only from a specific file, you can use the
+      <option>--defaults-file</option> option as the first option on the
+      command line when starting the server:
+    </para>
+
+<programlisting>
+mysqld --defaults-file=<replaceable>your_path_to_my_cnf</replaceable>
+</programlisting>
+
+    <para>
+      <emphasis role="bold">An advanced <filename>my.cnf</filename>
+      example.</emphasis> Suppose that you have a Linux computer with
+      2GB RAM and three 60GB hard disks at directory paths
+      <filename>/</filename>, <filename>/dr2</filename> and
+      <filename>/dr3</filename>. The following example shows possible
+      configuration parameters in <filename>my.cnf</filename> for
+      <literal>InnoDB</literal>.
+    </para>
+
+<programlisting>
+[mysqld]
+# You can write your other MySQL server options here
+# ...
+innodb_data_home_dir =
+#
+# Data files must be able to hold your data and indexes
+innodb_data_file_path = /db/ibdata1:2000M;/dr2/db/ibdata2:2000M:autoextend
+#
+# Set buffer pool size to 50-80% of your computer's memory,
+# but make sure on Linux x86 total memory usage is &lt; 2GB
+innodb_buffer_pool_size=1G
+innodb_additional_mem_pool_size=20M
+innodb_log_group_home_dir = /dr3/iblogs
+#
+innodb_log_files_in_group = 2
+#
+# Set the log file size to about 25% of the buffer pool size
+innodb_log_file_size=250M
+innodb_log_buffer_size=8M
+#
+innodb_flush_log_at_trx_commit=1
+innodb_lock_wait_timeout=50
+#
+# Uncomment the next lines if you want to use them
+#innodb_thread_concurrency=5
+</programlisting>
+
+    <para>
+      In some cases, database performance improves if all the data is
+      not placed on the same physical disk. Putting log files on a
+      different disk from data is very often beneficial for performance.
+      The example illustrates how to do this. It places the two data
+      files on different disks and places the log files on the third
+      disk. <literal>InnoDB</literal> fills the tablespace beginning
+      with the first data file. You can also use raw disk partitions
+      (raw devices) as <literal>InnoDB</literal> data files, which may
+      speed up I/O. See <xref linkend="innodb-raw-devices"/>.
+    </para>
+
+    <warning>
+      <para>
+        On 32-bit GNU/Linux x86, you must be careful not to set memory
+        usage too high. <literal>glibc</literal> may allow the process
+        heap to grow over thread stacks, which crashes your server. It
+        is a risk if the value of the following expression is close to
+        or exceeds 2GB:
+      </para>
+    </warning>
+
+<programlisting>
+innodb_buffer_pool_size
++ key_buffer_size
++ max_connections*(sort_buffer_size+read_buffer_size+binlog_cache_size)
++ max_connections*2MB
+</programlisting>
+
+    <para>
+      Each thread uses a stack (often 2MB, but only 256KB in MySQL AB
+      binaries) and in the worst case also uses
+      <literal>sort_buffer_size + read_buffer_size</literal> additional
+      memory.
+    </para>
+
+    <para>
+      By compiling MySQL yourself, you can use up to 64GB of physical
+      memory in 32-bit Windows. See the description for
+      <literal>innodb_buffer_pool_awe_mem_mb</literal> in
+      <xref linkend="innodb-parameters"/>.
+    </para>
+
+    <para>
+      <emphasis role="bold">How to tune other <command>mysqld</command>
+      server parameters?</emphasis> The following values are typical and
+      suit most users:
+    </para>
+
+<programlisting>
+[mysqld]
+skip-external-locking
+max_connections=200
+read_buffer_size=1M
+sort_buffer_size=1M
+#
+# Set key_buffer to 5 - 50% of your RAM depending on how much
+# you use MyISAM tables, but keep key_buffer_size + InnoDB
+# buffer pool size &lt; 80% of your RAM
+key_buffer_size=<replaceable>value</replaceable>
+</programlisting>
+
+    <section id="multiple-tablespaces">
+
+      <title>Using Per-Table Tablespaces</title>
+
+      <para>
+        You can store each <literal>InnoDB</literal> table and its
+        indexes in its own file. This feature is called <quote>multiple
+        tablespaces</quote> because in effect each table has its own
+        tablespace.
+      </para>
+
+      <para>
+        Using multiple tablespaces can be beneficial to users who want
+        to move specific tables to separate physical disks or who wish
+        to restore backups of single tables quickly without interrupting
+        the use of the remaining <literal>InnoDB</literal> tables.
+      </para>
+
+      <para>
+        You can enable multiple tablespaces by adding this line to the
+        <literal>[mysqld]</literal> section of
+        <filename>my.cnf</filename>:
+      </para>
+
+<programlisting>
+[mysqld]
+innodb_file_per_table
+</programlisting>
+
+      <para>
+        After restarting the server, <literal>InnoDB</literal> stores
+        each newly created table into its own file
+        <filename><replaceable>tbl_name</replaceable>.ibd</filename> in
+        the database directory where the table belongs. This is similar
+        to what the <literal>MyISAM</literal> storage engine does, but
+        <literal>MyISAM</literal> divides the table into a data file
+        <filename><replaceable>tbl_name</replaceable>.MYD</filename> and
+        the index file
+        <filename><replaceable>tbl_name</replaceable>.MYI</filename>.
+        For <literal>InnoDB</literal>, the data and the indexes are
+        stored together in the <filename>.ibd</filename> file. The
+        <filename><replaceable>tbl_name</replaceable>.frm</filename>
+        file is still created as usual.
+      </para>
+
+      <para>
+        If you remove the <literal>innodb_file_per_table</literal> line
+        from <filename>my.cnf</filename> and restart the server,
+        <literal>InnoDB</literal> creates tables inside the shared
+        tablespace files again.
+      </para>
+
+      <para>
+        <literal>innodb_file_per_table</literal> affects only table
+        creation, not access to existing tables. If you start the server
+        with this option, new tables are created using
+        <filename>.ibd</filename> files, but you can still access tables
+        that exist in the shared tablespace. If you remove the option
+        and restart the server, new tables are created in the shared
+        tablespace, but you can still access any tables that were
+        created using multiple tablespaces.
+      </para>
+
+      <note>
+        <para>
+          <literal>InnoDB</literal> always needs the shared tablespace
+          because it puts its internal data dictionary and undo logs
+          there. The <filename>.ibd</filename> files are not sufficient
+          for <literal>InnoDB</literal> to operate.
+        </para>
+      </note>
+
+      <note>
+        <para>
+          You cannot freely move <filename>.ibd</filename> files between
+          database directories as you can with <literal>MyISAM</literal>
+          table files. This is because the table definition that is
+          stored in the <literal>InnoDB</literal> shared tablespace
+          includes the database name, and because
+          <literal>InnoDB</literal> must preserve the consistency of
+          transaction IDs and log sequence numbers.
+        </para>
+      </note>
+
+      <para>
+        To move an <filename>.ibd</filename> file and the associated
+        table from one database to another, use a <literal>RENAME
+        TABLE</literal> statement:
+      </para>
+
+<programlisting>
+RENAME TABLE <replaceable>db1.tbl_name</replaceable> TO <replaceable>db2.tbl_name</replaceable>;
+</programlisting>
+
+      <indexterm>
+        <primary>DISCARD TABLESPACE</primary>
+      </indexterm>
+
+      <indexterm>
+        <primary>IMPORT TABLESPACE</primary>
+      </indexterm>
+
+      <para>
+        If you have a <quote>clean</quote> backup of an
+        <filename>.ibd</filename> file, you can restore it to the MySQL
+        installation from which it originated as follows:
+      </para>
+
+      <orderedlist>
+
+        <listitem>
+          <para>
+            Issue this <literal>ALTER TABLE</literal> statement:
+          </para>
+
+<programlisting>
+ALTER TABLE <replaceable>tbl_name</replaceable> DISCARD TABLESPACE;
+</programlisting>
+
+          <caution>
+            <para>
+              This statement deletes the current
+              <filename>.ibd</filename> file.
+            </para>
+          </caution>
+        </listitem>
+
+        <listitem>
+          <para>
+            Put the backup <filename>.ibd</filename> file back in the
+            proper database directory.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Issue this <literal>ALTER TABLE</literal> statement:
+          </para>
+
+<programlisting>
+ALTER TABLE <replaceable>tbl_name</replaceable> IMPORT TABLESPACE;
+</programlisting>
+        </listitem>
+
+      </orderedlist>
+
+      <para>
+        In this context, a <quote>clean</quote>
+        <filename>.ibd</filename> file backup means:
+      </para>
+
+      <itemizedlist>
+
+        <listitem>
+          <para>
+            There are no uncommitted modifications by transactions in
+            the <filename>.ibd</filename> file.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            There are no unmerged insert buffer entries in the
+            <filename>.ibd</filename> file.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Purge has removed all delete-marked index records from the
+            <filename>.ibd</filename> file.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <command>mysqld</command> has flushed all modified pages of
+            the <filename>.ibd</filename> file from the buffer pool to
+            the file.
+          </para>
+        </listitem>
+
+      </itemizedlist>
+
+      <para>
+        You can make a clean backup <filename>.ibd</filename> file using
+        the following method:
+      </para>
+
+      <orderedlist>
+
+        <listitem>
+          <para>
+            Stop all activity from the <command>mysqld</command> server
+            and commit all transactions.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Wait until <literal>SHOW ENGINE INNODB STATUS</literal>
+            shows that there are no active transactions in the database,
+            and the main thread status of <literal>InnoDB</literal> is
+            <literal>Waiting for server activity</literal>. Then you can
+            make a copy of the <filename>.ibd</filename> file.
+          </para>
+        </listitem>
+
+      </orderedlist>
+
+      <para>
+        Another method for making a clean copy of an
+        <filename>.ibd</filename> file is to use the commercial
+        <command>InnoDB Hot Backup</command> tool:
+      </para>
+
+      <orderedlist>
+
+        <listitem>
+          <para>
+            Use <command>InnoDB Hot Backup</command> to back up the
+            <literal>InnoDB</literal> installation.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Start a second <command>mysqld</command> server on the
+            backup and let it clean up the <filename>.ibd</filename>
+            files in the backup.
+          </para>
+        </listitem>
+
+      </orderedlist>
+
+    </section>
+
+    <section id="innodb-raw-devices">
+
+      <title>Using Raw Devices for the Shared Tablespace</title>
+
+      <para>
+        You can use raw disk partitions as data files in the shared
+        tablespace. By using a raw disk, you can perform non-buffered
+        I/O on Windows and on some Unix systems without filesystem
+        overhead, which may improve performance.
+      </para>
+
+      <para>
+        When you create a new data file, you must put the keyword
+        <literal>newraw</literal> immediately after the data file size
+        in <literal>innodb_data_file_path</literal>. The partition must
+        be at least as large as the size that you specify. Note that 1MB
+        in <literal>InnoDB</literal> is 1024 &times; 1024 bytes, whereas
+        1MB in disk specifications usually means 1,000,000 bytes.
+      </para>
+
+<programlisting>
+[mysqld]
+innodb_data_home_dir=
+innodb_data_file_path=/dev/hdd1:3Gnewraw;/dev/hdd2:2Gnewraw
+</programlisting>
+
+      <para>
+        The next time you start the server, <literal>InnoDB</literal>
+        notices the <literal>newraw</literal> keyword and initializes
+        the new partition. However, do not create or change any
+        <literal>InnoDB</literal> tables yet. Otherwise, when you next
+        restart the server, <literal>InnoDB</literal> reinitializes the
+        partition and your changes are lost. (As a safety measure
+        <literal>InnoDB</literal> prevents users from modifying data
+        when any partition with <literal>newraw</literal> is specified.)
+      </para>
+
+      <para>
+        After <literal>InnoDB</literal> has initialized the new
+        partition, stop the server, change <literal>newraw</literal> in
+        the data file specification to <literal>raw</literal>:
+      </para>
+
+<programlisting>
+[mysqld]
+innodb_data_home_dir=
+innodb_data_file_path=/dev/hdd1:5Graw;/dev/hdd2:2Graw
+</programlisting>
+
+      <para>
+        Then restart the server and <literal>InnoDB</literal> allows
+        changes to be made.
+      </para>
+
+      <para>
+        On Windows, you can allocate a disk partition as a data file
+        like this:
+      </para>
+
+<programlisting>
+[mysqld]
+innodb_data_home_dir=
+innodb_data_file_path=//./D::10Gnewraw
+</programlisting>
+
+      <para>
+        The <filename>//./</filename> corresponds to the Windows syntax
+        of <filename>\\.\</filename> for accessing physical drives.
+      </para>
+
+      <para>
+        When you use raw disk partitions, be sure that they have
+        permissions that allow read and write access by the account used
+        for running the MySQL server.
+      </para>
+
+    </section>
+
+  </section>
+
+  <section id="innodb-parameters">
+
+    <title><literal>InnoDB</literal> Startup Options and System Variables</title>
+
+    <para>
+      This section describes the <literal>InnoDB</literal>-related
+      command options and system variables. System variables that are
+      true or false can be enabled at server startup by naming them, or
+      disabled by using a <literal>skip-</literal> prefix. For example,
+      to enable or disable <literal>InnoDB</literal> checksums, you can
+      use <option>--innodb_checksums</option> or
+      <option>--skip-innodb_checksums</option> on the command line, or
+      <literal>innodb_checksums</literal> or
+      <literal>skip-innodb_checksums</literal> in an option file. System
+      variables that take a numeric value can be specified as
+      <option>--<replaceable>var_name</replaceable>=<replaceable>value</replaceable></option>
+      on the command line or as
+      <literal><replaceable>var_name</replaceable>=<replaceable>value</replaceable></literal>
+      in option files. For more information on specifying options and
+      system variables, see <xref linkend="program-options"/>. Many of
+      the system variables can be changed at runtime (see
+      <xref linkend="dynamic-system-variables"/>).
+    </para>
+
+    <formalpara role="mnmas">
+
+      <title>MySQL Enterprise</title>
+
+      <para>
+        The MySQL Enterprise Monitor provides expert advice on InnoDB
+        start-up options and related system variables. For more
+        information see
+        <ulink url="&base-url-enterprise;advisors.html"/>.
+      </para>
+
+    </formalpara>
+
+    <para>
+      <literal>InnoDB</literal> command options:
+    </para>
+
+    <para condition="dynamic:optvar:fullsummary" role="6.0:mysqld:section-innodb"/>
+
+    <itemizedlist>
+
+      <listitem>
+        <para id="option_mysqld_innodb">
+          <indexterm>
+            <primary>mysqld</primary>
+            <secondary>innodb option</secondary>
+          </indexterm>
+
+          <indexterm>
+            <primary>innodb option</primary>
+            <secondary>mysqld</secondary>
+          </indexterm>
+
+          <option>--innodb</option>
+        </para>
+
+        <para>
+          Enables the <literal>InnoDB</literal> storage engine, if the
+          server was compiled with <literal>InnoDB</literal> support.
+          Use <option>--skip-innodb</option> to disable
+          <literal>InnoDB</literal>.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_status_file">
+          <indexterm>
+            <primary>mysqld</primary>
+            <secondary>innodb_status_file option</secondary>
+          </indexterm>
+
+          <indexterm>
+            <primary>innodb_status_file option</primary>
+            <secondary>mysqld</secondary>
+          </indexterm>
+
+          <option>--innodb_status_file</option>
+        </para>
+
+        <para>
+          Causes <literal>InnoDB</literal> to create a file named
+          <filename><replaceable>&lt;datadir&gt;</replaceable>/innodb_status.<replaceable>&lt;pid&gt;</replaceable></filename>
+          in the MySQL data directory. <literal>InnoDB</literal>
+          periodically writes the output of <literal>SHOW ENGINE INNODB
+          STATUS</literal> to this file.
+        </para>
+      </listitem>
+
+    </itemizedlist>
+
+    <para>
+      <literal>InnoDB</literal> system variables:
+    </para>
+
+    <itemizedlist>
+
+      <listitem>
+        <para id="option_mysqld_innodb_additional_mem_pool_size">
+          <literal>innodb_additional_mem_pool_size</literal>
+        </para>
+
+        <para>
+          The size in bytes of a memory pool <literal>InnoDB</literal>
+          uses to store data dictionary information and other internal
+          data structures. The more tables you have in your application,
+          the more memory you need to allocate here. If
+          <literal>InnoDB</literal> runs out of memory in this pool, it
+          starts to allocate memory from the operating system and writes
+          warning messages to the MySQL error log. The default value is
+          1MB.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_autoextend_increment">
+          <literal>innodb_autoextend_increment</literal>
+        </para>
+
+        <para>
+          The increment size (in MB) for extending the size of an
+          auto-extending tablespace when it becomes full. The default
+          value is 8.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_buffer_pool_awe_mem_mb">
+          <literal>innodb_buffer_pool_awe_mem_mb</literal>
+        </para>
+
+        <para>
+          The size of the buffer pool (in MB), if it is placed in the
+          AWE memory. This is relevant only in 32-bit Windows. If your
+          32-bit Windows operating system supports more than 4GB memory,
+          using so-called <quote>Address Windowing Extensions,</quote>
+          you can allocate the <literal>InnoDB</literal> buffer pool
+          into the AWE physical memory using this variable. The maximum
+          possible value for this variable is 63000. If it is greater
+          than 0, <literal>innodb_buffer_pool_size</literal> is the
+          window in the 32-bit address space of
+          <command>mysqld</command> where <literal>InnoDB</literal> maps
+          that AWE memory. A good value for
+          <literal>innodb_buffer_pool_size</literal> is 500MB.
+        </para>
+
+        <para>
+          To take advantage of AWE memory, you will need to recompile
+          MySQL yourself. The current project settings needed for doing
+          this can be found in the
+          <filename>storage/innobase/os/os0proj.c</filename> source
+          file.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_autoinc_lock_mode">
+          <literal>innodb_autoinc_lock_mode</literal>
+        </para>
+
+        <para>
+          The locking mode to use for generating auto-increment values.
+          The allowable values are 0, 1, or 2, for
+          <quote>traditional</quote>, <quote>consecutive</quote>, or
+          <quote>interleaved</quote> lock mode, respectively. The
+          characteristics of these modes are described in
+          <xref linkend="innodb-auto-increment-handling"/>.
+        </para>
+
+        <para>
+          This variable has a default of 1 (<quote>consecutive</quote>
+          lock mode).
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_buffer_pool_size">
+          <literal>innodb_buffer_pool_size</literal>
+        </para>
+
+        <para>
+          The size in bytes of the memory buffer
+          <literal>InnoDB</literal> uses to cache data and indexes of
+          its tables. The larger you set this value, the less disk I/O
+          is needed to access data in tables. On a dedicated database
+          server, you may set this to up to 80% of the machine physical
+          memory size. However, do not set it too large because
+          competition for physical memory might cause paging in the
+          operating system.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_checksums">
+          <literal>innodb_checksums</literal>
+        </para>
+
+        <para>
+          <literal>InnoDB</literal> can use checksum validation on all
+          pages read from the disk to ensure extra fault tolerance
+          against broken hardware or data files. This validation is
+          enabled by default. However, under some rare circumstances
+          (such as when running benchmarks) this extra safety feature is
+          unneeded and can be disabled with
+          <option>--skip-innodb-checksums</option>.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_commit_concurrency">
+          <literal>innodb_commit_concurrency</literal>
+        </para>
+
+        <para>
+          The number of threads that can commit at the same time.
+          Setting this parameter to 0 allows any number of transactions
+          to commit simultaneously.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_concurrency_tickets">
+          <literal>innodb_concurrency_tickets</literal>
+        </para>
+
+        <para>
+          The number of threads that can enter <literal>InnoDB</literal>
+          concurrently is determined by the
+          <literal>innodb_thread_concurrency</literal> variable. A
+          thread is placed in a queue when it tries to enter
+          <literal>InnoDB</literal> if the number of threads has already
+          reached the concurrency limit. When a thread is allowed to
+          enter <literal>InnoDB</literal>, it is given a number of
+          <quote>free tickets</quote> equal to the value of
+          <literal>innodb_concurrency_tickets</literal>, and the thread
+          can enter and leave <literal>InnoDB</literal> freely until it
+          has used up its tickets. After that point, the thread again
+          becomes subject to the concurrency check (and possible
+          queuing) the next time it tries to enter
+          <literal>InnoDB</literal>.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_data_file_path">
+          <literal>innodb_data_file_path</literal>
+        </para>
+
+        <para>
+          The paths to individual data files and their sizes. The full
+          directory path to each data file is formed by concatenating
+          <literal>innodb_data_home_dir</literal> to each path specified
+          here. The file sizes are specified in MB or GB (1024MB) by
+          appending <literal>M</literal> or <literal>G</literal> to the
+          size value. The sum of the sizes of the files must be at least
+          10MB. If you do not specify
+          <literal>innodb_data_file_path</literal>, the default behavior
+          is to create a single 10MB auto-extending data file named
+          <filename>ibdata1</filename>. The size limit of individual
+          files is determined by your operating system. You can set the
+          file size to more than 4GB on those operating systems that
+          support big files. You can also use raw disk partitions as
+          data files. See <xref linkend="innodb-raw-devices"/>.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_data_home_dir">
+          <literal>innodb_data_home_dir</literal>
+        </para>
+
+        <para>
+          The common part of the directory path for all
+          <literal>InnoDB</literal> data files. If you do not set this
+          value, the default is the MySQL data directory. You can
+          specify the value as an empty string, in which case you can
+          use absolute file paths in
+          <literal>innodb_data_file_path</literal>.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_doublewrite">
+          <literal>innodb_doublewrite</literal>
+        </para>
+
+        <para>
+          By default, <literal>InnoDB</literal> stores all data twice,
+          first to the doublewrite buffer, and then to the actual data
+          files. This variable is enabled by default. It can be turned
+          off with <option>--skip-innodb_doublewrite</option> for
+          benchmarks or cases when top performance is needed rather than
+          concern for data integrity or possible failures.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_fast_shutdown">
+          <literal>innodb_fast_shutdown</literal>
+        </para>
+
+        <para>
+          If you set this variable to 0, <literal>InnoDB</literal> does
+          a full purge and an insert buffer merge before a shutdown.
+          These operations can take minutes, or even hours in extreme
+          cases. If you set this variable to 1,
+          <literal>InnoDB</literal> skips these operations at shutdown.
+          The default value is 1. If you set it to 2,
+          <literal>InnoDB</literal> will just flush its logs and then
+          shut down cold, as if MySQL had crashed; no committed
+          transaction will be lost, but crash recovery will be done at
+          the next startup. A value of 2 cannot be used on NetWare.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_file_io_threads">
+          <literal>innodb_file_io_threads</literal>
+        </para>
+
+        <para>
+          The number of file I/O threads in <literal>InnoDB</literal>.
+          Normally, this should be left at the default value of 4, but
+          disk I/O on Windows may benefit from a larger number. On Unix,
+          increasing the number has no effect; <literal>InnoDB</literal>
+          always uses the default value.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_file_per_table">
+          <literal>innodb_file_per_table</literal>
+        </para>
+
+        <para>
+          If this variable is enabled, <literal>InnoDB</literal> creates
+          each new table using its own <filename>.ibd</filename> file
+          for storing data and indexes, rather than in the shared
+          tablespace. The default is to create tables in the shared
+          tablespace. See <xref linkend="multiple-tablespaces"/>.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_flush_log_at_trx_commit">
+          <literal>innodb_flush_log_at_trx_commit</literal>
+        </para>
+
+        <para>
+          When <literal>innodb_flush_log_at_trx_commit</literal> is set
+          to 0, the log buffer is written out to the log file once per
+          second and the flush to disk operation is performed on the log
+          file, but nothing is done at a transaction commit. When this
+          value is 1 (the default), the log buffer is written out to the
+          log file at each transaction commit and the flush to disk
+          operation is performed on the log file. When set to 2, the log
+          buffer is written out to the file at each commit, but the
+          flush to disk operation is not performed on it. However, the
+          flushing on the log file takes place once per second also when
+          the value is 2. Note that the once-per-second flushing is not
+          100% guaranteed to happen every second, due to process
+          scheduling issues.
+        </para>
+
+        <para>
+          The default value of this variable is 1, which is the value
+          that is required for ACID compliance. You can achieve better
+          performance by setting the value different from 1, but then
+          you can lose at most one second worth of transactions in a
+          crash. If you set the value to 0, then any
+          <command>mysqld</command> process crash can erase the last
+          second of transactions. If you set the value to 2, then only
+          an operating system crash or a power outage can erase the last
+          second of transactions. However, <literal>InnoDB</literal>'s
+          crash recovery is not affected and thus crash recovery does
+          work regardless of the value. Note that many operating systems
+          and some disk hardware fool the flush-to-disk operation. They
+          may tell <command>mysqld</command> that the flush has taken
+          place, even though it has not. Then the durability of
+          transactions is not guaranteed even with the setting 1, and in
+          the worst case a power outage can even corrupt the
+          <literal>InnoDB</literal> database. Using a battery-backed
+          disk cache in the SCSI disk controller or in the disk itself
+          speeds up file flushes, and makes the operation safer. You can
+          also try using the Unix command <command>hdparm</command> to
+          disable the caching of disk writes in hardware caches, or use
+          some other command specific to the hardware vendor.
+        </para>
+
+        <para>
+          Note: For the greatest possible durability and consistency in
+          a replication setup using <literal>InnoDB</literal> with
+          transactions, you should use
+          <literal>innodb_flush_log_at_trx_commit=1</literal> and
+          <literal>sync_binlog=1</literal> in your master server
+          <filename>my.cnf</filename> file.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_flush_method">
+          <literal>innodb_flush_method</literal>
+        </para>
+
+        <para>
+          If set to <literal>fdatasync</literal> (the default),
+          <literal>InnoDB</literal> uses <literal>fsync()</literal> to
+          flush both the data and log files. If set to
+          <literal>O_DSYNC</literal>, <literal>InnoDB</literal> uses
+          <literal>O_SYNC</literal> to open and flush the log files, but
+          uses <literal>fsync()</literal> to flush the data files. If
+          <literal>O_DIRECT</literal> is specified (available on some
+          GNU/Linux versions, FreeBSD and Solaris),
+          <literal>InnoDB</literal> uses <literal>O_DIRECT</literal> (or
+          <literal>directio()</literal> on Solaris) to open the data
+          files, and uses <literal>fsync()</literal> to flush both the
+          data and log files. Note that <literal>InnoDB</literal> uses
+          <literal>fsync()</literal> instead of
+          <literal>fdatasync()</literal>, and it does not use
+          <literal>O_DSYNC</literal> by default because there have been
+          problems with it on many varieties of Unix. This variable is
+          relevant only for Unix. On Windows, the flush method is always
+          <literal>async_unbuffered</literal> and cannot be changed.
+        </para>
+
+        <para>
+          Different values of this variable can have a marked effect on
+          <literal>InnoDB performance</literal>. For example, on some
+          systems where <literal>InnoDB</literal> data and log files are
+          located on a SAN, it has been found that setting
+          <literal>innodb_flush_method</literal> to
+          <literal>O_DIRECT</literal> can degrade performance of simple
+          <literal>SELECT</literal> statements by a factor of three.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_force_recovery">
+          <literal>innodb_force_recovery</literal>
+        </para>
+
+        <para>
+          The crash recovery mode.
+        </para>
+
+        <warning>
+          <para>
+            This variable should be set greater than 0 only in an
+            emergency situation when you want to dump your tables from a
+            corrupt database! Possible values are from 1 to 6. The
+            meanings of these values are described in
+            <xref linkend="forcing-recovery"/>. As a safety measure,
+            <literal>InnoDB</literal> prevents any changes to its data
+            when this variable is greater than 0.
+          </para>
+        </warning>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_lock_wait_timeout">
+          <literal>innodb_lock_wait_timeout</literal>
+        </para>
+
+        <para>
+          The timeout in seconds an <literal>InnoDB</literal>
+          transaction may wait for a lock before being rolled back.
+          <literal>InnoDB</literal> automatically detects transaction
+          deadlocks in its own lock table and rolls back the
+          transaction. <literal>InnoDB</literal> notices locks set using
+          the <literal>LOCK TABLES</literal> statement. The default is
+          50 seconds.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_locks_unsafe_for_binlog">
+          <literal>innodb_locks_unsafe_for_binlog</literal>
+        </para>
+
+        <para>
+          <indexterm>
+            <primary>next-key lock</primary>
+          </indexterm>
+
+          <indexterm>
+            <primary>gap lock</primary>
+          </indexterm>
+
+          This variable controls next-key locking in
+          <literal>InnoDB</literal> searches and index scans. By
+          default, this variable is 0 (disabled), which means that
+          next-key locking is enabled.
+        </para>
+
+        <para>
+          Normally, <literal>InnoDB</literal> uses an algorithm called
+          <firstterm>next-key locking</firstterm>.
+          <literal>InnoDB</literal> performs row-level locking in such a
+          way that when it searches or scans a table index, it sets
+          shared or exclusive locks on any index records it encounters.
+          Thus, the row-level locks are actually index record locks. The
+          locks that <literal>InnoDB</literal> sets on index records
+          also affect the <quote>gap</quote> preceding that index
+          record. If a user has a shared or exclusive lock on record
+          <emphasis>R</emphasis> in an index, another user cannot insert
+          a new index record immediately before <emphasis>R</emphasis>
+          in the order of the index. Enabling this variable causes
+          <literal>InnoDB</literal> not to use next-key locking in
+          searches or index scans. Next-key locking is still used to
+          ensure foreign key constraints and duplicate key checking.
+          Note that enabling this variable may cause phantom problems:
+          Suppose that you want to read and lock all children from the
+          <literal>child</literal> table with an identifier value larger
+          than 100, with the intention of updating some column in the
+          selected rows later:
+        </para>
+
+<programlisting>
+SELECT * FROM child WHERE id &gt; 100 FOR UPDATE;
+</programlisting>
+
+        <para>
+          Suppose that there is an index on the <literal>id</literal>
+          column. The query scans that index starting from the first
+          record where <literal>id</literal> is greater than 100. If the
+          locks set on the index records do not lock out inserts made in
+          the gaps, another client can insert a new row into the table.
+          If you execute the same <literal>SELECT</literal> within the
+          same transaction, you see a new row in the result set returned
+          by the query. This also means that if new items are added to
+          the database, <literal>InnoDB</literal> does not guarantee
+          serializability. Therefore, if this variable is enabled,
+          <literal>InnoDB</literal> guarantees at most isolation level
+          <literal>READ COMMITTED</literal>. (Conflict serializability
+          is still guaranteed.)
+        </para>
+
+        <para>
+          Enabling this variable has an additional effect:
+          <literal>InnoDB</literal> in an <literal>UPDATE</literal> or a
+          <literal>DELETE</literal> only locks rows that it updates or
+          deletes. This greatly reduces the probability of deadlocks,
+          but they can happen. Note that enabling this variable still
+          does not allow operations such as <literal>UPDATE</literal> to
+          overtake other similar operations (such as another
+          <literal>UPDATE</literal>) even in the case when they affect
+          different rows. Consider the following example, beginning with
+          this table:
+        </para>
+
+<programlisting>
+CREATE TABLE A(A INT NOT NULL, B INT) ENGINE = InnoDB;
+INSERT INTO A VALUES (1,2),(2,3),(3,2),(4,3),(5,2);
+COMMIT;
+</programlisting>
+
+        <para>
+          Suppose that one client executes these statements:
+        </para>
+
+<programlisting>
+SET AUTOCOMMIT = 0;
+UPDATE A SET B = 5 WHERE B = 3;
+</programlisting>
+
+        <para>
+          Then suppose that another client executes these statements
+          following those of the first client:
+        </para>
+
+<programlisting>
+SET AUTOCOMMIT = 0;
+UPDATE A SET B = 4 WHERE B = 2;
+</programlisting>
+
+        <para>
+          In this case, the second <literal>UPDATE</literal> must wait
+          for a commit or rollback of the first
+          <literal>UPDATE</literal>. The first <literal>UPDATE</literal>
+          has an exclusive lock on row (2,3), and the second
+          <literal>UPDATE</literal> while scanning rows also tries to
+          acquire an exclusive lock for the same row, which it cannot
+          have. This is because <literal>UPDATE</literal> two first
+          acquires an exclusive lock on a row and then determines
+          whether the row belongs to the result set. If not, it releases
+          the unnecessary lock, when the
+          <literal>innodb_locks_unsafe_for_binlog</literal> variable is
+          enabled.
+        </para>
+
+        <para>
+          Therefore, <literal>InnoDB</literal> executes
+          <literal>UPDATE</literal> one as follows:
+        </para>
+
+<programlisting>
+x-lock(1,2)
+unlock(1,2)
+x-lock(2,3)
+update(2,3) to (2,5)
+x-lock(3,2)
+unlock(3,2)
+x-lock(4,3)
+update(4,3) to (4,5)
+x-lock(5,2)
+unlock(5,2)
+</programlisting>
+
+        <para>
+          <literal>InnoDB</literal> executes <literal>UPDATE</literal>
+          two as follows:
+        </para>
+
+<programlisting>
+x-lock(1,2)
+update(1,2) to (1,4)
+x-lock(2,3) - wait for query one to commit or rollback
+</programlisting>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_log_archive">
+          <literal>innodb_log_archive</literal>
+        </para>
+
+        <para>
+          Whether to log <literal>InnoDB</literal> archive files. This
+          variable is present for historical reasons, but is unused.
+          Recovery from a backup is done by MySQL using its own log
+          files, so there is no need to archive
+          <literal>InnoDB</literal> log files. The default for this
+          variable is 0.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_log_buffer_size">
+          <literal>innodb_log_buffer_size</literal>
+        </para>
+
+        <para>
+          The size in bytes of the buffer that <literal>InnoDB</literal>
+          uses to write to the log files on disk. Sensible values range
+          from 1MB to 8MB. The default is 1MB. A large log buffer allows
+          large transactions to run without a need to write the log to
+          disk before the transactions commit. Thus, if you have big
+          transactions, making the log buffer larger saves disk I/O.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_log_file_size">
+          <literal>innodb_log_file_size</literal>
+        </para>
+
+        <para>
+          The size in bytes of each log file in a log group. The
+          combined size of log files must be less than 4GB on 32-bit
+          computers. The default is 5MB. Sensible values range from 1MB
+          to 1/<replaceable>N</replaceable>-th of the size of the buffer
+          pool, where <replaceable>N</replaceable> is the number of log
+          files in the group. The larger the value, the less checkpoint
+          flush activity is needed in the buffer pool, saving disk I/O.
+          But larger log files also mean that recovery is slower in case
+          of a crash.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_log_files_in_group">
+          <literal>innodb_log_files_in_group</literal>
+        </para>
+
+        <para>
+          The number of log files in the log group.
+          <literal>InnoDB</literal> writes to the files in a circular
+          fashion. The default (and recommended) is 2.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_log_group_home_dir">
+          <literal>innodb_log_group_home_dir</literal>
+        </para>
+
+        <para>
+          The directory path to the <literal>InnoDB</literal> log files.
+          If you do not specify any <literal>InnoDB</literal> log
+          variables, the default is to create two 5MB files names
+          <filename>ib_logfile0</filename> and
+          <filename>ib_logfile1</filename> in the MySQL data directory.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_max_dirty_pages_pct">
+          <literal>innodb_max_dirty_pages_pct</literal>
+        </para>
+
+        <para>
+          This is an integer in the range from 0 to 100. The default is
+          90. The main thread in <literal>InnoDB</literal> tries to
+          write pages from the buffer pool so that the percentage of
+          dirty (not yet written) pages will not exceed this value.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_max_purge_lag">
+          <literal>innodb_max_purge_lag</literal>
+        </para>
+
+        <para>
+          This variable controls how to delay <literal>INSERT</literal>,
+          <literal>UPDATE</literal> and <literal>DELETE</literal>
+          operations when the purge operations are lagging (see
+          <xref linkend="innodb-multi-versioning"/>). The default value
+          of this variable is 0, meaning that there are no delays.
+        </para>
+
+        <para>
+          The <literal>InnoDB</literal> transaction system maintains a
+          list of transactions that have delete-marked index records by
+          <literal>UPDATE</literal> or <literal>DELETE</literal>
+          operations. Let the length of this list be
+          <replaceable>purge_lag</replaceable>. When
+          <replaceable>purge_lag</replaceable> exceeds
+          <literal>innodb_max_purge_lag</literal>, each
+          <literal>INSERT</literal>, <literal>UPDATE</literal> and
+          <literal>DELETE</literal> operation is delayed by
+          ((<replaceable>purge_lag</replaceable>/<literal>innodb_max_purge_lag</literal>)&times;10)&minus;5
+          milliseconds. The delay is computed in the beginning of a
+          purge batch, every ten seconds. The operations are not delayed
+          if purge cannot run because of an old consistent read view
+          that could see the rows to be purged.
+        </para>
+
+        <para>
+          A typical setting for a problematic workload might be 1
+          million, assuming that our transactions are small, only 100
+          bytes in size, and we can allow 100MB of unpurged rows in our
+          tables.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_mirrored_log_groups">
+          <literal>innodb_mirrored_log_groups</literal>
+        </para>
+
+        <para>
+          The number of identical copies of log groups to keep for the
+          database. Currently, this should be set to 1.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_open_files">
+          <literal>innodb_open_files</literal>
+        </para>
+
+        <para>
+          This variable is relevant only if you use multiple tablespaces
+          in <literal>InnoDB</literal>. It specifies the maximum number
+          of <filename>.ibd</filename> files that
+          <literal>InnoDB</literal> can keep open at one time. The
+          minimum value is 10. The default is 300.
+        </para>
+
+        <para>
+          The file descriptors used for <filename>.ibd</filename> files
+          are for <literal>InnoDB</literal> only. They are independent
+          of those specified by the <option>--open-files-limit</option>
+          server option, and do not affect the operation of the table
+          cache.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_rollback_on_timeout">
+          <literal>innodb_rollback_on_timeout</literal>
+        </para>
+
+        <para>
+          In MySQL &current-series;, <literal>InnoDB</literal> rolls
+          back only the last statement on a transaction timeout. If this
+          option is given, a transaction timeout causes
+          <literal>InnoDB</literal> to abort and roll back the entire
+          transaction (the same behavior as in MySQL 4.1).
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_stats_on_metadata">
+          <literal>innodb_stats_on_metadata</literal>
+        </para>
+
+        <para>
+          When this variable is enabled (which is the default, as before
+          the variable was created), <literal>InnoDB</literal> updates
+          statistics during metadata statements such as <literal>SHOW
+          TABLE STATUS</literal> or <literal>SHOW INDEX</literal>, or
+          when accessing the <literal>INFORMATION_SCHEMA</literal>
+          tables <literal>TABLES</literal> or
+          <literal>STATISTICS</literal>. (These updates are similar to
+          what happens for <literal>ANALYZE TABLE</literal>.) When
+          disabled, <literal>InnoDB</literal> does not updates
+          statistics during these operations. Disabling this variable
+          can improve access speed for schemas that have a large number
+          of tables or indexes. It can also improve the stability of
+          execution plans for queries that involve
+          <literal>InnoDB</literal> tables.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_support_xa">
+          <literal>innodb_support_xa</literal>
+        </para>
+
+        <para>
+          When set to <literal>ON</literal> or 1 (the default), this
+          variable enables <literal>InnoDB</literal> support for
+          two-phase commit in XA transactions. Enabling
+          <literal>innodb_support_xa</literal> causes an extra disk
+          flush for transaction preparation.
+        </para>
+
+        <para>
+          If you don't care about using XA, you can disable this
+          variable by setting it to <literal>OFF</literal> or 0 to
+          reduce the number of disk flushes and get better
+          <literal>InnoDB</literal> performance. If you are using
+          replication and/or the binary log then you should set this
+          variable to <literal>ON</literal> or 1 to ensure that the
+          binary log does not get out of sync compared to the table
+          data.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_sync_spin_loops">
+          <literal>innodb_sync_spin_loops</literal>
+        </para>
+
+        <para>
+          The number of times a thread waits for an
+          <literal>InnoDB</literal> mutex to be freed before the thread
+          is suspended.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_table_locks">
+          <literal>innodb_table_locks</literal>
+        </para>
+
+        <para>
+          If <literal>AUTOCOMMIT=0</literal>, <literal>InnoDB</literal>
+          honors <literal>LOCK TABLES</literal>; MySQL does not return
+          from <literal>LOCK TABLE .. WRITE</literal> until all other
+          threads have released all their locks to the table. The
+          default value of <literal>innodb_table_locks</literal> is 1,
+          which means that <literal>LOCK TABLES</literal> causes InnoDB
+          to lock a table internally if <literal>AUTOCOMMIT=0</literal>.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_thread_concurrency">
+          <literal>innodb_thread_concurrency</literal>
+        </para>
+
+        <para>
+          <literal>InnoDB</literal> tries to keep the number of
+          operating system threads concurrently inside
+          <literal>InnoDB</literal> less than or equal to the limit
+          given by this variable. Once the number of threads reaches
+          this limit, additional threads are placed into a wait state
+          within a FIFO queue for execution. Threads waiting for locks
+          are not counted in the number of concurrently executing
+          threads.
+        </para>
+
+        <para>
+          The correct value for this variable is dependent on
+          environment and workload. You will need to try a range of
+          different values to determine what value works for your
+          application.
+        </para>
+
+        <para>
+          The range of this variable is 0 to 1000. You can disable
+          thread concurrency checking by setting the value to 0, which
+          allows InnoDB to create as many threads as it needs.
+        </para>
+
+        <para>
+          The default value is 8.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para id="option_mysqld_innodb_thread_sleep_delay">
+          <literal>innodb_thread_sleep_delay</literal>
+        </para>
+
+        <para>
+          How long <literal>InnoDB</literal> threads sleep before
+          joining the <literal>InnoDB</literal> queue, in microseconds.
+          The default value is 10,000. A value of 0 disables sleep.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          <literal>sync_binlog</literal>
+        </para>
+
+        <para>
+          If the value of this variable is positive, the MySQL server
+          synchronizes its binary log to disk
+          (<literal>fdatasync()</literal>) after every
+          <literal>sync_binlog</literal> writes to this binary log. Note
+          that there is one write to the binary log per statement if in
+          autocommit mode, and otherwise one write per transaction. The
+          default value is 0 which does no synchronizing to disk. A
+          value of 1 is the safest choice, because in the event of a
+          crash you lose at most one statement/transaction from the
+          binary log; however, it is also the slowest choice (unless the
+          disk has a battery-backed cache, which makes synchronization
+          very fast).
+        </para>
+      </listitem>
+
+    </itemizedlist>
+
+  </section>
+
+  <section id="innodb-init">
+
+    <title>Creating the <literal>InnoDB</literal> Tablespace</title>
+
+    <para>
+      Suppose that you have installed MySQL and have edited your option
+      file so that it contains the necessary <literal>InnoDB</literal>
+      configuration parameters. Before starting MySQL, you should verify
+      that the directories you have specified for
+      <literal>InnoDB</literal> data files and log files exist and that
+      the MySQL server has access rights to those directories.
+      <literal>InnoDB</literal> does not create directories, only files.
+      Check also that you have enough disk space for the data and log
+      files.
+    </para>
+
+    <para>
+      It is best to run the MySQL server <command>mysqld</command> from
+      the command prompt when you first start the server with
+      <literal>InnoDB</literal> enabled, not from the
+      <command>mysqld_safe</command> wrapper or as a Windows service.
+      When you run from a command prompt you see what
+      <command>mysqld</command> prints and what is happening. On Unix,
+      just invoke <command>mysqld</command>. On Windows, use the
+      <option>--console</option> option.
+    </para>
+
+    <para>
+      When you start the MySQL server after initially configuring
+      <literal>InnoDB</literal> in your option file,
+      <literal>InnoDB</literal> creates your data files and log files,
+      and prints something like this:
+    </para>
+
+<programlisting>
+InnoDB: The first specified datafile /home/heikki/data/ibdata1
+did not exist:
+InnoDB: a new database to be created!
+InnoDB: Setting file /home/heikki/data/ibdata1 size to 134217728
+InnoDB: Database physically writes the file full: wait...
+InnoDB: datafile /home/heikki/data/ibdata2 did not exist:
+new to be created
+InnoDB: Setting file /home/heikki/data/ibdata2 size to 262144000
+InnoDB: Database physically writes the file full: wait...
+InnoDB: Log file /home/heikki/data/logs/ib_logfile0 did not exist:
+new to be created
+InnoDB: Setting log file /home/heikki/data/logs/ib_logfile0 size
+to 5242880
+InnoDB: Log file /home/heikki/data/logs/ib_logfile1 did not exist:
+new to be created
+InnoDB: Setting log file /home/heikki/data/logs/ib_logfile1 size
+to 5242880
+InnoDB: Doublewrite buffer not found: creating new
+InnoDB: Doublewrite buffer created
+InnoDB: Creating foreign key constraint system tables
+InnoDB: Foreign key constraint system tables created
+InnoDB: Started
+mysqld: ready for connections
+</programlisting>
+
+    <para>
+      At this point <literal>InnoDB</literal> has initialized its
+      tablespace and log files. You can connect to the MySQL server with
+      the usual MySQL client programs like <command>mysql</command>.
+      When you shut down the MySQL server with <command>mysqladmin
+      shutdown</command>, the output is like this:
+    </para>
+
+<programlisting>
+010321 18:33:34  mysqld: Normal shutdown
+010321 18:33:34  mysqld: Shutdown Complete
+InnoDB: Starting shutdown...
+InnoDB: Shutdown completed
+</programlisting>
+
+    <para>
+      You can look at the data file and log directories and you see the
+      files created there. When MySQL is started again, the data files
+      and log files have been created already, so the output is much
+      briefer:
+    </para>
+
+<programlisting>
+InnoDB: Started
+mysqld: ready for connections
+</programlisting>
+
+    <para>
+      If you add the <literal>innodb_file_per_table</literal> option to
+      <filename>my.cnf</filename>, <literal>InnoDB</literal> stores each
+      table in its own <filename>.ibd</filename> file in the same MySQL
+      database directory where the <filename>.frm</filename> file is
+      created. See <xref linkend="multiple-tablespaces"/>.
+    </para>
+
+    <section id="error-creating-innodb">
+
+      <title>Dealing with <literal>InnoDB</literal> Initialization Problems</title>
+
+      <para>
+        If <literal>InnoDB</literal> prints an operating system error
+        during a file operation, usually the problem has one of the
+        following causes:
+      </para>
+
+      <itemizedlist>
+
+        <listitem>
+          <para>
+            You did not create the <literal>InnoDB</literal> data file
+            directory or the <literal>InnoDB</literal> log directory.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <command>mysqld</command> does not have access rights to
+            create files in those directories.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <command>mysqld</command> cannot read the proper
+            <filename>my.cnf</filename> or <filename>my.ini</filename>
+            option file, and consequently does not see the options that
+            you specified.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            The disk is full or a disk quota is exceeded.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            You have created a subdirectory whose name is equal to a
+            data file that you specified, so the name cannot be used as
+            a filename.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            There is a syntax error in the
+            <literal>innodb_data_home_dir</literal> or
+            <literal>innodb_data_file_path</literal> value.
+          </para>
+        </listitem>
+
+      </itemizedlist>
+
+      <para>
+        If something goes wrong when <literal>InnoDB</literal> attempts
+        to initialize its tablespace or its log files, you should delete
+        all files created by <literal>InnoDB</literal>. This means all
+        <filename>ibdata</filename> files and all
+        <filename>ib_logfile</filename> files. In case you have already
+        created some <literal>InnoDB</literal> tables, delete the
+        corresponding <filename>.frm</filename> files for these tables
+        (and any <filename>.ibd</filename> files if you are using
+        multiple tablespaces) from the MySQL database directories as
+        well. Then you can try the <literal>InnoDB</literal> database
+        creation again. It is best to start the MySQL server from a
+        command prompt so that you see what is happening.
+      </para>
+
+    </section>
+
+  </section>
+
+  <section id="using-innodb-tables">
+
+    <title>Creating and Using <literal>InnoDB</literal> Tables</title>
+
+    <para>
+      To create an <literal>InnoDB</literal> table, specify an
+      <literal>ENGINE = InnoDB</literal> option in the <literal>CREATE
+      TABLE</literal> statement:
+    </para>
+
+<programlisting>
+CREATE TABLE customers (a INT, b CHAR (20), INDEX (a)) ENGINE=InnoDB;
+</programlisting>
+
+    <para>
+      The statement creates a table and an index on column
+      <literal>a</literal> in the <literal>InnoDB</literal> tablespace
+      that consists of the data files that you specified in
+      <filename>my.cnf</filename>. In addition, MySQL creates a file
+      <filename>customers.frm</filename> in the
+      <filename>test</filename> directory under the MySQL database
+      directory. Internally, <literal>InnoDB</literal> adds an entry for
+      the table to its own data dictionary. The entry includes the
+      database name. For example, if <literal>test</literal> is the
+      database in which the <literal>customers</literal> table is
+      created, the entry is for <literal>'test/customers'</literal>.
+      This means you can create a table of the same name
+      <literal>customers</literal> in some other database, and the table
+      names do not collide inside <literal>InnoDB</literal>.
+    </para>
+
+    <para>
+      You can query the amount of free space in the
+      <literal>InnoDB</literal> tablespace by issuing a <literal>SHOW
+      TABLE STATUS</literal> statement for any <literal>InnoDB</literal>
+      table. The amount of free space in the tablespace appears in the
+      <literal>Comment</literal> section in the output of <literal>SHOW
+      TABLE STATUS</literal>. For example:
+    </para>
+
+<programlisting>
+SHOW TABLE STATUS FROM test LIKE 'customers'
+</programlisting>
+
+    <para>
+      Note that the statistics <literal>SHOW</literal> displays for
+      <literal>InnoDB</literal> tables are only approximate. They are
+      used in SQL optimization. Table and index reserved sizes in bytes
+      are accurate, though.
+    </para>
+
+    <section id="innodb-transactions-with-different-apis">
+
+      <title>How to Use Transactions in <literal>InnoDB</literal> with Different APIs</title>
+
+      <para>
+        By default, each client that connects to the MySQL server begins
+        with autocommit mode enabled, which automatically commits every
+        SQL statement as you execute it. To use multiple-statement
+        transactions, you can switch autocommit off with the SQL
+        statement <literal>SET AUTOCOMMIT = 0</literal> and use
+        <literal>COMMIT</literal> and <literal>ROLLBACK</literal> to
+        commit or roll back your transaction. If you want to leave
+        autocommit on, you can enclose your transactions within
+        <literal>START TRANSACTION</literal> and either
+        <literal>COMMIT</literal> or <literal>ROLLBACK</literal>. The
+        following example shows two transactions. The first is
+        committed; the second is rolled back.
+      </para>
+
+<programlisting>
+shell&gt; <userinput>mysql test</userinput>
+
+mysql&gt; <userinput>CREATE TABLE CUSTOMER (A INT, B CHAR (20), INDEX (A))</userinput>
+    -&gt; <userinput>ENGINE=InnoDB;</userinput>
+Query OK, 0 rows affected (0.00 sec)
+mysql&gt; <userinput>START TRANSACTION;</userinput>
+Query OK, 0 rows affected (0.00 sec)
+mysql&gt; <userinput>INSERT INTO CUSTOMER VALUES (10, 'Heikki');</userinput>
+Query OK, 1 row affected (0.00 sec)
+mysql&gt; <userinput>COMMIT;</userinput>
+Query OK, 0 rows affected (0.00 sec)
+mysql&gt; <userinput>SET AUTOCOMMIT=0;</userinput>
+Query OK, 0 rows affected (0.00 sec)
+mysql&gt; <userinput>INSERT INTO CUSTOMER VALUES (15, 'John');</userinput>
+Query OK, 1 row affected (0.00 sec)
+mysql&gt; <userinput>ROLLBACK;</userinput>
+Query OK, 0 rows affected (0.00 sec)
+mysql&gt; <userinput>SELECT * FROM CUSTOMER;</userinput>
++------+--------+
+| A    | B      |
++------+--------+
+|   10 | Heikki |
++------+--------+
+1 row in set (0.00 sec)
+mysql&gt;
+</programlisting>
+
+      <para>
+        In APIs such as PHP, Perl DBI, JDBC, ODBC, or the standard C
+        call interface of MySQL, you can send transaction control
+        statements such as <literal>COMMIT</literal> to the MySQL server
+        as strings just like any other SQL statements such as
+        <literal>SELECT</literal> or <literal>INSERT</literal>. Some
+        APIs also offer separate special transaction commit and rollback
+        functions or methods.
+      </para>
+
+    </section>
+
+    <section id="converting-tables-to-innodb">
+
+      <title>Converting <literal>MyISAM</literal> Tables to <literal>InnoDB</literal></title>
+
+      <para>
+        Important: Do not convert MySQL system tables in the
+        <literal>mysql</literal> database (such as
+        <literal>user</literal> or <literal>host</literal>) to the
+        <literal>InnoDB</literal> type. This is an unsupported
+        operation. The system tables must always be of the
+        <literal>MyISAM</literal> type.
+      </para>
+
+      <para>
+        If you want all your (non-system) tables to be created as
+        <literal>InnoDB</literal> tables, you can simply add the line
+        <literal>default-storage-engine=innodb</literal> to the
+        <literal>[mysqld]</literal> section of your server option file.
+      </para>
+
+      <para>
+        <literal>InnoDB</literal> does not have a special optimization
+        for separate index creation the way the
+        <literal>MyISAM</literal> storage engine does. Therefore, it
+        does not pay to export and import the table and create indexes
+        afterward. The fastest way to alter a table to
+        <literal>InnoDB</literal> is to do the inserts directly to an
+        <literal>InnoDB</literal> table. That is, use <literal>ALTER
+        TABLE ... ENGINE=INNODB</literal>, or create an empty
+        <literal>InnoDB</literal> table with identical definitions and
+        insert the rows with <literal>INSERT INTO ... SELECT * FROM
+        ...</literal>.
+      </para>
+
+      <para>
+        If you have <literal>UNIQUE</literal> constraints on secondary
+        keys, you can speed up a table import by turning off the
+        uniqueness checks temporarily during the import operation:
+      </para>
+
+<programlisting>
+SET UNIQUE_CHECKS=0;
+<replaceable>... import operation ...</replaceable>
+SET UNIQUE_CHECKS=1;
+</programlisting>
+
+      <para>
+        For big tables, this saves a lot of disk I/O because
+        <literal>InnoDB</literal> can then use its insert buffer to
+        write secondary index records as a batch. Be certain that the
+        data contains no duplicate keys.
+        <literal>UNIQUE_CHECKS</literal> allows but does not require
+        storage engines to ignore duplicate keys.
+      </para>
+
+      <para>
+        To get better control over the insertion process, it might be
+        good to insert big tables in pieces:
+      </para>
+
+<programlisting>
+INSERT INTO newtable SELECT * FROM oldtable
+   WHERE yourkey &gt; something AND yourkey &lt;= somethingelse;
+</programlisting>
+
+      <para>
+        After all records have been inserted, you can rename the tables.
+      </para>
+
+      <para>
+        During the conversion of big tables, you should increase the
+        size of the <literal>InnoDB</literal> buffer pool to reduce disk
+        I/O. Do not use more than 80% of the physical memory, though.
+        You can also increase the sizes of the <literal>InnoDB</literal>
+        log files.
+      </para>
+
+      <para>
+        Make sure that you do not fill up the tablespace:
+        <literal>InnoDB</literal> tables require a lot more disk space
+        than <literal>MyISAM</literal> tables. If an <literal>ALTER
+        TABLE</literal> operation runs out of space, it starts a
+        rollback, and that can take hours if it is disk-bound. For
+        inserts, <literal>InnoDB</literal> uses the insert buffer to
+        merge secondary index records to indexes in batches. That saves
+        a lot of disk I/O. For rollback, no such mechanism is used, and
+        the rollback can take 30 times longer than the insertion.
+      </para>
+
+      <para>
+        In the case of a runaway rollback, if you do not have valuable
+        data in your database, it may be advisable to kill the database
+        process rather than wait for millions of disk I/O operations to
+        complete. For the complete procedure, see
+        <xref linkend="forcing-recovery"/>.
+      </para>
+
+    </section>
+
+    <section id="innodb-auto-increment-handling">
+
+      <title>How <literal>AUTO_INCREMENT</literal> Handling Works in
+        <literal>InnoDB</literal></title>
+
+      <para>
+        <literal>InnoDB</literal> provides a locking strategy that
+        significantly improves scalability and performance of SQL
+        statements that add rows to tables with
+        <literal>AUTO_INCREMENT</literal> columns. This section provides
+        background information on the original
+        (<quote>traditional</quote>) implementation of auto-increment
+        locking in <literal>InnoDB</literal>, explains the configurable
+        locking mechanism, documents the parameter for configuring the
+        mechanism, and describes its behavior and interaction with
+        replication.
+      </para>
+
+      <section id="innodb-auto-increment-traditional">
+
+        <title><quote>Traditional</quote> <literal>InnoDB</literal> Auto-Increment
+          Locking</title>
+
+        <para>
+          The original implementation of auto-increment handling in
+          <literal>InnoDB</literal> uses the following strategy to
+          prevent problems when using the binary log for statement-based
+          replication or for certain recovery scenarios.
+        </para>
+
+        <para>
+          If you specify an <literal>AUTO_INCREMENT</literal> column for
+          an <literal>InnoDB</literal> table, the table handle in the
+          <literal>InnoDB</literal> data dictionary contains a special
+          counter called the auto-increment counter that is used in
+          assigning new values for the column. This counter is stored
+          only in main memory, not on disk.
+        </para>
+
+        <para>
+          <literal>InnoDB</literal> uses the following algorithm to
+          initialize the auto-increment counter for a table
+          <literal>t</literal> that contains an
+          <literal>AUTO_INCREMENT</literal> column named
+          <literal>ai_col</literal>: After a server startup, for the
+          first insert into a table <literal>t</literal>,
+          <literal>InnoDB</literal> executes the equivalent of this
+          statement:
+        </para>
+
+<programlisting>
+SELECT MAX(ai_col) FROM t FOR UPDATE;
+</programlisting>
+
+        <para>
+          <literal>InnoDB</literal> increments by one the value
+          retrieved by the statement and assigns it to the column and to
+          the auto-increment counter for the table. If the table is
+          empty, <literal>InnoDB</literal> uses the value
+          <literal>1</literal>. If a user invokes a <literal>SHOW TABLE
+          STATUS</literal> statement that displays output for the table
+          <literal>t</literal> and the auto-increment counter has not
+          been initialized, <literal>InnoDB</literal> initializes but
+          does not increment the value and stores it for use by later
+          inserts. This initialization uses a normal exclusive-locking
+          read on the table and the lock lasts to the end of the
+          transaction.
+        </para>
+
+        <para>
+          <literal>InnoDB</literal> follows the same procedure for
+          initializing the auto-increment counter for a freshly created
+          table.
+        </para>
+
+        <para>
+          After the auto-increment counter has been initialized, if a
+          user does not explicitly specify a value for an
+          <literal>AUTO_INCREMENT</literal> column,
+          <literal>InnoDB</literal> increments the counter by one and
+          assigns the new value to the column. If the user inserts a row
+          that explicitly specifies the column value, and the value is
+          bigger than the current counter value, the counter is set to
+          the specified column value.
+        </para>
+
+        <para>
+          When accessing the auto-increment counter,
+          <literal>InnoDB</literal> uses a special table-level
+          <literal>AUTO-INC</literal> lock that it keeps to the end of
+          the current SQL statement, not to the end of the transaction.
+          The special lock release strategy was introduced to improve
+          concurrency for inserts into a table containing an
+          <literal>AUTO_INCREMENT</literal> column. Nevertheless, two
+          transactions cannot have the <literal>AUTO-INC</literal> lock
+          on the same table simultaneously, which can have a performance
+          impact if the <literal>AUTO-INC</literal> lock is held for a
+          long time. That might be the case for a statement such as
+          <literal>INSERT INTO t1 ... SELECT ... FROM t2</literal> that
+          inserts all rows from one table into another.
+        </para>
+
+        <para>
+          <literal>InnoDB</literal> uses the in-memory auto-increment
+          counter as long as the server runs. When the server is stopped
+          and restarted, <literal>InnoDB</literal> reinitializes the
+          counter for each table for the first <literal>INSERT</literal>
+          to the table, as described earlier.
+        </para>
+
+        <para>
+          You may see gaps in the sequence of values assigned to the
+          <literal>AUTO_INCREMENT</literal> column if you roll back
+          transactions that have generated numbers using the counter.
+        </para>
+
+        <para>
+          If a user specifies <literal>NULL</literal> or
+          <literal>0</literal> for the <literal>AUTO_INCREMENT</literal>
+          column in an <literal>INSERT</literal>,
+          <literal>InnoDB</literal> treats the row as if the value had
+          not been specified and generates a new value for it.
+        </para>
+
+        <para>
+          The behavior of the auto-increment mechanism is not defined if
+          a user assigns a negative value to the column or if the value
+          becomes bigger than the maximum integer that can be stored in
+          the specified integer type.
+        </para>
+
+        <para>
+          An <literal>AUTO_INCREMENT</literal> column must be the first
+          column listed if it is part of a multiple-column index in an
+          <literal>InnoDB</literal> table.
+        </para>
+
+        <para>
+          <literal>InnoDB</literal> supports the <literal>AUTO_INCREMENT
+          = <replaceable>N</replaceable></literal> table option in
+          <literal>CREATE TABLE</literal> and <literal>ALTER
+          TABLE</literal> statements, to set the initial counter value
+          or alter the current counter value. The effect of this option
+          is canceled by a server restart, for reasons discussed earlier
+          in this section.
+        </para>
+
+      </section>
+
+      <section id="innodb-auto-increment-configurable">
+
+        <title>Configurable <literal>InnoDB</literal> Auto-Increment Locking</title>
+
+        <para>
+          As described in the previous section,
+          <literal>InnoDB</literal> uses a special lock called the
+          AUTO-INC table-level lock for inserts into tables with
+          <literal>AUTO_INCREMENT</literal> columns. This lock is
+          normally held to the end of the statement (not to the end of
+          the transaction), to ensure that auto-increment numbers are
+          assigned in a predictable and repeatable order for a given
+          sequence of <literal>INSERT</literal> statements.
+        </para>
+
+        <para>
+          In the case of statement-based replication, this means that
+          when a SQL statement is replicated on a slave server, the same
+          values are used for the auto-increment column as on the master
+          server. The result of execution of multiple
+          <literal>INSERT</literal> statements is deterministic, and the
+          slave reproduces the same data as on the master. If
+          auto-increment values generated by multiple
+          <literal>INSERT</literal> statements were interleaved, the
+          result of two concurrent <literal>INSERT</literal> statements
+          would be non-deterministic, and could not reliably be
+          propagated to a slave server using statement-based
+          replication.
+        </para>
+
+        <para>
+          To make this clear, consider an example that uses this table:
+        </para>
+
+<programlisting>
+CREATE TABLE t1 (
+  c1 INT(11) NOT NULL AUTO_INCREMENT,
+  c2 VARCHAR(10) DEFAULT NULL,
+  PRIMARY KEY (c1)
+) ENGINE=InnoDB;
+</programlisting>
+
+        <para>
+          Suppose that there are two transactions running, each
+          inserting rows into a table with an
+          <literal>AUTO_INCREMENT</literal> column. One transaction is
+          using an <literal>INSERT ... SELECT</literal> statement that
+          inserts 1000 rows, and another is using a simple
+          <literal>INSERT</literal> statement that inserts one row:
+        </para>
+
+<programlisting>
+Tx1: INSERT INTO t1 (c2) SELECT 1000 rows from another table ...
+Tx2: INSERT INTO t1 (c2) VALUES ('xxx');
+</programlisting>
+
+        <para>
+          <literal>InnoDB</literal> cannot tell in advance how many rows
+          will be retrieved from the <literal>SELECT</literal> in the
+          <literal>INSERT</literal> statement in Tx1, and it assigns the
+          auto-increment values one at a time as the statement proceeds.
+          With a table-level lock, held to the end of the statement,
+          only one <literal>INSERT</literal> statement referring to
+          table <literal>t1</literal> can execute at a time, and the
+          generation of auto-increment numbers by different statements
+          is not interleaved. The auto-increment value generated by the
+          Tx1 <literal>INSERT ... SELECT</literal> statement will be
+          consecutive, and the (single) auto-increment value used by the
+          <literal>INSERT</literal> statement in Tx2 will either be
+          smaller or larger than all those used for Tx1, depending on
+          which statement executes first.
+        </para>
+
+        <para>
+          As long as the SQL statements execute in the same order when
+          replayed from the binary log (when using statement-based
+          replication, or in recovery scenarios), the results will be
+          the same as they were when Tx1 and Tx2 first ran. Thus,
+          table-level locks held until the end of a statement make
+          <literal>INSERT</literal> statements using auto-increment safe
+          for use with statement-based replication. However, those locks
+          limit concurrency and scalability when multiple transactions
+          are executing insert statements at the same time.
+        </para>
+
+        <para>
+          In the preceding example, if there were no table-level lock,
+          the value of the auto-increment column used for the
+          <literal>INSERT</literal> in Tx2 depends on precisely when the
+          statement executes. If the <literal>INSERT</literal> of Tx2
+          executes while the <literal>INSERT</literal> of Tx1 is running
+          (rather than before it starts or after it completes), the
+          specific auto-increment values assigned by the two
+          <literal>INSERT</literal> statements are non-deterministic,
+          and may vary from run to run.
+        </para>
+
+        <para>
+          In MySQL &current-series;, <literal>InnoDB</literal> can avoid
+          using the table-level AUTO-INC lock for a class of
+          <literal>INSERT</literal> statements where the number of rows
+          is known in advance, and still preserve deterministic
+          execution and safety for statement-based replication. Further,
+          if you are not using the binary log to replay SQL statements
+          as part of recovery or replication, you can entirely eliminate
+          use of the AUTO-INC table-level lock for even greater
+          concurrency and performance&mdash;at the cost of permitting
+          gaps in auto-increment numbers assigned by a statement and
+          potentially having the numbers assigned by concurrently
+          executing statements interleaved.
+        </para>
+
+        <para>
+          For <literal>INSERT</literal> statements where the number of
+          rows to be inserted is known at the beginning of processing
+          the statement, <literal>InnoDB</literal> quickly allocates the
+          required number of auto-increment values without taking any
+          lock, but only if there is no concurrent session already
+          holding the table-level lock AUTO-INC lock (because that other
+          statement will be allocating auto-increment values one-by-one
+          as it proceeds). More precisely, such an
+          <literal>INSERT</literal> statement obtains auto-increment
+          values under the control of a mutex (a light-weight lock) that
+          is <emphasis>not</emphasis> held until the statement
+          completes, but only for the duration of the allocation
+          process.
+        </para>
+
+        <para>
+          This new locking scheme allows much greater scalability, but
+          it does introduce some subtle differences in how
+          auto-increment values are assigned compared to the original
+          mechanism. To describe the way auto-increment works in
+          <literal>InnoDB</literal>, the following discussion defines
+          some terms, and explains how <literal>InnoDB</literal> behaves
+          using different settings of the new
+          <literal>innodb_autoinc_lock_mode</literal> configuration
+          parameter. Additional considerations are described following
+          the explanation of auto-increment locking behavior.
+        </para>
+
+        <para>
+          First, some definitions:
+        </para>
+
+        <itemizedlist>
+
+          <listitem>
+            <para>
+              <quote><literal>INSERT</literal>-like</quote> statements
+            </para>
+
+            <para>
+              All statements that generate new rows in a table,
+              including <literal>INSERT</literal>, <literal>INSERT ...
+              SELECT</literal>, <literal>REPLACE</literal>,
+              <literal>REPLACE ... SELECT</literal>, and <literal>LOAD
+              DATA</literal>.
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              <quote>Simple inserts</quote>
+            </para>
+
+            <para>
+              Statements for which the number of rows to be inserted can
+              be determined in advance (when the statement is initially
+              processed). This includes single-row and multiple-row
+              <literal>INSERT</literal>, <literal>INSERT ... ON
+              DUPLICATE KEY UPDATE</literal>, and
+              <literal>REPLACE</literal> statements that do not have a
+              nested subquery.
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              <quote>Bulk inserts</quote>
+            </para>
+
+            <para>
+              Statements for which the number of rows to be inserted
+              (and the number of required auto-increment values) is not
+              known in advance. This includes <literal>INSERT ...
+              SELECT</literal>, <literal>REPLACE ... SELECT</literal>,
+              and <literal>LOAD DATA</literal> statements.
+              <literal>InnoDB</literal> will assign new values for the
+              <literal>AUTO_INCREMENT</literal> column one at a time as
+              each row is processed.
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              <quote>Mixed-mode inserts</quote>
+            </para>
+
+            <para>
+              These are <quote>simple insert</quote> statements that
+              specify the auto-increment value for some (but not all) of
+              the new rows. An example follows, where
+              <literal>c1</literal> is an
+              <literal>AUTO_INCREMENT</literal> column of table
+              <literal>t1</literal>:
+            </para>
+
+<programlisting>
+INSERT INTO t1 (c1,c2) VALUES (1,'a'), (NULL,'b'), (5,'c'), (NULL,'d');
+</programlisting>
+          </listitem>
+
+        </itemizedlist>
+
+        <para>
+          In MySQL &current-series;, there is a new configuration
+          parameter that controls how <literal>InnoDB</literal> uses
+          locking when generating values for
+          <literal>AUTO_INCREMENT</literal> columns. This parameter can
+          be set using the <option>--innodb-autoinc-lock-mode</option>
+          option at <command>mysqld</command> startup, or at runtime by
+          setting the global <literal>innodb_autoinc_lock_mode</literal>
+          system variable. Although this variable can be changed at
+          runtime, the behavior is undefined if you do so while
+          transactions that generate auto-increment values are
+          executing.
+        </para>
+
+        <para>
+          In general, if you encounter problems with the way
+          auto-increment works (which will most likely involve
+          replication), you can force use of the original behavior by
+          setting the lock mode to 0.
+        </para>
+
+        <para>
+          There are three possible settings for the
+          <literal>innodb_autoinc_lock_mode</literal> parameter:
+        </para>
+
+        <itemizedlist>
+
+          <listitem>
+            <para>
+              <literal>innodb_autoinc_lock_mode = 0</literal>
+              (<quote>traditional</quote> lock mode)
+            </para>
+
+            <para>
+              This lock mode provides the same behavior as before
+              <literal>innodb_autoinc_lock_mode</literal> existed. For
+              all <quote><literal>INSERT</literal>-like</quote>
+              statements, a special AUTO-INC table-level lock is
+              obtained and held to the end of the statement. This
+              assures that the auto-increment values assigned by any
+              given statement are consecutive (although
+              <quote>gaps</quote> can exist within a table if a
+              transaction that generated auto-increment values is rolled
+              back, as discussed later).
+            </para>
+
+            <para>
+              This lock mode is provided only for backward compatibility
+              and performance testing. There is little reason to use
+              this lock mode unless you use <quote>mixed-mode
+              inserts</quote> and care about the minor difference in
+              semantics described later.
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              <literal>innodb_autoinc_lock_mode = 1</literal>
+              (<quote>consecutive</quote> lock mode)
+            </para>
+
+            <para>
+              This is the default lock mode. In this mode, <quote>bulk
+              inserts</quote> use the special table-level AUTIONC
+              table-level lock and holds it until the end of the
+              statement. This applies to all <literal>INSERT ...
+              SELECT</literal>, <literal>REPLACE ... SELECT</literal>,
+              and <literal>LOAD DATA</literal> statements. Only one
+              statement holding the AUTO-INC lock can execute at a time.
+            </para>
+
+            <para>
+              With this lock mode, <quote>simple inserts</quote> (only)
+              use a new locking model where a light-weight mutex is used
+              during the allocation of auto-increment values, and no
+              AUTO-INC table-level lock is used, unless an AUTO-INC lock
+              is held by another transaction. If another transaction
+              does hold an AUTO-INC lock, a <quote>simple insert</quote>
+              waits for the AUTO-INC lock, as if it too were a
+              <quote>bulk insert.</quote>
+            </para>
+
+            <para>
+              This lock mode ensures that, in the presence of
+              <literal>INSERT</literal> statements where the number of
+              rows is not known in advance (and where auto-increment
+              numbers are assigned as the statement progresses), all
+              auto-increment values assigned by any
+              <quote><literal>INSERT</literal>-like</quote> statement
+              are consecutive, and operations are safe for
+              statement-based replication.
+            </para>
+
+            <para>
+              Simply put, the important impact of this lock mode is
+              significantly better scalability. This mode is safe for
+              use with statement-based replication. Further, as with
+              <quote>traditional</quote> lock mode, auto-increment
+              numbers assigned by any given statement are
+              <emphasis>consecutive</emphasis>. In this mode, there is
+              <emphasis>no change</emphasis> in semantics compared to
+              <quote>traditional</quote> mode for any statement that
+              uses auto-increment, with one minor exception.
+            </para>
+
+            <para>
+              The exception is for <quote>mixed-mode inserts</quote>,
+              where the user provides explicit values for an
+              <literal>AUTO_INCREMENT</literal> column for some, but not
+              all, rows in a multiple-row <quote>simple insert.</quote>
+              For such inserts, <literal>InnoDB</literal> will allocate
+              more auto-increment values than the number of rows to be
+              inserted. However, all values automatically assigned are
+              consecutively generated (and thus higher than) the
+              auto-increment value generated by the most recently
+              executed previous statement. <quote>Excess</quote> numbers
+              are lost.
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              <literal>innodb_autoinc_lock_mode = 2</literal>
+              (<quote>interleaved</quote> lock mode)
+            </para>
+
+            <para>
+              In this lock mode, no
+              <quote><literal>INSERT</literal>-like</quote> statements
+              use the table-level AUTO-INC lock, and multiple statements
+              can execute at the same time. This is the fastest and most
+              scalable lock mode, but it is <emphasis>not
+              safe</emphasis> when using statement-based replication or
+              recovery scenarios when SQL statements are replayed from
+              the binary log.
+            </para>
+
+            <para>
+              In this lock mode, auto-increment values are guaranteed to
+              be unique and monotonically increasing across all
+              concurrently executing
+              <quote><literal>INSERT</literal>-like</quote> statements.
+              However, because multiple statements can be generating
+              numbers at the same time (that is, allocation of numbers
+              is <emphasis>interleaved</emphasis> across statements),
+              the values generated for the rows inserted by any given
+              statement may not be consecutive.
+            </para>
+
+            <para>
+              If the only statements executing are <quote>simple
+              inserts</quote> where the number of rows to be inserted is
+              known ahead of time, there will be no gaps in the numbers
+              generated for a single statement, except for
+              <quote>mixed-mode inserts.</quote> However, when
+              <quote>bulk inserts</quote> are executed, there may be
+              gaps in the auto-increment values assigned by any given
+              statement.
+            </para>
+          </listitem>
+
+        </itemizedlist>
+
+        <para>
+          The auto-increment locking modes provided by
+          <literal>innodb_autoinc_lock_mode</literal> have several usage
+          implications:
+        </para>
+
+        <itemizedlist>
+
+          <listitem>
+            <para>
+              Using auto-increment with replication
+            </para>
+
+            <para>
+              If you are using statement-based replication, you should
+              set <literal>innodb_autoinc_lock_mode</literal> to 0 or 1
+              and use the same value on the master and its slaves.
+              Auto-increment values are not ensured to be the same on
+              the slaves as on the master if you use
+              <literal>innodb_autoinc_lock_mode</literal> = 2
+              (<quote>interleaved</quote>) or configurations where the
+              master and slaves do not use the same lock mode.
+            </para>
+
+            <para>
+              If you are using row-based replication, all of the
+              auto-increment lock modes are safe. Row-based replication
+              is not sensitive to the order of execution of the SQL
+              statements.
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              <quote>Lost</quote> auto-increment values and sequence
+              gaps
+            </para>
+
+            <para>
+              In all lock modes (0, 1, and 2), if a transaction that
+              generated auto-increment values rolls back, those
+              auto-increment values are <quote>lost.</quote> Once a
+              value is generated for an auto-increment column, it cannot
+              be rolled back, whether or not the
+              <quote><literal>INSERT</literal>-like</quote> statement is
+              completed, and whether or not the containing transaction
+              is rolled back. Such lost values are not reused. Thus,
+              there may be gaps in the values stored in an
+              <literal>AUTO_INCREMENT</literal> column of a table.
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              Auto-increment values assigned by <quote>mixed-mode
+              inserts</quote>
+            </para>
+
+            <para>
+              Consider a <quote>mixed-mode insert,</quote> where a
+              <quote>simple insert</quote> specifies the auto-increment
+              value for some (but not all) resulting rows. Such a
+              statement will behave differently in lock modes 0, 1, and
+              2. For example, assume <literal>c1</literal> is an
+              <literal>AUTO_INCREMENT</literal> column of table
+              <literal>t1</literal>, and that the most recent
+              automatically generated sequence number is 100. Consider
+              the following <quote>mixed-mode insert</quote> statement:
+            </para>
+
+<programlisting>
+INSERT INTO t1 (c1,c2) VALUES (1,'a'), (NULL,'b'), (5,'c'), (NULL,'d');
+</programlisting>
+
+            <para>
+              With <literal>innodb_autoinc_lock_mode</literal> set to 0
+              (<quote>traditional</quote>), the four new rows will be:
+            </para>
+
+<programlisting>
+<!--
+mysql> DROP TABLE IF EXISTS t1;
+mysql> CREATE TABLE t1 (c1 INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY, c2 CHAR(1));
+mysql> INSERT INTO t1 VALUES(1,'a'),(101,'b'),(5,'c'),(102,'d');
+mysql> SELECT c1, c2 FROM t1;
+-->
++-----+------+
+| c1  | c2   |
++-----+------+
+|   1 | a    | 
+| 101 | b    | 
+|   5 | c    | 
+| 102 | d    | 
++-----+------+
+</programlisting>
+
+            <para>
+              The next available auto-increment value will be 103
+              because the auto-increment values are allocated one at a
+              time, not all at once at the beginning of statement
+              execution. This result is true whether or not there are
+              concurrently executing
+              <quote><literal>INSERT</literal>-like</quote> statements
+              (of any type).
+            </para>
+
+            <para>
+              With <literal>innodb_autoinc_lock_mode</literal> set to 1
+              (<quote>consecutive</quote>), the four new rows will also
+              be:
+            </para>
+
+<programlisting>
++-----+------+
+| c1  | c2   |
++-----+------+
+|   1 | a    | 
+| 101 | b    | 
+|   5 | c    | 
+| 102 | d    | 
++-----+------+
+</programlisting>
+
+            <para>
+              However, in this case, the next available auto-increment
+              value will be 105, not 103 because four auto-increment
+              values are allocated at the time the statement is
+              processed, but only two are used. This result is true
+              whether or not there are concurrently executing
+              <quote><literal>INSERT</literal>-like</quote> statements
+              (of any type).
+            </para>
+
+            <para>
+              With <literal>innodb_autoinc_lock_mode</literal> set to
+              mode 2 (<quote>interleaved</quote>), the four new rows
+              will be:
+            </para>
+
+<programlisting>
++-----+------+
+| c1  | c2   |
++-----+------+
+|   1 | a    | 
+|   <replaceable>x</replaceable> | b    | 
+|   5 | c    | 
+|   <replaceable>y</replaceable> | d    | 
++-----+------+
+</programlisting>
+
+            <para>
+              The values of <replaceable>x</replaceable> and
+              <replaceable>y</replaceable> will be unique and larger
+              than any previously generated rows. However, the specific
+              values of <replaceable>x</replaceable> and
+              <replaceable>y</replaceable> will depend on the number of
+              auto-increment values generated by concurrently executing
+              statements.
+            </para>
+
+            <para>
+              Finally, consider the following statement, issued when the
+              most-recently generated sequence number was the value 4:
+            </para>
+
+<programlisting>
+INSERT INTO t1 (c1,c2) VALUES (1,'a'), (NULL,'b'), (5,'c'), (NULL,'d');
+</programlisting>
+
+            <para>
+              With any <literal>innodb_autoinc_lock_mode</literal>
+              setting, this statement will generate a duplicate-key
+              error 23000 (<literal>Can't write; duplicate key in
+              table</literal>) because 5 will be allocated for the row
+              <literal>(NULL, 'b')</literal> and insertion of the row
+              <literal>(5, 'c')</literal> will fail.
+            </para>
+          </listitem>
+
+          <listitem>
+            <para>
+              Gaps in auto-increment values for <quote>bulk
+              inserts</quote>
+            </para>
+
+            <para>
+              With <literal>innodb_autoinc_lock_mode</literal> set to 0
+              (<quote>traditional</quote>) or 1
+              (<quote>consecutive</quote>), the auto-increment values
+              generated by any given statement will be consecutive,
+              without gaps, because the table-level AUTO-INC lock is
+              held until the end of the statement, and only one such
+              statement can execute at a time.
+            </para>
+
+            <para>
+              With <literal>innodb_autoinc_lock_mode</literal> set to 2
+              (<quote>interleaved</quote>), there may be gaps in the
+              auto-increment values generated by <quote>bulk
+              inserts,</quote> but only if there are concurrently
+              executing <quote><literal>INSERT</literal>-like</quote>
+              statements.
+            </para>
+          </listitem>
+
+        </itemizedlist>
+
+      </section>
+
+    </section>
+
+    <section id="innodb-foreign-key-constraints">
+
+      <title><literal>FOREIGN KEY</literal> Constraints</title>
+
+      <remark role="help-category" condition="Data Definition"/>
+
+      <remark role="help-topic" condition="CONSTRAINT"/>
+
+      <remark role="help-keywords">
+        FOREIGN KEY REFERENCES NO ACTION ON DELETE CASCADE SET NULL
+        RESTRICT UPDATE
+      </remark>
+
+      <remark role="help-description-begin"/>
+
+      <para>
+        <literal>InnoDB</literal> also supports foreign key constraints.
+        The syntax for a foreign key constraint definition in
+        <literal>InnoDB</literal> looks like this:
+      </para>
+
+<programlisting>
+[CONSTRAINT <replaceable>symbol</replaceable>] FOREIGN KEY [<replaceable>id</replaceable>] (<replaceable>index_col_name</replaceable>, ...)
+    REFERENCES <replaceable>tbl_name</replaceable> (<replaceable>index_col_name</replaceable>, ...)
+    [ON DELETE {RESTRICT | CASCADE | SET NULL | NO ACTION}]
+    [ON UPDATE {RESTRICT | CASCADE | SET NULL | NO ACTION}]
+</programlisting>
+
+      <remark role="help-description-end"/>
+
+      <para>
+        Foreign keys definitions are subject to the following
+        conditions:
+      </para>
+
+      <itemizedlist>
+
+        <listitem>
+          <para>
+            Both tables must be <literal>InnoDB</literal> tables and
+            they must not be <literal>TEMPORARY</literal> tables.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Corresponding columns in the foreign key and the referenced
+            key must have similar internal data types inside
+            <literal>InnoDB</literal> so that they can be compared
+            without a type conversion. <emphasis>The size and sign of
+            integer types must be the same</emphasis>. The length of
+            string types need not be the same. For non-binary
+            (character) string columns, the character set and collation
+            must be the same.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            In the referencing table, there must be an index where the
+            foreign key columns are listed as the
+            <emphasis>first</emphasis> columns in the same order. Such
+            an index is created on the referencing table automatically
+            if it does not exist.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            In the referenced table, there must be an index where the
+            referenced columns are listed as the
+            <emphasis>first</emphasis> columns in the same order.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Index prefixes on foreign key columns are not supported. One
+            consequence of this is that <literal>BLOB</literal> and
+            <literal>TEXT</literal> columns cannot be included in a
+            foreign key, because indexes on those columns must always
+            include a prefix length.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            If the <literal>CONSTRAINT
+            <replaceable>symbol</replaceable></literal> clause is given,
+            the <replaceable>symbol</replaceable> value must be unique
+            in the database. If the clause is not given,
+            <literal>InnoDB</literal> creates the name automatically.
+          </para>
+        </listitem>
+
+      </itemizedlist>
+
+      <para>
+        <literal>InnoDB</literal> rejects any <literal>INSERT</literal>
+        or <literal>UPDATE</literal> operation that attempts to create a
+        foreign key value in a child table if there is no a matching
+        candidate key value in the parent table. The action
+        <literal>InnoDB</literal> takes for any
+        <literal>UPDATE</literal> or <literal>DELETE</literal> operation
+        that attempts to update or delete a candidate key value in the
+        parent table that has some matching rows in the child table is
+        dependent on the <emphasis>referential action</emphasis>
+        specified using <literal>ON UPDATE</literal> and <literal>ON
+        DELETE</literal> subclauses of the <literal>FOREIGN
+        KEY</literal> clause. When the user attempts to delete or update
+        a row from a parent table, and there are one or more matching
+        rows in the child table, <literal>InnoDB</literal> supports five
+        options regarding the action to be taken:
+      </para>
+
+      <itemizedlist>
+
+        <remark role="todo">
+          Rewrite following paragraph to make clearer.
+        </remark>
+
+        <listitem>
+          <para>
+            <literal>CASCADE</literal>: Delete or update the row from
+            the parent table and automatically delete or update the
+            matching rows in the child table. Both <literal>ON DELETE
+            CASCADE</literal> and <literal>ON UPDATE CASCADE</literal>
+            are supported. Between two tables, you should not define
+            several <literal>ON UPDATE CASCADE</literal> clauses that
+            act on the same column in the parent table or in the child
+            table.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>SET NULL</literal>: Delete or update the row from
+            the parent table and set the foreign key column or columns
+            in the child table to <literal>NULL</literal>. This is valid
+            only if the foreign key columns do not have the <literal>NOT
+            NULL</literal> qualifier specified. Both <literal>ON DELETE
+            SET NULL</literal> and <literal>ON UPDATE SET NULL</literal>
+            clauses are supported.
+          </para>
+
+          <para>
+            If you specify a <literal>SET NULL</literal> action,
+            <emphasis>make sure that you have not declared the columns
+            in the child table as <literal>NOT
+            NULL</literal></emphasis>.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>NO ACTION</literal>: In standard SQL, <literal>NO
+            ACTION</literal> means <emphasis>no action</emphasis> in the
+            sense that an attempt to delete or update a primary key
+            value is not allowed to proceed if there is a related
+            foreign key value in the referenced table.
+            <literal>InnoDB</literal> rejects the delete or update
+            operation for the parent table.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>RESTRICT</literal>: Rejects the delete or update
+            operation for the parent table. <literal>NO ACTION</literal>
+            and <literal>RESTRICT</literal> are the same as omitting the
+            <literal>ON DELETE</literal> or <literal>ON UPDATE</literal>
+            clause. (Some database systems have deferred checks, and
+            <literal>NO ACTION</literal> is a deferred check. In MySQL,
+            foreign key constraints are checked immediately, so
+            <literal>NO ACTION</literal> and <literal>RESTRICT</literal>
+            are the same.)
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>SET DEFAULT</literal>: This action is recognized by
+            the parser, but <literal>InnoDB</literal> rejects table
+            definitions containing <literal>ON DELETE SET
+            DEFAULT</literal> or <literal>ON UPDATE SET
+            DEFAULT</literal> clauses.
+          </para>
+        </listitem>
+
+      </itemizedlist>
+
+      <para>
+        Note that <literal>InnoDB</literal> supports foreign key
+        references within a table. In these cases, <quote>child table
+        records</quote> really refers to dependent records within the
+        same table.
+      </para>
+
+      <para>
+        <literal>InnoDB</literal> requires indexes on foreign keys and
+        referenced keys so that foreign key checks can be fast and not
+        require a table scan. The index on the foreign key is created
+        automatically. This is in contrast to some older versions, in
+        which indexes had to be created explicitly or the creation of
+        foreign key constraints would fail.
+      </para>
+
+      <para>
+        If MySQL reports an error number 1005 from a <literal>CREATE
+        TABLE</literal> statement, and the error message refers to errno
+        150, table creation failed because a foreign key constraint was
+        not correctly formed. Similarly, if an <literal>ALTER
+        TABLE</literal> fails and it refers to errno 150, that means a
+        foreign key definition would be incorrectly formed for the
+        altered table. You can use <literal>SHOW ENGINE INNODB
+        STATUS</literal> to display a detailed explanation of the most
+        recent <literal>InnoDB</literal> foreign key error in the
+        server.
+      </para>
+
+      <note>
+        <para>
+          <literal>InnoDB</literal> does not check foreign key
+          constraints on those foreign key or referenced key values that
+          contain a <literal>NULL</literal> column.
+        </para>
+      </note>
+
+      <note>
+        <para>
+          Currently, triggers are not activated by cascaded foreign key
+          actions.
+        </para>
+      </note>
+
+      <para>
+        You cannot create a table with a column name that matches the
+        name of an internal InnoDB column (including
+        <literal>DB_ROW_ID</literal>, <literal>DB_TRX_ID</literal>,
+        <literal>DB_ROLL_PTR</literal> and
+        <literal>DB_MIX_ID</literal>). The server will report error 1005
+        and refers to <literal>errno</literal> -1 in the error message.
+      </para>
+
+      <para>
+        <emphasis role="bold">Deviation from SQL standards</emphasis>:
+        If there are several rows in the parent table that have the same
+        referenced key value, <literal>InnoDB</literal> acts in foreign
+        key checks as if the other parent rows with the same key value
+        do not exist. For example, if you have defined a
+        <literal>RESTRICT</literal> type constraint, and there is a
+        child row with several parent rows, <literal>InnoDB</literal>
+        does not allow the deletion of any of those parent rows.
+      </para>
+
+      <para>
+        <literal>InnoDB</literal> performs cascading operations through
+        a depth-first algorithm, based on records in the indexes
+        corresponding to the foreign key constraints.
+      </para>
+
+      <para>
+        <emphasis role="bold">Deviation from SQL standards</emphasis>: A
+        <literal>FOREIGN KEY</literal> constraint that references a
+        non-<literal>UNIQUE</literal> key is not standard SQL. It is an
+        <literal>InnoDB</literal> extension to standard SQL.
+      </para>
+
+      <para>
+        <emphasis role="bold">Deviation from SQL standards</emphasis>:
+        If <literal>ON UPDATE CASCADE</literal> or <literal>ON UPDATE
+        SET NULL</literal> recurses to update the <emphasis>same
+        table</emphasis> it has previously updated during the cascade,
+        it acts like <literal>RESTRICT</literal>. This means that you
+        cannot use self-referential <literal>ON UPDATE CASCADE</literal>
+        or <literal>ON UPDATE SET NULL</literal> operations. This is to
+        prevent infinite loops resulting from cascaded updates. A
+        self-referential <literal>ON DELETE SET NULL</literal>, on the
+        other hand, is possible, as is a self-referential <literal>ON
+        DELETE CASCADE</literal>. Cascading operations may not be nested
+        more than 15 levels deep.
+      </para>
+
+      <para>
+        <emphasis role="bold">Deviation from SQL standards</emphasis>:
+        Like MySQL in general, in an SQL statement that inserts,
+        deletes, or updates many rows, <literal>InnoDB</literal> checks
+        <literal>UNIQUE</literal> and <literal>FOREIGN KEY</literal>
+        constraints row-by-row. According to the SQL standard, the
+        default behavior should be deferred checking. That is,
+        constraints are only checked after the <emphasis>entire SQL
+        statement</emphasis> has been processed. Until
+        <literal>InnoDB</literal> implements deferred constraint
+        checking, some things will be impossible, such as deleting a
+        record that refers to itself via a foreign key.
+      </para>
+
+      <para>
+        Here is a simple example that relates <literal>parent</literal>
+        and <literal>child</literal> tables through a single-column
+        foreign key:
+      </para>
+
+<programlisting>
+CREATE TABLE parent (id INT NOT NULL,
+                     PRIMARY KEY (id)
+) ENGINE=INNODB;
+CREATE TABLE child (id INT, parent_id INT,
+                    INDEX par_ind (parent_id),
+                    FOREIGN KEY (parent_id) REFERENCES parent(id)
+                      ON DELETE CASCADE
+) ENGINE=INNODB;
+</programlisting>
+
+      <para>
+        A more complex example in which a
+        <literal>product_order</literal> table has foreign keys for two
+        other tables. One foreign key references a two-column index in
+        the <literal>product</literal> table. The other references a
+        single-column index in the <literal>customer</literal> table:
+      </para>
+
+      <remark role="help-example"/>
+
+<programlisting>
+CREATE TABLE product (category INT NOT NULL, id INT NOT NULL,
+                      price DECIMAL,
+                      PRIMARY KEY(category, id)) ENGINE=INNODB;
+CREATE TABLE customer (id INT NOT NULL,
+                       PRIMARY KEY (id)) ENGINE=INNODB;
+CREATE TABLE product_order (no INT NOT NULL AUTO_INCREMENT,
+                            product_category INT NOT NULL,
+                            product_id INT NOT NULL,
+                            customer_id INT NOT NULL,
+                            PRIMARY KEY(no),
+                            INDEX (product_category, product_id),
+                            FOREIGN KEY (product_category, product_id)
+                              REFERENCES product(category, id)
+                              ON UPDATE CASCADE ON DELETE RESTRICT,
+                            INDEX (customer_id),
+                            FOREIGN KEY (customer_id)
+                              REFERENCES customer(id)) ENGINE=INNODB;
+</programlisting>
+
+      <para>
+        <literal>InnoDB</literal> allows you to add a new foreign key
+        constraint to a table by using <literal>ALTER TABLE</literal>:
+      </para>
+
+<programlisting>
+ALTER TABLE <replaceable>tbl_name</replaceable>
+    ADD [CONSTRAINT <replaceable>symbol</replaceable>] FOREIGN KEY [<replaceable>id</replaceable>] (<replaceable>index_col_name</replaceable>, ...)
+    REFERENCES <replaceable>tbl_name</replaceable> (<replaceable>index_col_name</replaceable>, ...)
+    [ON DELETE {RESTRICT | CASCADE | SET NULL | NO ACTION}]
+    [ON UPDATE {RESTRICT | CASCADE | SET NULL | NO ACTION}]
+</programlisting>
+
+      <para>
+        <emphasis role="bold">Remember to create the required indexes
+        first</emphasis>. You can also add a self-referential foreign
+        key constraint to a table using <literal>ALTER TABLE</literal>.
+      </para>
+
+      <indexterm>
+        <primary>DROP FOREIGN KEY</primary>
+      </indexterm>
+
+      <indexterm>
+        <primary>deleting</primary>
+        <secondary>foreign key</secondary>
+      </indexterm>
+
+      <indexterm>
+        <primary>foreign key</primary>
+        <secondary>deleting</secondary>
+      </indexterm>
+
+      <para>
+        <literal>InnoDB</literal> also supports the use of
+        <literal>ALTER TABLE</literal> to drop foreign keys:
+      </para>
+
+<programlisting>
+ALTER TABLE <replaceable>tbl_name</replaceable> DROP FOREIGN KEY <replaceable>fk_symbol</replaceable>;
+</programlisting>
+
+      <para>
+        If the <literal>FOREIGN KEY</literal> clause included a
+        <literal>CONSTRAINT</literal> name when you created the foreign
+        key, you can refer to that name to drop the foreign key.
+        Otherwise, the <replaceable>fk_symbol</replaceable> value is
+        internally generated by <literal>InnoDB</literal> when the
+        foreign key is created. To find out the symbol value when you
+        want to drop a foreign key, use the <literal>SHOW CREATE
+        TABLE</literal> statement. For example:
+      </para>
+
+<programlisting>
+mysql&gt; <userinput>SHOW CREATE TABLE ibtest11c\G</userinput>
+*************************** 1. row ***************************
+       Table: ibtest11c
+Create Table: CREATE TABLE `ibtest11c` (
+  `A` int(11) NOT NULL auto_increment,
+  `D` int(11) NOT NULL default '0',
+  `B` varchar(200) NOT NULL default '',
+  `C` varchar(175) default NULL,
+  PRIMARY KEY  (`A`,`D`,`B`),
+  KEY `B` (`B`,`C`),
+  KEY `C` (`C`),
+  CONSTRAINT `0_38775` FOREIGN KEY (`A`, `D`)
+REFERENCES `ibtest11a` (`A`, `D`)
+ON DELETE CASCADE ON UPDATE CASCADE,
+  CONSTRAINT `0_38776` FOREIGN KEY (`B`, `C`)
+REFERENCES `ibtest11a` (`B`, `C`)
+ON DELETE CASCADE ON UPDATE CASCADE
+) ENGINE=INNODB CHARSET=latin1
+1 row in set (0.01 sec)
+
+mysql&gt; <userinput>ALTER TABLE ibtest11c DROP FOREIGN KEY `0_38775`;</userinput>
+</programlisting>
+
+      <para>
+        You cannot add a foreign key and drop a foreign key in separate
+        clauses of a single <literal>ALTER TABLE</literal> statement.
+        Separate statements are required.
+      </para>
+
+      <para>
+        If <literal>ALTER TABLE</literal> for an
+        <literal>InnoDB</literal> table results in changes to column
+        values (for example, because a column is truncated),
+        <literal>InnoDB</literal>'s <literal>FOREIGN KEY</literal>
+        constraint checks do not notice possible violations caused by
+        changing the values.
+      </para>
+
+      <para>
+        The <literal>InnoDB</literal> parser allows table and column
+        identifiers in a <literal>FOREIGN KEY ... REFERENCES
+        ...</literal> clause to be quoted within backticks.
+        (Alternatively, double quotes can be used if the
+        <literal>ANSI_QUOTES</literal> SQL mode is enabled.) The
+        <literal>InnoDB</literal> parser also takes into account the
+        setting of the <literal>lower_case_table_names</literal> system
+        variable.
+      </para>
+
+      <para>
+        <literal>InnoDB</literal> returns a table's foreign key
+        definitions as part of the output of the <literal>SHOW CREATE
+        TABLE</literal> statement:
+      </para>
+
+<programlisting>
+SHOW CREATE TABLE <replaceable>tbl_name</replaceable>;
+</programlisting>
+
+      <para>
+        <command>mysqldump</command> also produces correct definitions
+        of tables to the dump file, and does not forget about the
+        foreign keys.
+      </para>
+
+      <para>
+        You can also display the foreign key constraints for a table
+        like this:
+      </para>
+
+<programlisting>
+SHOW TABLE STATUS FROM <replaceable>db_name</replaceable> LIKE '<replaceable>tbl_name</replaceable>';
+</programlisting>
+
+      <para>
+        The foreign key constraints are listed in the
+        <literal>Comment</literal> column of the output.
+      </para>
+
+      <para>
+        When performing foreign key checks, <literal>InnoDB</literal>
+        sets shared row-level locks on child or parent records it has to
+        look at. <literal>InnoDB</literal> checks foreign key
+        constraints immediately; the check is not deferred to
+        transaction commit.
+      </para>
+
+      <para>
+        To make it easier to reload dump files for tables that have
+        foreign key relationships, <command>mysqldump</command>
+        automatically includes a statement in the dump output to set
+        <literal>FOREIGN_KEY_CHECKS</literal> to 0. This avoids problems
+        with tables having to be reloaded in a particular order when the
+        dump is reloaded. It is also possible to set this variable
+        manually:
+      </para>
+
+<programlisting>
+mysql&gt; <userinput>SET FOREIGN_KEY_CHECKS = 0;</userinput>
+mysql&gt; <userinput>SOURCE <replaceable>dump_file_name</replaceable>;</userinput>
+mysql&gt; <userinput>SET FOREIGN_KEY_CHECKS = 1;</userinput>
+</programlisting>
+
+      <para>
+        This allows you to import the tables in any order if the dump
+        file contains tables that are not correctly ordered for foreign
+        keys. It also speeds up the import operation. Setting
+        <literal>FOREIGN_KEY_CHECKS</literal> to 0 can also be useful
+        for ignoring foreign key constraints during <literal>LOAD
+        DATA</literal> and <literal>ALTER TABLE</literal> operations.
+        However, even if <literal>FOREIGN_KEY_CHECKS=0</literal>, InnoDB
+        does not allow the creation of a foreign key constraint where a
+        column references a non-matching column type. Also, if an
+        <literal>InnoDB</literal> table has foreign key constraints,
+        <literal>ALTER TABLE</literal> cannot be used to change the
+        table to use another storage engine. To alter the storage
+        engine, you must drop any foreign key constraints first.
+      </para>
+
+      <para>
+        <literal>InnoDB</literal> does not allow you to drop a table
+        that is referenced by a <literal>FOREIGN KEY</literal>
+        constraint, unless you do <literal>SET
+        FOREIGN_KEY_CHECKS=0</literal>. When you drop a table, the
+        constraints that were defined in its create statement are also
+        dropped.
+      </para>
+
+      <para>
+        If you re-create a table that was dropped, it must have a
+        definition that conforms to the foreign key constraints
+        referencing it. It must have the right column names and types,
+        and it must have indexes on the referenced keys, as stated
+        earlier. If these are not satisfied, MySQL returns error number
+        1005 and refers to errno 150 in the error message.
+      </para>
+
+    </section>
+
+    <section id="innodb-and-mysql-replication">
+
+      <title><literal>InnoDB</literal> and MySQL Replication</title>
+
+      <para>
+        MySQL replication works for <literal>InnoDB</literal> tables as
+        it does for <literal>MyISAM</literal> tables. It is also
+        possible to use replication in a way where the storage engine on
+        the slave is not the same as the original storage engine on the
+        master. For example, you can replicate modifications to an
+        <literal>InnoDB</literal> table on the master to a
+        <literal>MyISAM</literal> table on the slave.
+      </para>
+
+      <para>
+        To set up a new slave for a master, you have to make a copy of
+        the <literal>InnoDB</literal> tablespace and the log files, as
+        well as the <filename>.frm</filename> files of the
+        <literal>InnoDB</literal> tables, and move the copies to the
+        slave. If the <literal>innodb_file_per_table</literal> variable
+        is enabled, you must also copy the <filename>.ibd</filename>
+        files as well. For the proper procedure to do this, see
+        <xref linkend="innodb-backup"/>.
+      </para>
+
+      <para>
+        If you can shut down the master or an existing slave, you can
+        take a cold backup of the <literal>InnoDB</literal> tablespace
+        and log files and use that to set up a slave. To make a new
+        slave without taking down any server you can also use the
+        non-free (commercial)
+        <ulink url="http://www.innodb.com/hot-backup"><literal>InnoDB
+        Hot Backup</literal> tool</ulink>.
+      </para>
+
+      <para>
+        You cannot set up replication for <literal>InnoDB</literal>
+        using the <literal>LOAD TABLE FROM MASTER</literal> statement,
+        which works only for <literal>MyISAM</literal> tables. There are
+        two possible workarounds:
+      </para>
+
+      <itemizedlist>
+
+        <listitem>
+          <para>
+            Dump the table on the master and import the dump file into
+            the slave.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Use <literal>ALTER TABLE <replaceable>tbl_name</replaceable>
+            ENGINE=MyISAM</literal> on the master before setting up
+            replication with <literal>LOAD TABLE
+            <replaceable>tbl_name</replaceable> FROM MASTER</literal>,
+            and then use <literal>ALTER TABLE</literal> to convert the
+            master table back to <literal>InnoDB</literal> afterward.
+            However, this should not be done for tables that have
+            foreign key definitions because the definitions will be
+            lost.
+          </para>
+        </listitem>
+
+      </itemizedlist>
+
+      <para>
+        Transactions that fail on the master do not affect replication
+        at all. MySQL replication is based on the binary log where MySQL
+        writes SQL statements that modify data. A transaction that fails
+        (for example, because of a foreign key violation, or because it
+        is rolled back) is not written to the binary log, so it is not
+        sent to slaves. See <xref linkend="commit"/>.
+      </para>
+
+      <important>
+        <para>
+          Cascading actions for <literal>InnoDB</literal> tables on the
+          master are <emphasis>not</emphasis> replicated to the slave.
+          For example, suppose you have two tables defined and populated
+          on both the master and the slave as shown here:
+
+<programlisting>
+CREATE TABLE fc1 (
+    i INT PRIMARY KEY, 
+    j INT
+) ENGINE = InnoDB;
+
+CREATE TABLE fc2 (
+    m INT PRIMARY KEY, 
+    n INT, 
+    FOREIGN KEY mi (m) REFERENCES fc1 (i) 
+        ON DELETE CASCADE
+) ENGINE = InnoDB;
+
+INSERT INTO fc1 VALUES (1, 1), (2, 2);
+INSERT INTO fc2 VALUES (1, 1), (2, 2), (3, 1);
+</programlisting>
+
+          At this point, on both the master and the slave, table
+          <literal>fc1</literal> contains 2 rows, and table
+          <literal>fc2</literal> contains 3 rows. Now suppose that you
+          perform the following <literal>DELETE</literal> statement on
+          the master:
+
+<programlisting>
+DELETE FROM fc1 WHERE i = 1;
+</programlisting>
+
+          Due to the cascade, table <literal>fc2</literal> on the master
+          now contains only 1 row:
+
+<programlisting>
+mysql&gt; <userinput>SELECT * FROM fc2;</userinput>
++---+---+
+| m | n |
++---+---+
+| 2 | 2 |
++---+---+
+1 row in set (0.00 sec)
+</programlisting>
+
+          However, the cascade does not propagate to the slave. The
+          slave&apos;s copy of <literal>fc2</literal> still contains all
+          of the rows that were originally inserted:
+
+<programlisting>
+mysql&gt; <userinput>SELECT * FROM fc2;</userinput>
++---+---+
+| m | n |
++---+---+
+| 1 | 1 |
+| 3 | 1 |
+| 2 | 2 |
++---+---+
+3 rows in set (0.00 sec)
+</programlisting>
+        </para>
+
+        <para>
+          This is true whether you are using row-based replication or
+          statement-based replication.
+        </para>
+      </important>
+
+    </section>
+
+  </section>
+
+  <section id="adding-and-removing">
+
+    <title>Adding and Removing <literal>InnoDB</literal> Data and Log Files</title>
+
+    <para>
+      This section describes what you can do when your
+      <literal>InnoDB</literal> tablespace runs out of room or when you
+      want to change the size of the log files.
+    </para>
+
+    <para>
+      The easiest way to increase the size of the
+      <literal>InnoDB</literal> tablespace is to configure it from the
+      beginning to be auto-extending. Specify the
+      <literal>autoextend</literal> attribute for the last data file in
+      the tablespace definition. Then <literal>InnoDB</literal>
+      increases the size of that file automatically in 8MB increments
+      when it runs out of space. The increment size can be changed by
+      setting the value of the
+      <literal>innodb_autoextend_increment</literal> system variable,
+      which is measured in MB.
+    </para>
+
+    <para>
+      Alternatively, you can increase the size of your tablespace by
+      adding another data file. To do this, you have to shut down the
+      MySQL server, change the tablespace configuration to add a new
+      data file to the end of <literal>innodb_data_file_path</literal>,
+      and start the server again.
+    </para>
+
+    <para>
+      If your last data file was defined with the keyword
+      <literal>autoextend</literal>, the procedure for reconfiguring the
+      tablespace must take into account the size to which the last data
+      file has grown. Obtain the size of the data file, round it down to
+      the closest multiple of 1024 &times; 1024 bytes (= 1MB), and
+      specify the rounded size explicitly in
+      <literal>innodb_data_file_path</literal>. Then you can add another
+      data file. Remember that only the last data file in the
+      <literal>innodb_data_file_path</literal> can be specified as
+      auto-extending.
+    </para>
+
+    <para>
+      As an example, assume that the tablespace has just one
+      auto-extending data file <filename>ibdata1</filename>:
+    </para>
+
+<programlisting>
+innodb_data_home_dir =
+innodb_data_file_path = /ibdata/ibdata1:10M:autoextend
+</programlisting>
+
+    <para>
+      Suppose that this data file, over time, has grown to 988MB. Here
+      is the configuration line after modifying the original data file
+      to not be auto-extending and adding another auto-extending data
+      file:
+    </para>
+
+<programlisting>
+innodb_data_home_dir =
+innodb_data_file_path = /ibdata/ibdata1:988M;/disk2/ibdata2:50M:autoextend
+</programlisting>
+
+    <para>
+      When you add a new file to the tablespace configuration, make sure
+      that it does not exist. <literal>InnoDB</literal> will create and
+      initialize the file when you restart the server.
+    </para>
+
+    <para>
+      Currently, you cannot remove a data file from the tablespace. To
+      decrease the size of your tablespace, use this procedure:
+    </para>
+
+    <orderedlist>
+
+      <listitem>
+        <para>
+          Use <command>mysqldump</command> to dump all your
+          <literal>InnoDB</literal> tables.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          Stop the server.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          Remove all the existing tablespace files.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          Configure a new tablespace.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          Restart the server.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          Import the dump files.
+        </para>
+      </listitem>
+
+    </orderedlist>
+
+    <para>
+      If you want to change the number or the size of your
+      <literal>InnoDB</literal> log files, use the following
+      instructions. The procedure to use depends on the value of
+      <literal>innodb_fast_shutdown</literal>:
+    </para>
+
+    <itemizedlist>
+
+      <listitem>
+        <para>
+          If <literal>innodb_fast_shutdown</literal> is not set to 2:
+          You must stop the MySQL server and make sure that it shuts
+          down without errors (to ensure that there is no information
+          for outstanding transactions in the logs). Then copy the old
+          log files into a safe place just in case something went wrong
+          in the shutdown and you need them to recover the tablespace.
+          Delete the old log files from the log file directory, edit
+          <filename>my.cnf</filename> to change the log file
+          configuration, and start the MySQL server again.
+          <command>mysqld</command> sees that no log files exist at
+          startup and tells you that it is creating new ones.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          If <literal>innodb_fast_shutdown</literal> is set to 2: You
+          should shut down the server, set
+          <literal>innodb_fast_shutdown</literal> to 1, and restart the
+          server. The server should be allowed to recover. Then you
+          should shut down the server again and follow the procedure
+          described in the preceding item to change
+          <literal>InnoDB</literal> log file size. Set
+          <literal>innodb_fast_shutdown</literal> back to 2 and restart
+          the server.
+        </para>
+      </listitem>
+
+    </itemizedlist>
+
+  </section>
+
+  <section id="innodb-backup">
+
+    <title>Backing Up and Recovering an <literal>InnoDB</literal> Database</title>
+
+    <para>
+      The key to safe database management is making regular backups.
+    </para>
+
+    <para>
+      <command>InnoDB Hot Backup</command> is an online backup tool you
+      can use to backup your <literal>InnoDB</literal> database while it
+      is running. <command>InnoDB Hot Backup</command> does not require
+      you to shut down your database and it does not set any locks or
+      disturb your normal database processing. <command>InnoDB Hot
+      Backup</command> is a non-free (commercial) add-on tool with an
+      annual license fee of &euro;390 per computer on which the MySQL
+      server is run. See the
+      <ulink url="http://www.innodb.com/hot-backup"><command>InnoDB Hot
+      Backup</command> home page</ulink> for detailed information and
+      screenshots.
+    </para>
+
+    <para>
+      If you are able to shut down your MySQL server, you can make a
+      binary backup that consists of all files used by
+      <literal>InnoDB</literal> to manage its tables. Use the following
+      procedure:
+    </para>
+
+    <orderedlist>
+
+      <listitem>
+        <para>
+          Shut down your MySQL server and make sure that it shuts down
+          without errors.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          Copy all your data files (<filename>ibdata</filename> files
+          and <filename>.ibd</filename> files) into a safe place.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          Copy all your <filename>ib_logfile</filename> files to a safe
+          place.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          Copy your <filename>my.cnf</filename> configuration file or
+          files to a safe place.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          Copy all the <filename>.frm</filename> files for your
+          <literal>InnoDB</literal> tables to a safe place.
+        </para>
+      </listitem>
+
+    </orderedlist>
+
+    <para>
+      Replication works with <literal>InnoDB</literal> tables, so you
+      can use MySQL replication capabilities to keep a copy of your
+      database at database sites requiring high availability.
+    </para>
+
+    <para>
+      In addition to making binary backups as just described, you should
+      also regularly make dumps of your tables with
+      <command>mysqldump</command>. The reason for this is that a binary
+      file might be corrupted without you noticing it. Dumped tables are
+      stored into text files that are human-readable, so spotting table
+      corruption becomes easier. Also, because the format is simpler,
+      the chance for serious data corruption is smaller.
+      <command>mysqldump</command> also has a
+      <option>--single-transaction</option> option that you can use to
+      make a consistent snapshot without locking out other clients.
+    </para>
+
+    <para>
+      To be able to recover your <literal>InnoDB</literal> database to
+      the present from the binary backup just described, you have to run
+      your MySQL server with binary logging turned on. Then you can
+      apply the binary log to the backup database to achieve
+      point-in-time recovery:
+    </para>
+
+<programlisting>
+mysqlbinlog <replaceable>yourhostname</replaceable>-bin.123 | mysql
+</programlisting>
+
+    <para>
+      To recover from a crash of your MySQL server, the only requirement
+      is to restart it. <literal>InnoDB</literal> automatically checks
+      the logs and performs a roll-forward of the database to the
+      present. <literal>InnoDB</literal> automatically rolls back
+      uncommitted transactions that were present at the time of the
+      crash. During recovery, <command>mysqld</command> displays output
+      something like this:
+    </para>
+
+<programlisting>
+InnoDB: Database was not shut down normally.
+InnoDB: Starting recovery from log files...
+InnoDB: Starting log scan based on checkpoint at
+InnoDB: log sequence number 0 13674004
+InnoDB: Doing recovery: scanned up to log sequence number 0 13739520
+InnoDB: Doing recovery: scanned up to log sequence number 0 13805056
+InnoDB: Doing recovery: scanned up to log sequence number 0 13870592
+InnoDB: Doing recovery: scanned up to log sequence number 0 13936128
+...
+InnoDB: Doing recovery: scanned up to log sequence number 0 20555264
+InnoDB: Doing recovery: scanned up to log sequence number 0 20620800
+InnoDB: Doing recovery: scanned up to log sequence number 0 20664692
+InnoDB: 1 uncommitted transaction(s) which must be rolled back
+InnoDB: Starting rollback of uncommitted transactions
+InnoDB: Rolling back trx no 16745
+InnoDB: Rolling back of trx no 16745 completed
+InnoDB: Rollback of uncommitted transactions completed
+InnoDB: Starting an apply batch of log records to the database...
+InnoDB: Apply batch completed
+InnoDB: Started
+mysqld: ready for connections
+</programlisting>
+
+    <para>
+      If your database gets corrupted or your disk fails, you have to do
+      the recovery from a backup. In the case of corruption, you should
+      first find a backup that is not corrupted. After restoring the
+      base backup, do the recovery from the binary log files using
+      <command>mysqlbinlog</command> and <command>mysql</command> to
+      restore the changes performed after the backup was made.
+    </para>
+
+    <para>
+      In some cases of database corruption it is enough just to dump,
+      drop, and re-create one or a few corrupt tables. You can use the
+      <literal>CHECK TABLE</literal> SQL statement to check whether a
+      table is corrupt, although <literal>CHECK TABLE</literal>
+      naturally cannot detect every possible kind of corruption. You can
+      use <literal>innodb_tablespace_monitor</literal> to check the
+      integrity of the file space management inside the tablespace
+      files.
+    </para>
+
+    <para>
+      In some cases, apparent database page corruption is actually due
+      to the operating system corrupting its own file cache, and the
+      data on disk may be okay. It is best first to try restarting your
+      computer. Doing so may eliminate errors that appeared to be
+      database page corruption.
+    </para>
+
+    <section id="forcing-recovery">
+
+      <title>Forcing <literal>InnoDB</literal> Recovery</title>
+
+      <para>
+        If there is database page corruption, you may want to dump your
+        tables from the database with <literal>SELECT INTO
+        OUTFILE</literal>. Usually, most of the data obtained in this
+        way is intact. Even so, the corruption may cause <literal>SELECT
+        * FROM <replaceable>tbl_name</replaceable></literal> statements
+        or <literal>InnoDB</literal> background operations to crash or
+        assert, or even to cause <literal>InnoDB</literal> roll-forward
+        recovery to crash. However, you can force the
+        <literal>InnoDB</literal> storage engine to start up while
+        preventing background operations from running, so that you are
+        able to dump your tables. For example, you can add the following
+        line to the <literal>[mysqld]</literal> section of your option
+        file before restarting the server:
+      </para>
+
+<programlisting>
+[mysqld]
+innodb_force_recovery = 4
+</programlisting>
+
+      <para>
+        The allowable non-zero values for
+        <literal>innodb_force_recovery</literal> follow. A larger number
+        includes all precautions of smaller numbers. If you are able to
+        dump your tables with an option value of at most 4, then you are
+        relatively safe that only some data on corrupt individual pages
+        is lost. A value of 6 is more drastic because database pages are
+        left in an obsolete state, which in turn may introduce more
+        corruption into B-trees and other database structures.
+      </para>
+
+      <itemizedlist>
+
+        <listitem>
+          <para>
+            <literal>1</literal>
+            (<literal>SRV_FORCE_IGNORE_CORRUPT</literal>)
+          </para>
+
+          <para>
+            Let the server run even if it detects a corrupt page. Try to
+            make <literal>SELECT * FROM
+            <replaceable>tbl_name</replaceable></literal> jump over
+            corrupt index records and pages, which helps in dumping
+            tables.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>2</literal>
+            (<literal>SRV_FORCE_NO_BACKGROUND</literal>)
+          </para>
+
+          <para>
+            Prevent the main thread from running. If a crash would occur
+            during the purge operation, this recovery value prevents it.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>3</literal>
+            (<literal>SRV_FORCE_NO_TRX_UNDO</literal>)
+          </para>
+
+          <para>
+            Do not run transaction rollbacks after recovery.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>4</literal>
+            (<literal>SRV_FORCE_NO_IBUF_MERGE</literal>)
+          </para>
+
+          <para>
+            Prevent also insert buffer merge operations. If they would
+            cause a crash, do not do them. Do not calculate table
+            statistics.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>5</literal>
+            (<literal>SRV_FORCE_NO_UNDO_LOG_SCAN</literal>)
+          </para>
+
+          <para>
+            Do not look at undo logs when starting the database:
+            <literal>InnoDB</literal> treats even incomplete
+            transactions as committed.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>6</literal>
+            (<literal>SRV_FORCE_NO_LOG_REDO</literal>)
+          </para>
+
+          <para>
+            Do not do the log roll-forward in connection with recovery.
+          </para>
+        </listitem>
+
+      </itemizedlist>
+
+      <para>
+        You can <literal>SELECT</literal> from tables to dump them, or
+        <literal>DROP</literal> or <literal>CREATE</literal> tables even
+        if forced recovery is used. If you know that a given table is
+        causing a crash on rollback, you can drop it. You can also use
+        this to stop a runaway rollback caused by a failing mass import
+        or <literal>ALTER TABLE</literal>. You can kill the
+        <command>mysqld</command> process and set
+        <literal>innodb_force_recovery</literal> to <literal>3</literal>
+        to bring the database up without the rollback, then
+        <literal>DROP</literal> the table that is causing the runaway
+        rollback.
+      </para>
+
+      <para>
+        <emphasis>The database must not otherwise be used with any
+        non-zero value of
+        <literal>innodb_force_recovery</literal></emphasis>. As a safety
+        measure, <literal>InnoDB</literal> prevents users from
+        performing <literal>INSERT</literal>, <literal>UPDATE</literal>,
+        or <literal>DELETE</literal> operations when
+        <literal>innodb_force_recovery</literal> is greater than 0.
+      </para>
+
+    </section>
+
+    <section id="innodb-checkpoints">
+
+      <title>Checkpoints</title>
+
+      <para>
+        <literal>InnoDB</literal> implements a checkpoint mechanism
+        known as <quote>fuzzy</quote> checkpointing.
+        <literal>InnoDB</literal> flushes modified database pages from
+        the buffer pool in small batches. There is no need to flush the
+        buffer pool in one single batch, which would in practice stop
+        processing of user SQL statements during the checkpointing
+        process.
+      </para>
+
+      <para>
+        During crash recovery, <literal>InnoDB</literal> looks for a
+        checkpoint label written to the log files. It knows that all
+        modifications to the database before the label are present in
+        the disk image of the database. Then <literal>InnoDB</literal>
+        scans the log files forward from the checkpoint, applying the
+        logged modifications to the database.
+      </para>
+
+      <para>
+        <literal>InnoDB</literal> writes to its log files on a rotating
+        basis. All committed modifications that make the database pages
+        in the buffer pool different from the images on disk must be
+        available in the log files in case <literal>InnoDB</literal> has
+        to do a recovery. This means that when <literal>InnoDB</literal>
+        starts to reuse a log file, it has to make sure that the
+        database page images on disk contain the modifications logged in
+        the log file that <literal>InnoDB</literal> is going to reuse.
+        In other words, <literal>InnoDB</literal> must create a
+        checkpoint and this often involves flushing of modified database
+        pages to disk.
+      </para>
+
+      <para>
+        The preceding description explains why making your log files
+        very large may save disk I/O in checkpointing. It often makes
+        sense to set the total size of the log files as big as the
+        buffer pool or even bigger. The drawback of using large log
+        files is that crash recovery can take longer because there is
+        more logged information to apply to the database.
+      </para>
+
+    </section>
+
+  </section>
+
+  <section id="moving">
+
+    <title>Moving an <literal>InnoDB</literal> Database to Another Machine</title>
+
+    <para>
+      On Windows, <literal>InnoDB</literal> always stores database and
+      table names internally in lowercase. To move databases in a binary
+      format from Unix to Windows or from Windows to Unix, you should
+      have all table and database names in lowercase. A convenient way
+      to accomplish this is to add the following line to the
+      <literal>[mysqld]</literal> section of your
+      <filename>my.cnf</filename> or <filename>my.ini</filename> file
+      before creating any databases or tables:
+    </para>
+
+<programlisting>
+[mysqld]
+lower_case_table_names=1
+</programlisting>
+
+    <para>
+      Like <literal>MyISAM</literal> data files,
+      <literal>InnoDB</literal> data and log files are binary-compatible
+      on all platforms having the same floating-point number format. You
+      can move an <literal>InnoDB</literal> database simply by copying
+      all the relevant files listed in <xref linkend="innodb-backup"/>.
+      If the floating-point formats differ but you have not used
+      <literal>FLOAT</literal> or <literal>DOUBLE</literal> data types
+      in your tables, then the procedure is the same: simply copy the
+      relevant files. If the formats differ and your tables contain
+      floating-point data, you must use <command>mysqldump</command> to
+      dump your tables on one machine and then import the dump files on
+      the other machine.
+    </para>
+
+    <para>
+      One way to increase performance is to switch off autocommit mode
+      when importing data, assuming that the tablespace has enough space
+      for the big rollback segment that the import transactions
+      generate. Do the commit only after importing a whole table or a
+      segment of a table.
+    </para>
+
+  </section>
+
+  <section id="innodb-transaction-model">
+
+    <title><literal>InnoDB</literal> Transaction Model and Locking</title>
+
+    <para>
+      In the <literal>InnoDB</literal> transaction model, the goal is to
+      combine the best properties of a multi-versioning database with
+      traditional two-phase locking. <literal>InnoDB</literal> does
+      locking on the row level and runs queries as non-locking
+      consistent reads by default, in the style of Oracle. The lock
+      table in <literal>InnoDB</literal> is stored so space-efficiently
+      that lock escalation is not needed: Typically several users are
+      allowed to lock every row in the database, or any random subset of
+      the rows, without <literal>InnoDB</literal> running out of memory.
+    </para>
+
+    <section id="innodb-lock-modes">
+
+      <title><literal>InnoDB</literal> Lock Modes</title>
+
+      <para>
+        <literal>InnoDB</literal> implements standard row-level locking
+        where there are two types of locks:
+      </para>
+
+      <itemizedlist>
+
+        <listitem>
+          <para>
+            A shared (<replaceable>S</replaceable>) lock allows a
+            transaction to read a row (tuple).
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            An exclusive (<replaceable>X</replaceable>) lock allows a
+            transaction to update or delete a row.
+          </para>
+        </listitem>
+
+      </itemizedlist>
+
+      <para>
+        If transaction <literal>T1</literal> holds a shared
+        (<replaceable>S</replaceable>) lock on tuple
+        <literal>t</literal>, then
+      </para>
+
+      <itemizedlist>
+
+        <listitem>
+          <para>
+            A request from some distinct transaction
+            <literal>T2</literal> for an <replaceable>S</replaceable>
+            lock on <literal>t</literal> can be granted immediately. As
+            a result, both <literal>T1</literal> and
+            <literal>T2</literal> hold an <replaceable>S</replaceable>
+            lock on <literal>t</literal>.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            A request from some distinct transaction
+            <literal>T2</literal> for an <replaceable>X</replaceable>
+            lock on <literal>t</literal> cannot be granted immediately.
+          </para>
+        </listitem>
+
+      </itemizedlist>
+
+      <para>
+        If a transaction <literal>T1</literal> holds an exclusive
+        (<replaceable>X</replaceable>) lock on tuple
+        <literal>t</literal>, then a request from some distinct
+        transaction <literal>T2</literal> for a lock of either type on
+        <literal>t</literal> cannot be granted immediately. Instead,
+        transaction <literal>T2</literal> has to wait for transaction
+        <literal>T1</literal> to release its lock on tuple
+        <literal>t</literal>.
+      </para>
+
+      <para>
+        Additionally, <literal>InnoDB</literal> supports
+        <emphasis>multiple granularity locking</emphasis> which allows
+        coexistence of record locks and locks on entire tables. To make
+        locking at multiple granularity levels practical, additional
+        types of locks called <emphasis>intention locks</emphasis> are
+        used. Intention locks are table locks in
+        <literal>InnoDB</literal>. The idea behind intention locks is
+        for a transaction to indicate which type of lock (shared or
+        exclusive) it will require later for a row in that table. There
+        are two types of intention locks used in
+        <literal>InnoDB</literal> (assume that transaction
+        <literal>T</literal> has requested a lock of the indicated type
+        on table <literal>R</literal>):
+      </para>
+
+      <itemizedlist>
+
+        <listitem>
+          <para>
+            Intention shared (<replaceable>IS</replaceable>):
+            Transaction <literal>T</literal> intends to set
+            <replaceable>S</replaceable> locks on individual rows in
+            table <literal>R</literal>.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Intention exclusive (<replaceable>IX</replaceable>):
+            Transaction <literal>T</literal> intends to set
+            <replaceable>X</replaceable> locks on those rows.
+          </para>
+        </listitem>
+
+      </itemizedlist>
+
+      <para>
+        The intention locking protocol is as follows:
+      </para>
+
+      <itemizedlist>
+
+        <listitem>
+          <para>
+            Before a given transaction can acquire an
+            <replaceable>S</replaceable> lock on a given row, it must
+            first acquire an <replaceable>IS</replaceable> or stronger
+            lock on the table containing that row.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Before a given transaction can acquire an
+            <replaceable>X</replaceable> lock on a given row, it must
+            first acquire an <replaceable>IX</replaceable> lock on the
+            table containing that row.
+          </para>
+        </listitem>
+
+      </itemizedlist>
+
+      <para>
+        These rules can be conveniently summarized by means of a
+        <emphasis>lock type compatibility matrix</emphasis>:
+      </para>
+
+      <informaltable>
+        <tgroup cols="5">
+          <colspec colwidth="20*"/>
+          <colspec colwidth="20*"/>
+          <colspec colwidth="20*"/>
+          <colspec colwidth="20*"/>
+          <colspec colwidth="20*"/>
+          <tbody>
+            <row>
+              <entry/>
+              <entry><replaceable>X</replaceable></entry>
+              <entry><replaceable>IX</replaceable></entry>
+              <entry><replaceable>S</replaceable></entry>
+              <entry><replaceable>IS</replaceable></entry>
+            </row>
+            <row>
+              <entry><replaceable>X</replaceable></entry>
+              <entry>Conflict</entry>
+              <entry>Conflict</entry>
+              <entry>Conflict</entry>
+              <entry>Conflict</entry>
+            </row>
+            <row>
+              <entry><replaceable>IX</replaceable></entry>
+              <entry>Conflict</entry>
+              <entry>Compatible</entry>
+              <entry>Conflict</entry>
+              <entry>Compatible</entry>
+            </row>
+            <row>
+              <entry><replaceable>S</replaceable></entry>
+              <entry>Conflict</entry>
+              <entry>Conflict</entry>
+              <entry>Compatible</entry>
+              <entry>Compatible</entry>
+            </row>
+            <row>
+              <entry><replaceable>IS</replaceable></entry>
+              <entry>Conflict</entry>
+              <entry>Compatible</entry>
+              <entry>Compatible</entry>
+              <entry>Compatible</entry>
+            </row>
+          </tbody>
+        </tgroup>
+      </informaltable>
+
+      <para>
+        A lock is granted to a requesting transaction if it is
+        compatible with existing locks. A lock is not granted to a
+        requesting transaction if it conflicts with existing locks. A
+        transaction waits until the conflicting existing lock is
+        released. If a lock request conflicts with an existing lock and
+        cannot be granted because it would cause deadlock, an error
+        occurs.
+      </para>
+
+      <para>
+        Thus, intention locks do not block anything except full table
+        requests (for example, <literal>LOCK TABLES ...
+        WRITE</literal>). The main purpose of
+        <replaceable>IX</replaceable> and <replaceable>IS</replaceable>
+        locks is to show that someone is locking a row, or going to lock
+        a row in the table.
+      </para>
+
+      <para>
+        The following example illustrates how an error can occur when a
+        lock request would cause a deadlock. The example involves two
+        clients, A and B.
+      </para>
+
+      <para>
+        First, client A creates a table containing one row, and then
+        begins a transaction. Within the transaction, A obtains an
+        <replaceable>S</replaceable> lock on the row by selecting it in
+        share mode:
+      </para>
+
+<programlisting>
+<!--
+mysql> DROP TABLE IF EXISTS t;
+Query OK, 0 rows affected (0.26 sec)
+-->
+mysql&gt; <userinput>CREATE TABLE t (i INT) ENGINE = InnoDB;</userinput>
+Query OK, 0 rows affected (1.07 sec)
+
+mysql&gt; <userinput>INSERT INTO t (i) VALUES(1);</userinput>
+Query OK, 1 row affected (0.09 sec)
+
+mysql&gt; <userinput>START TRANSACTION;</userinput>
+Query OK, 0 rows affected (0.00 sec)
+
+mysql&gt; <userinput>SELECT * FROM t WHERE i = 1 LOCK IN SHARE MODE;</userinput>
++------+
+| i    |
++------+
+|    1 |
++------+
+1 row in set (0.10 sec)
+</programlisting>
+
+      <para>
+        Next, client B begins a transaction and attempts to delete the
+        row from the table:
+      </para>
+
+<programlisting>
+mysql&gt; <userinput>START TRANSACTION;</userinput>
+Query OK, 0 rows affected (0.00 sec)
+
+mysql&gt; <userinput>DELETE FROM t WHERE i = 1;</userinput>
+</programlisting>
+
+      <para>
+        The delete operation requires an <replaceable>X</replaceable>
+        lock. The lock cannot be granted because it is incompatible with
+        the <replaceable>S</replaceable> lock that client A holds, so
+        the request goes on the queue of lock requests for the row and
+        client B blocks.
+      </para>
+
+      <para>
+        Finally, client A also attempts to delete the row from the
+        table:
+      </para>
+
+<programlisting>
+mysql&gt; <userinput>DELETE FROM t WHERE i = 1;</userinput>
+ERROR 1213 (40001): Deadlock found when trying to get lock;
+try restarting transaction
+</programlisting>
+
+      <para>
+        Deadlock occurs here because client A needs an
+        <replaceable>X</replaceable> lock to delete the row. However,
+        that lock request cannot be granted because client B already has
+        a request for an <replaceable>X</replaceable> lock and is
+        waiting for client A to release its <replaceable>S</replaceable>
+        lock. Nor can the <replaceable>S</replaceable> lock held by A be
+        upgraded to an <replaceable>X</replaceable> lock because of the
+        prior request by B for an <replaceable>X</replaceable> lock. As
+        a result, <literal>InnoDB</literal> generates an error for
+        client A and releases its locks. At that point, the lock request
+        for client B can be granted and B deletes the row from the
+        table.
+      </para>
+
+    </section>
+
+    <section id="innodb-and-autocommit">
+
+      <title><literal>InnoDB</literal> and <literal>AUTOCOMMIT</literal></title>
+
+      <para>
+        In <literal>InnoDB</literal>, all user activity occurs inside a
+        transaction. If the autocommit mode is enabled, each SQL
+        statement forms a single transaction on its own. By default,
+        MySQL starts new connections with autocommit enabled.
+      </para>
+
+      <para>
+        If the autocommit mode is switched off with <literal>SET
+        AUTOCOMMIT = 0</literal>, then we can consider that a user
+        always has a transaction open. An SQL <literal>COMMIT</literal>
+        or <literal>ROLLBACK</literal> statement ends the current
+        transaction and a new one starts. A <literal>COMMIT</literal>
+        means that the changes made in the current transaction are made
+        permanent and become visible to other users. A
+        <literal>ROLLBACK</literal> statement, on the other hand,
+        cancels all modifications made by the current transaction. Both
+        statements release all <literal>InnoDB</literal> locks that were
+        set during the current transaction.
+      </para>
+
+      <para>
+        If the connection has autocommit enabled, the user can still
+        perform a multiple-statement transaction by starting it with an
+        explicit <literal>START TRANSACTION</literal> or
+        <literal>BEGIN</literal> statement and ending it with
+        <literal>COMMIT</literal> or <literal>ROLLBACK</literal>.
+      </para>
+
+    </section>
+
+    <section id="innodb-transaction-isolation">
+
+      <title><literal>InnoDB</literal> and <literal>TRANSACTION ISOLATION
+        LEVEL</literal></title>
+
+      <para>
+        In terms of the SQL:1992 transaction isolation levels, the
+        <literal>InnoDB</literal> default is <literal>REPEATABLE
+        READ</literal>. <literal>InnoDB</literal> offers all four
+        transaction isolation levels described by the SQL standard. You
+        can set the default isolation level for all connections by using
+        the <option>--transaction-isolation</option> option on the
+        command line or in an option file. For example, you can set the
+        option in the <literal>[mysqld]</literal> section of an option
+        file like this:
+      </para>
+
+<programlisting>
+[mysqld]
+transaction-isolation = {READ-UNCOMMITTED | READ-COMMITTED
+                         | REPEATABLE-READ | SERIALIZABLE}
+</programlisting>
+
+      <para>
+        A user can change the isolation level for a single session or
+        for all new incoming connections with the <literal>SET
+        TRANSACTION</literal> statement. Its syntax is as follows:
+      </para>
+
+<programlisting>
+SET [SESSION | GLOBAL] TRANSACTION ISOLATION LEVEL
+                       {READ UNCOMMITTED | READ COMMITTED
+                        | REPEATABLE READ | SERIALIZABLE}
+</programlisting>
+
+      <para>
+        Note that there are hyphens in the level names for the
+        <option>--transaction-isolation</option> option, but not for the
+        <literal>SET TRANSACTION</literal> statement.
+      </para>
+
+      <para>
+        The default behavior is to set the isolation level for the next
+        (not started) transaction. If you use the
+        <literal>GLOBAL</literal> keyword, the statement sets the
+        default transaction level globally for all new connections
+        created from that point on (but not for existing connections).
+        You need the <literal>SUPER</literal> privilege to do this.
+        Using the <literal>SESSION</literal> keyword sets the default
+        transaction level for all future transactions performed on the
+        current connection.
+      </para>
+
+      <para>
+        Any client is free to change the session isolation level (even
+        in the middle of a transaction), or the isolation level for the
+        next transaction.
+      </para>
+
+      <para>
+        You can determine the global and session transaction isolation
+        levels by checking the value of the
+        <literal>tx_isolation</literal> system variable with these
+        statements:
+      </para>
+
+<programlisting>
+SELECT @@global.tx_isolation;
+SELECT @@tx_isolation;
+</programlisting>
+
+      <indexterm>
+        <primary>next-key lock</primary>
+      </indexterm>
+
+      <indexterm>
+        <primary>gap lock</primary>
+      </indexterm>
+
+      <para>
+        In row-level locking, <literal>InnoDB</literal> uses next-key
+        locking. That means that besides index records,
+        <literal>InnoDB</literal> can also lock the <quote>gap</quote>
+        preceding an index record to block insertions by other users
+        immediately before the index record. A next-key lock refers to a
+        lock that locks an index record and the gap before it. A gap
+        lock refers to a lock that only locks a gap before some index
+        record. Next-key locking for searches or index scans can be
+        disabled by enabling the
+        <literal>innodb_locks_unsafe_for_binlog</literal> system
+        variable.
+      </para>
+
+      <para>
+        A detailed description of each isolation level in
+        <literal>InnoDB</literal> follows:
+      </para>
+
+      <itemizedlist>
+
+        <listitem>
+          <para>
+            <literal>READ UNCOMMITTED</literal>
+          </para>
+
+          <para>
+            <literal>SELECT</literal> statements are performed in a
+            non-locking fashion, but a possible earlier version of a
+            record might be used. Thus, using this isolation level, such
+            reads are not consistent. This is also called a <quote>dirty
+            read.</quote> Otherwise, this isolation level works like
+            <literal>READ COMMITTED</literal>.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>READ COMMITTED</literal>
+          </para>
+
+          <para>
+            A somewhat Oracle-like isolation level. All <literal>SELECT
+            ... FOR UPDATE</literal> and <literal>SELECT ... LOCK IN
+            SHARE MODE</literal> statements lock only the index records,
+            not the gaps before them, and thus allow the free insertion
+            of new records next to locked records.
+            <literal>UPDATE</literal> and <literal>DELETE</literal>
+            statements using a unique index with a unique search
+            condition lock only the index record found, not the gap
+            before it. In range-type <literal>UPDATE</literal> and
+            <literal>DELETE</literal> statements,
+            <literal>InnoDB</literal> must set next-key or gap locks and
+            block insertions by other users to the gaps covered by the
+            range. This is necessary because <quote>phantom rows</quote>
+            must be blocked for MySQL replication and recovery to work.
+          </para>
+
+          <para>
+            Consistent reads behave as in Oracle: Each consistent read,
+            even within the same transaction, sets and reads its own
+            fresh snapshot. See
+            <xref linkend="innodb-consistent-read"/>.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>REPEATABLE READ</literal>
+          </para>
+
+          <para>
+            This is the default isolation level of
+            <literal>InnoDB</literal>. <literal>SELECT ... FOR
+            UPDATE</literal>, <literal>SELECT ... LOCK IN SHARE
+            MODE</literal>, <literal>UPDATE</literal>, and
+            <literal>DELETE</literal> statements that use a unique index
+            with a unique search condition lock only the index record
+            found, not the gap before it. With other search conditions,
+            these operations employ next-key locking, locking the index
+            range scanned with next-key or gap locks, and block new
+            insertions by other users.
+          </para>
+
+          <para>
+            In consistent reads, there is an important difference from
+            the <literal>READ COMMITTED</literal> isolation level: All
+            consistent reads within the same transaction read the same
+            snapshot established by the first read. This convention
+            means that if you issue several plain
+            <literal>SELECT</literal> statements within the same
+            transaction, these <literal>SELECT</literal> statements are
+            consistent also with respect to each other. See
+            <xref linkend="innodb-consistent-read"/>.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>SERIALIZABLE</literal>
+          </para>
+
+          <para>
+            This level is like <literal>REPEATABLE READ</literal>, but
+            <literal>InnoDB</literal> implicitly converts all plain
+            <literal>SELECT</literal> statements to <literal>SELECT ...
+            LOCK IN SHARE MODE</literal>.
+          </para>
+        </listitem>
+
+      </itemizedlist>
+
+      <para>
+        In MySQL &current-series;, if the <literal>READ
+        COMMITTED</literal> isolation level is used or the
+        <literal>innodb_locks_unsafe_for_binlog</literal> system
+        variable is enabled, there is no <literal>InnoDB</literal> gap
+        locking except in constraint checking. Also, record locks for
+        non-matching rows are released after MySQL has evaluated the
+        <literal>WHERE</literal> condition.
+      </para>
+
+    </section>
+
+    <section id="innodb-consistent-read">
+
+      <title>Consistent Non-Locking Read</title>
+
+      <para>
+        A consistent read means that <literal>InnoDB</literal> uses
+        multi-versioning to present to a query a snapshot of the
+        database at a point in time. The query sees the changes made by
+        those transactions that committed before that point of time, and
+        no changes made by later or uncommitted transactions. The
+        exception to this rule is that the query sees the changes made
+        by earlier statements within the same transaction. Note that the
+        exception to the rule causes the following anomaly: if you
+        update some rows in a table, a <literal>SELECT</literal> will
+        see the latest version of the updated rows, but it might also
+        see older versions of any rows. If other users simultaneously
+        update the same table, the anomaly means that you may see the
+        table in a state that never existed in the database.
+      </para>
+
+      <para>
+        If you are running with the default <literal>REPEATABLE
+        READ</literal> isolation level, all consistent reads within the
+        same transaction read the snapshot established by the first such
+        read in that transaction. You can get a fresher snapshot for
+        your queries by committing the current transaction and after
+        that issuing new queries.
+      </para>
+
+      <para>
+        Consistent read is the default mode in which
+        <literal>InnoDB</literal> processes <literal>SELECT</literal>
+        statements in <literal>READ COMMITTED</literal> and
+        <literal>REPEATABLE READ</literal> isolation levels. A
+        consistent read does not set any locks on the tables it
+        accesses, and therefore other users are free to modify those
+        tables at the same time a consistent read is being performed on
+        the table.
+      </para>
+
+      <para>
+        Note that consistent read does not work over <literal>DROP
+        TABLE</literal> and over <literal>ALTER TABLE</literal>.
+        Consistent read does not work over <literal>DROP TABLE</literal>
+        because MySQL can't use a table that has been dropped and
+        <literal>InnoDB</literal> destroys the table. Consistent read
+        does not work over <literal>ALTER TABLE</literal> because
+        <literal>ALTER TABLE</literal> works by making a temporary copy
+        of the original table and deleting the original table when the
+        temporary copy is built. When you reissue a consistent read
+        within a transaction, rows in the new table are not visible
+        because those rows did not exist when the transaction's snapshot
+        was taken.
+      </para>
+
+      <para>
+        <literal>InnoDB</literal> uses a consistent read for select in
+        clauses like <literal>INSERT INTO ... SELECT</literal> and
+        <literal>UPDATE ... (SELECT)</literal> that do not specify
+        <literal>FOR UPDATE</literal> or <literal>IN SHARE
+        MODE</literal> if the
+        <literal>innodb_locks_unsafe_for_binlog</literal> option is set
+        and the isolation level of the transaction is not set to
+        serializable. Thus no locks are set to rows read from selected
+        table. Otherwise, <literal>InnoDB</literal> uses stronger locks
+        and the <literal>SELECT</literal> part acts like <literal>READ
+        COMMITTED</literal>, where each consistent read, even within the
+        same transaction, sets and reads its own fresh snapshot.
+      </para>
+
+    </section>
+
+    <section id="innodb-locking-reads">
+
+      <title><literal>SELECT ... FOR UPDATE</literal> and <literal>SELECT ... LOCK IN
+        SHARE MODE</literal> Locking Reads</title>
+
+      <para>
+        In some circumstances, a consistent read is not convenient. For
+        example, you might want to add a new row into your table
+        <literal>child</literal>, and make sure that the child has a
+        parent in table <literal>parent</literal>. The following example
+        shows how to implement referential integrity in your application
+        code.
+      </para>
+
+      <para>
+        Suppose that you use a consistent read to read the table
+        <literal>parent</literal> and indeed see the parent of the child
+        in the table. Can you safely add the child row to table
+        <literal>child</literal>? No, because it may happen that
+        meanwhile some other user deletes the parent row from the table
+        <literal>parent</literal> without you being aware of it.
+      </para>
+
+      <para>
+        The solution is to perform the <literal>SELECT</literal> in a
+        locking mode using <literal>LOCK IN SHARE MODE</literal>:
+      </para>
+
+<programlisting>
+SELECT * FROM parent WHERE NAME = 'Jones' LOCK IN SHARE MODE;
+</programlisting>
+
+      <para>
+        Performing a read in share mode means that we read the latest
+        available data, and set a shared mode lock on the rows we read.
+        A shared mode lock prevents others from updating or deleting the
+        row we have read. Also, if the latest data belongs to a yet
+        uncommitted transaction of another client connection, we wait
+        until that transaction commits. After we see that the preceding
+        query returns the parent <literal>'Jones'</literal>, we can
+        safely add the child record to the <literal>child</literal>
+        table and commit our transaction.
+      </para>
+
+      <para>
+        Let us look at another example: We have an integer counter field
+        in a table <literal>child_codes</literal> that we use to assign
+        a unique identifier to each child added to table
+        <literal>child</literal>. Obviously, using a consistent read or
+        a shared mode read to read the present value of the counter is
+        not a good idea because two users of the database may then see
+        the same value for the counter, and a duplicate-key error occurs
+        if two users attempt to add children with the same identifier to
+        the table.
+      </para>
+
+      <para>
+        Here, <literal>LOCK IN SHARE MODE</literal> is not a good
+        solution because if two users read the counter at the same time,
+        at least one of them ends up in deadlock when attempting to
+        update the counter.
+      </para>
+
+      <para>
+        In this case, there are two good ways to implement the reading
+        and incrementing of the counter: (1) update the counter first by
+        incrementing it by 1 and only after that read it, or (2) read
+        the counter first with a lock mode <literal>FOR
+        UPDATE</literal>, and increment after that. The latter approach
+        can be implemented as follows:
+      </para>
+
+<programlisting>
+SELECT counter_field FROM child_codes FOR UPDATE;
+UPDATE child_codes SET counter_field = counter_field + 1;
+</programlisting>
+
+      <para>
+        A <literal>SELECT ... FOR UPDATE</literal> reads the latest
+        available data, setting exclusive locks on each row it reads.
+        Thus, it sets the same locks a searched SQL
+        <literal>UPDATE</literal> would set on the rows.
+      </para>
+
+      <para>
+        The preceding description is merely an example of how
+        <literal>SELECT ... FOR UPDATE</literal> works. In MySQL, the
+        specific task of generating a unique identifier actually can be
+        accomplished using only a single access to the table:
+      </para>
+
+<programlisting>
+UPDATE child_codes SET counter_field = LAST_INSERT_ID(counter_field + 1);
+SELECT LAST_INSERT_ID();
+</programlisting>
+
+      <para>
+        The <literal>SELECT</literal> statement merely retrieves the
+        identifier information (specific to the current connection). It
+        does not access any table.
+      </para>
+
+      <para>
+        Locks set by <literal>IN SHARE MODE</literal> and <literal>FOR
+        UPDATE</literal> reads are released when the transaction is
+        committed or rolled back.
+      </para>
+
+      <note>
+        <para>
+          Locking of rows for update using <literal>SELECT FOR
+          UPDATE</literal> only applies when
+          <literal>AUTOCOMMIT</literal> is switched off. If
+          <literal>AUTOCOMMIT</literal> is on, then the rows matching
+          the specification are not locked.
+        </para>
+      </note>
+
+    </section>
+
+    <section id="innodb-next-key-locking">
+
+      <title>Next-Key Locking: Avoiding the Phantom Problem</title>
+
+      <indexterm>
+        <primary>next-key lock</primary>
+      </indexterm>
+
+      <indexterm>
+        <primary>gap lock</primary>
+      </indexterm>
+
+      <para>
+        In row-level locking, <literal>InnoDB</literal> uses an
+        algorithm called <emphasis>next-key locking</emphasis>.
+        <literal>InnoDB</literal> performs the row-level locking in such
+        a way that when it searches or scans an index of a table, it
+        sets shared or exclusive locks on the index records it
+        encounters. Thus, the row-level locks are actually index record
+        locks.
+      </para>
+
+      <para>
+        The next-key locks that <literal>InnoDB</literal> sets on index
+        records also affect the <quote>gap</quote> before that index
+        record. If a user has a shared or exclusive lock on record
+        <literal>R</literal> in an index, another user cannot insert a
+        new index record immediately before <literal>R</literal> in the
+        index order. (A gap lock refers to a lock that only locks a gap
+        before some index record.)
+      </para>
+
+      <para>
+        This next-key locking of gaps is done to prevent the so-called
+        <quote>phantom problem.</quote> Suppose that you want to read
+        and lock all children from the <literal>child</literal> table
+        having an identifier value greater than 100, with the intention
+        of updating some column in the selected rows later:
+      </para>
+
+<programlisting>
+SELECT * FROM child WHERE id &gt; 100 FOR UPDATE;
+</programlisting>
+
+      <para>
+        Suppose that there is an index on the <literal>id</literal>
+        column. The query scans that index starting from the first
+        record where <literal>id</literal> is bigger than 100. If the
+        locks set on the index records would not lock out inserts made
+        in the gaps, a new row might meanwhile be inserted to the table.
+        If you execute the same <literal>SELECT</literal> within the
+        same transaction, you would see a new row in the result set
+        returned by the query. This is contrary to the isolation
+        principle of transactions: A transaction should be able to run
+        so that the data it has read does not change during the
+        transaction. If we regard a set of rows as a data item, the new
+        <quote>phantom</quote> child would violate this isolation
+        principle.
+      </para>
+
+      <para>
+        When <literal>InnoDB</literal> scans an index, it can also lock
+        the gap after the last record in the index. Just that happens in
+        the previous example: The locks set by <literal>InnoDB</literal>
+        prevent any insert to the table where <literal>id</literal>
+        would be bigger than 100.
+      </para>
+
+      <para>
+        You can use next-key locking to implement a uniqueness check in
+        your application: If you read your data in share mode and do not
+        see a duplicate for a row you are going to insert, then you can
+        safely insert your row and know that the next-key lock set on
+        the successor of your row during the read prevents anyone
+        meanwhile inserting a duplicate for your row. Thus, the next-key
+        locking allows you to <quote>lock</quote> the non-existence of
+        something in your table.
+      </para>
+
+      <para>
+        In MySQL &current-series;, if the <literal>READ
+        COMMITTED</literal> isolation level is used or the
+        <literal>innodb_locks_unsafe_for_binlog</literal> system
+        variable is enabled, there is no <literal>InnoDB</literal> gap
+        locking except in constraint checking. Also, record locks for
+        non-matching rows are released after MySQL has evaluated the
+        <literal>WHERE</literal> condition.
+      </para>
+
+    </section>
+
+    <section id="innodb-consistent-read-example">
+
+      <title>An Example of Consistent Read in <literal>InnoDB</literal></title>
+
+      <para>
+        Suppose that you are running in the default <literal>REPEATABLE
+        READ</literal> isolation level. When you issue a consistent read
+        (that is, an ordinary <literal>SELECT</literal> statement),
+        <literal>InnoDB</literal> gives your transaction a timepoint
+        according to which your query sees the database. If another
+        transaction deletes a row and commits after your timepoint was
+        assigned, you do not see the row as having been deleted. Inserts
+        and updates are treated similarly.
+      </para>
+
+      <para>
+        You can advance your timepoint by committing your transaction
+        and then doing another <literal>SELECT</literal>.
+      </para>
+
+      <para>
+        This is called <firstterm>multi-versioned concurrency
+        control</firstterm>.
+      </para>
+
+      <remark role="todo">
+        More ASCII art that needs to be turned into a graphic.
+      </remark>
+
+<programlisting>
+               User A                 User B
+
+           SET AUTOCOMMIT=0;      SET AUTOCOMMIT=0;
+time
+|          SELECT * FROM t;
+|          empty set
+|                                 INSERT INTO t VALUES (1, 2);
+|
+v          SELECT * FROM t;
+           empty set
+                                  COMMIT;
+
+           SELECT * FROM t;
+           empty set
+
+           COMMIT;
+
+           SELECT * FROM t;
+           ---------------------
+           |    1    |    2    |
+           ---------------------
+           1 row in set
+</programlisting>
+
+      <para>
+        In this example, user A sees the row inserted by B only when B
+        has committed the insert and A has committed as well, so that
+        the timepoint is advanced past the commit of B.
+      </para>
+
+      <para>
+        If you want to see the <quote>freshest</quote> state of the
+        database, you should use either the <literal>READ
+        COMMITTED</literal> isolation level or a locking read:
+      </para>
+
+<programlisting>
+SELECT * FROM t LOCK IN SHARE MODE;
+</programlisting>
+
+    </section>
+
+    <section id="innodb-locks-set">
+
+      <title>Locks Set by Different SQL Statements in <literal>InnoDB</literal></title>
+
+      <para>
+        A locking read, an <literal>UPDATE</literal>, or a
+        <literal>DELETE</literal> generally set record locks on every
+        index record that is scanned in the processing of the SQL
+        statement. It does not matter if there are
+        <literal>WHERE</literal> conditions in the statement that would
+        exclude the row. <literal>InnoDB</literal> does not remember the
+        exact <literal>WHERE</literal> condition, but only knows which
+        index ranges were scanned. The record locks are normally
+        next-key locks that also block inserts to the <quote>gap</quote>
+        immediately before the record.
+      </para>
+
+      <para>
+        If the locks to be set are exclusive, <literal>InnoDB</literal>
+        always retrieves also the clustered index record and sets a lock
+        on it.
+      </para>
+
+      <para>
+        If you do not have indexes suitable for your statement and MySQL
+        has to scan the whole table to process the statement, every row
+        of the table becomes locked, which in turn blocks all inserts by
+        other users to the table. It is important to create good indexes
+        so that your queries do not unnecessarily need to scan many
+        rows.
+      </para>
+
+      <para>
+        <literal>InnoDB</literal> sets specific types of locks as
+        follows:
+      </para>
+
+      <itemizedlist>
+
+        <listitem>
+          <para>
+            <literal>SELECT ... FROM</literal> is a consistent read,
+            reading a snapshot of the database and setting no locks
+            unless the transaction isolation level is set to
+            <literal>SERIALIZABLE</literal>. For
+            <literal>SERIALIZABLE</literal> level, this sets shared
+            next-key locks on the index records it encounters.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>SELECT ... FROM ... LOCK IN SHARE MODE</literal>
+            sets shared next-key locks on all index records the read
+            encounters.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>SELECT ... FROM ... FOR UPDATE</literal> sets
+            exclusive next-key locks on all index records the read
+            encounters and also on the corresponding clustered index
+            records if a secondary index is used in the search.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>UPDATE ... WHERE ...</literal> sets an exclusive
+            next-key lock on every record the search encounters.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>DELETE FROM ... WHERE ...</literal> sets an
+            exclusive next-key lock on every record the search
+            encounters.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>INSERT INTO ... VALUES (...)</literal> sets an
+            exclusive lock on the inserted row. Note that this lock is
+            not a next-key lock and does not prevent other users from
+            inserting to the gap before the inserted row. If a
+            duplicate-key error occurs, a shared lock on the duplicate
+            index record is set.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>REPLACE</literal> is done like an
+            <literal>INSERT</literal> if there is no collision on a
+            unique key. Otherwise, an exclusive next-key lock is placed
+            on the row that has to be updated.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            While initializing a previously specified
+            <literal>AUTO_INCREMENT</literal> column on a table,
+            <literal>InnoDB</literal> sets an exclusive lock on the end
+            of the index associated with the
+            <literal>AUTO_INCREMENT</literal> column. In accessing the
+            auto-increment counter, <literal>InnoDB</literal> uses a
+            specific table lock mode <literal>AUTO-INC</literal> where
+            the lock lasts only to the end of the current SQL statement,
+            not to the end of the entire transaction. Note that other
+            clients cannot insert into the table while the
+            <literal>AUTO-INC</literal> table lock is held; see
+            <xref linkend="innodb-and-autocommit"/>.
+          </para>
+
+          <para>
+            <literal>InnoDB</literal> fetches the value of a previously
+            initialized <literal>AUTO_INCREMENT</literal> column without
+            setting any locks.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>INSERT INTO T SELECT ... FROM S WHERE ...</literal>
+            sets an exclusive (non-next-key) lock on each row inserted
+            into <literal>T</literal>. <literal>InnoDB</literal> sets
+            shared next-key locks on <literal>S</literal>, unless
+            <literal>innodb_locks_unsafe_for_binlog</literal> is
+            enabled, in which case it does the search on
+            <literal>S</literal> as a consistent read.
+            <literal>InnoDB</literal> has to set locks in the latter
+            case: In roll-forward recovery from a backup, every SQL
+            statement has to be executed in exactly the same way it was
+            done originally.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>CREATE TABLE ... SELECT ...</literal> performs the
+            <literal>SELECT</literal> as a consistent read or with
+            shared locks, as in the previous item.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            If a <literal>FOREIGN KEY</literal> constraint is defined on
+            a table, any insert, update, or delete that requires the
+            constraint condition to be checked sets shared record-level
+            locks on the records that it looks at to check the
+            constraint. <literal>InnoDB</literal> also sets these locks
+            in the case where the constraint fails.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>LOCK TABLES</literal> sets table locks, but it is
+            the higher MySQL layer above the <literal>InnoDB</literal>
+            layer that sets these locks. <literal>InnoDB</literal> is
+            aware of table locks if
+            <literal>innodb_table_locks=1</literal> (the default) and
+            <literal>AUTOCOMMIT=0</literal>, and the MySQL layer above
+            <literal>InnoDB</literal> knows about row-level locks.
+            Otherwise, <literal>InnoDB</literal>'s automatic deadlock
+            detection cannot detect deadlocks where such table locks are
+            involved. Also, because the higher MySQL layer does not know
+            about row-level locks, it is possible to get a table lock on
+            a table where another user currently has row-level locks.
+            However, this does not endanger transaction integrity, as
+            discussed in <xref linkend="innodb-deadlock-detection"/>.
+            See also <xref linkend="innodb-restrictions"/>.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            In MySQL &current-series;, if the <literal>READ
+            COMMITTED</literal> isolation level is used or the
+            <literal>innodb_locks_unsafe_for_binlog</literal> system
+            variable is enabled, there is no <literal>InnoDB</literal>
+            gap locking except in constraint checking. Also, record
+            locks for non-matching rows are released after MySQL has
+            evaluated the <literal>WHERE</literal> condition.
+          </para>
+        </listitem>
+
+      </itemizedlist>
+
+    </section>
+
+    <section id="innodb-implicit-commit">
+
+      <title>Implicit Transaction Commit and Rollback</title>
+
+      <para>
+        By default, MySQL begins each client connection with autocommit
+        mode enabled. When autocommit is enabled, MySQL does a commit
+        after each SQL statement if that statement did not return an
+        error. If an SQL statement returns an error, the commit or
+        rollback behavior depends on the error. See
+        <xref linkend="innodb-error-handling"/>.
+      </para>
+
+      <para>
+        If you have the autocommit mode off and close a connection
+        without explicitly committing the final transaction, MySQL rolls
+        back that transaction.
+      </para>
+
+      <para>
+        For details about which statements implicitly end a transaction,
+        as if you had done a <literal>COMMIT</literal> before executing
+        the statement, see <xref linkend="implicit-commit"/>.
+      </para>
+
+    </section>
+
+    <section id="innodb-deadlock-detection">
+
+      <title>Deadlock Detection and Rollback</title>
+
+      <para>
+        <literal>InnoDB</literal> automatically detects a deadlock of
+        transactions and rolls back a transaction or transactions to
+        break the deadlock. <literal>InnoDB</literal> tries to pick
+        small transactions to roll back, where the size of a transaction
+        is determined by the number of rows inserted, updated, or
+        deleted.
+      </para>
+
+      <para>
+        <literal>InnoDB</literal> is aware of table locks if
+        <literal>innodb_table_locks=1</literal> (the default) and
+        <literal>AUTOCOMMIT=0</literal>, and the MySQL layer above it
+        knows about row-level locks. Otherwise,
+        <literal>InnoDB</literal> cannot detect deadlocks where a table
+        lock set by a MySQL <literal>LOCK TABLES</literal> statement or
+        a lock set by a storage engine other than
+        <literal>InnoDB</literal> is involved. You must resolve these
+        situations by setting the value of the
+        <literal>innodb_lock_wait_timeout</literal> system variable.
+      </para>
+
+      <para>
+        When <literal>InnoDB</literal> performs a complete rollback of a
+        transaction, all locks set by the transaction are released.
+        However, if just a single SQL statement is rolled back as a
+        result of an error, some of the locks set by the statement may
+        be preserved. This happens because <literal>InnoDB</literal>
+        stores row locks in a format such that it cannot know afterward
+        which lock was set by which statement.
+      </para>
+
+    </section>
+
+    <section id="innodb-deadlocks">
+
+      <title>How to Cope with Deadlocks</title>
+
+      <para>
+        Deadlocks are a classic problem in transactional databases, but
+        they are not dangerous unless they are so frequent that you
+        cannot run certain transactions at all. Normally, you must write
+        your applications so that they are always prepared to re-issue a
+        transaction if it gets rolled back because of a deadlock.
+      </para>
+
+      <para>
+        <literal>InnoDB</literal> uses automatic row-level locking. You
+        can get deadlocks even in the case of transactions that just
+        insert or delete a single row. That is because these operations
+        are not really <quote>atomic</quote>; they automatically set
+        locks on the (possibly several) index records of the row
+        inserted or deleted.
+      </para>
+
+      <para>
+        You can cope with deadlocks and reduce the likelihood of their
+        occurrence with the following techniques:
+      </para>
+
+      <itemizedlist>
+
+        <listitem>
+          <para>
+            Use <literal>SHOW ENGINE INNODB STATUS</literal> to
+            determine the cause of the latest deadlock. That can help
+            you to tune your application to avoid deadlocks.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Always be prepared to re-issue a transaction if it fails due
+            to deadlock. Deadlocks are not dangerous. Just try again.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Commit your transactions often. Small transactions are less
+            prone to collision.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            If you are using locking reads (<literal>SELECT ... FOR
+            UPDATE</literal> or <literal>... LOCK IN SHARE
+            MODE</literal>), try using a lower isolation level such as
+            <literal>READ COMMITTED</literal>.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Access your tables and rows in a fixed order. Then
+            transactions form well-defined queues and do not deadlock.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Add well-chosen indexes to your tables. Then your queries
+            need to scan fewer index records and consequently set fewer
+            locks. Use <literal>EXPLAIN SELECT</literal> to determine
+            which indexes the MySQL server regards as the most
+            appropriate for your queries.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Use less locking. If you can afford to allow a
+            <literal>SELECT</literal> to return data from an old
+            snapshot, do not add the clause <literal>FOR
+            UPDATE</literal> or <literal>LOCK IN SHARE MODE</literal> to
+            it. Using the <literal>READ COMMITTED</literal> isolation
+            level is good here, because each consistent read within the
+            same transaction reads from its own fresh snapshot.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            If nothing else helps, serialize your transactions with
+            table-level locks. The correct way to use <literal>LOCK
+            TABLES</literal> with transactional tables, such as
+            <literal>InnoDB</literal> tables, is to set
+            <literal>AUTOCOMMIT = 0</literal> and not to call
+            <literal>UNLOCK TABLES</literal> until after you commit the
+            transaction explicitly. For example, if you need to write to
+            table <literal>t1</literal> and read from table
+            <literal>t2</literal>, you can do this:
+          </para>
+
+<programlisting>
+SET AUTOCOMMIT=0;
+LOCK TABLES t1 WRITE, t2 READ, ...;
+<replaceable>... do something with tables t1 and t2 here ...</replaceable>
+COMMIT;
+UNLOCK TABLES;
+</programlisting>
+
+          <para>
+            Table-level locks make your transactions queue nicely, and
+            deadlocks are avoided.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Another way to serialize transactions is to create an
+            auxiliary <quote>semaphore</quote> table that contains just
+            a single row. Have each transaction update that row before
+            accessing other tables. In that way, all transactions happen
+            in a serial fashion. Note that the <literal>InnoDB</literal>
+            instant deadlock detection algorithm also works in this
+            case, because the serializing lock is a row-level lock. With
+            MySQL table-level locks, the timeout method must be used to
+            resolve deadlocks.
+          </para>
+        </listitem>
+
+      </itemizedlist>
+
+    </section>
+
+  </section>
+
+  <section id="innodb-tuning">
+
+    <title><literal>InnoDB</literal> Performance Tuning Tips</title>
+
+    <indexterm>
+      <primary>Solaris x86_64 issues</primary>
+    </indexterm>
+
+    <itemizedlist>
+
+      <listitem>
+        <para>
+          In <literal>InnoDB</literal>, having a long <literal>PRIMARY
+          KEY</literal> wastes a lot of disk space because its value
+          must be stored with every secondary index record. (See
+          <xref linkend="innodb-table-and-index"/>.) Create an
+          <literal>AUTO_INCREMENT</literal> column as the primary key if
+          your primary key is long.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          If the Unix <filename>top</filename> tool or the Windows Task
+          Manager shows that the CPU usage percentage with your workload
+          is less than 70%, your workload is probably disk-bound. Maybe
+          you are making too many transaction commits, or the buffer
+          pool is too small. Making the buffer pool bigger can help, but
+          do not set it equal to more than 80% of physical memory.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          Wrap several modifications into one transaction.
+          <literal>InnoDB</literal> must flush the log to disk at each
+          transaction commit if that transaction made modifications to
+          the database. The rotation speed of a disk is typically at
+          most 167 revolutions/second, which constrains the number of
+          commits to the same 167<superscript>th</superscript> of a
+          second if the disk does not <quote>fool</quote> the operating
+          system.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          If you can afford the loss of some of the latest committed
+          transactions if a crash occurs, you can set the
+          <literal>innodb_flush_log_at_trx_commit</literal> parameter to
+          0. <literal>InnoDB</literal> tries to flush the log once per
+          second anyway, although the flush is not guaranteed. You
+          should also set the value of
+          <literal>innodb_support_xa</literal> to 0 which will reduce
+          the number of disk flushes due to synchronizing on disk data
+          and the binary log.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          Make your log files big, even as big as the buffer pool. When
+          <literal>InnoDB</literal> has written the log files full, it
+          has to write the modified contents of the buffer pool to disk
+          in a checkpoint. Small log files cause many unnecessary disk
+          writes. The drawback of big log files is that the recovery
+          time is longer.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          Make the log buffer quite large as well (on the order of 8MB).
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          Use the <literal>VARCHAR</literal> data type instead of
+          <literal>CHAR</literal> if you are storing variable-length
+          strings or if the column may contain many
+          <literal>NULL</literal> values. A
+          <literal>CHAR(<replaceable>N</replaceable>)</literal> column
+          always takes <replaceable>N</replaceable> characters to store
+          data, even if the string is shorter or its value is
+          <literal>NULL</literal>. Smaller tables fit better in the
+          buffer pool and reduce disk I/O.
+        </para>
+
+        <para>
+          When using <literal>row_format=compact</literal> (the default
+          <literal>InnoDB</literal> record format in MySQL
+          &current-series;) and variable-length character sets, such as
+          <literal>utf8</literal> or <literal>sjis</literal>,
+          <literal>CHAR(<replaceable>N</replaceable>)</literal> will
+          occupy a variable amount of space, at least
+          <replaceable>N</replaceable> bytes.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          In some versions of GNU/Linux and Unix, flushing files to disk
+          with the Unix <literal>fsync()</literal> call (which
+          <literal>InnoDB</literal> uses by default) and other similar
+          methods is surprisingly slow. If you are dissatisfied with
+          database write performance, you might try setting the
+          <literal>innodb_flush_method</literal> parameter to
+          <literal>O_DSYNC</literal>. Although
+          <literal>O_DSYNC</literal> seems to be slower on most systems,
+          yours might not be one of them.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          When using the <literal>InnoDB</literal> storage engine on
+          Solaris 10 for x86_64 architecture (AMD Opteron), it is
+          important to mount any filesystems used for storing
+          <literal>InnoDB</literal>-related files using the
+          <literal>forcedirectio</literal> option. (The default on
+          Solaris 10/x86_64 is <emphasis>not</emphasis> to use this
+          option.) Failure to use <literal>forcedirectio</literal>
+          causes a serious degradation of <literal>InnoDB</literal>'s
+          speed and performance on this platform.
+        </para>
+
+        <para>
+          When using the <literal>InnoDB</literal> storage engine with a
+          large <literal>innodb_buffer_pool_size</literal> value on any
+          release of Solaris 2.6 and up and any platform
+          (sparc/x86/x64/amd64), a significant performance gain can be
+          achieved by placing <literal>InnoDB</literal> data files and
+          log files on raw devices or on a separate direct I/O UFS
+          filesystem (using mount option
+          <literal>forcedirectio</literal>; see
+          <literal>mount_ufs(1M)</literal>). Users of the Veritas
+          filesystem VxFS should use the mount option
+          <literal>convosync=direct</literal>.
+        </para>
+
+        <para>
+          Other MySQL data files, such as those for
+          <literal>MyISAM</literal> tables, should not be placed on a
+          direct I/O filesystem. Executables or libraries <emphasis>must
+          not</emphasis> be placed on a direct I/O filesystem.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          When importing data into <literal>InnoDB</literal>, make sure
+          that MySQL does not have autocommit mode enabled because that
+          requires a log flush to disk for every insert. To disable
+          autocommit during your import operation, surround it with
+          <literal>SET AUTOCOMMIT</literal> and
+          <literal>COMMIT</literal> statements:
+        </para>
+
+<programlisting>
+SET AUTOCOMMIT=0;
+<replaceable>... SQL import statements ...</replaceable>
+COMMIT;
+</programlisting>
+
+        <para>
+          If you use the <command>mysqldump</command> option
+          <option>--opt</option>, you get dump files that are fast to
+          import into an <literal>InnoDB</literal> table, even without
+          wrapping them with the <literal>SET AUTOCOMMIT</literal> and
+          <literal>COMMIT</literal> statements.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          Beware of big rollbacks of mass inserts:
+          <literal>InnoDB</literal> uses the insert buffer to save disk
+          I/O in inserts, but no such mechanism is used in a
+          corresponding rollback. A disk-bound rollback can take 30
+          times as long to perform as the corresponding insert. Killing
+          the database process does not help because the rollback starts
+          again on server startup. The only way to get rid of a runaway
+          rollback is to increase the buffer pool so that the rollback
+          becomes CPU-bound and runs fast, or to use a special
+          procedure. See <xref linkend="forcing-recovery"/>.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          Beware also of other big disk-bound operations. Use
+          <literal>DROP TABLE</literal> and <literal>CREATE
+          TABLE</literal> to empty a table, not <literal>DELETE FROM
+          <replaceable>tbl_name</replaceable></literal>.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          Use the multiple-row <literal>INSERT</literal> syntax to
+          reduce communication overhead between the client and the
+          server if you need to insert many rows:
+        </para>
+
+<programlisting>
+INSERT INTO yourtable VALUES (1,2), (5,5), ...;
+</programlisting>
+
+        <para>
+          This tip is valid for inserts into any table, not just
+          <literal>InnoDB</literal> tables.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          If you have <literal>UNIQUE</literal> constraints on secondary
+          keys, you can speed up table imports by temporarily turning
+          off the uniqueness checks during the import session:
+        </para>
+
+<programlisting>
+SET UNIQUE_CHECKS=0;
+<replaceable>... import operation ...</replaceable>
+SET UNIQUE_CHECKS=1;
+</programlisting>
+
+        <para>
+          For big tables, this saves a lot of disk I/O because
+          <literal>InnoDB</literal> can use its insert buffer to write
+          secondary index records in a batch. Be certain that the data
+          contains no duplicate keys. <literal>UNIQUE_CHECKS</literal>
+          allows but does not require storage engines to ignore
+          duplicate keys.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          If you have <literal>FOREIGN KEY</literal> constraints in your
+          tables, you can speed up table imports by turning the foreign
+          key checks off for the duration of the import session:
+        </para>
+
+<programlisting>
+SET FOREIGN_KEY_CHECKS=0;
+<replaceable>... import operation ...</replaceable>
+SET FOREIGN_KEY_CHECKS=1;
+</programlisting>
+
+        <para>
+          For big tables, this can save a lot of disk I/O.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          If you often have recurring queries for tables that are not
+          updated frequently, use the query cache:
+        </para>
+
+<programlisting>
+[mysqld]
+query_cache_type = ON
+query_cache_size = 10M
+</programlisting>
+      </listitem>
+
+      <listitem>
+        <para>
+          Unlike <literal>MyISAM</literal>, <literal>InnoDB</literal>
+          does not store an index cardinality value in its tables.
+          Instead, <literal>InnoDB</literal> computes a cardinality for
+          a table the first time it accesses it after startup. With a
+          large number of tables, this might take significant time. It
+          is the initial table open operation that is important, so to
+          <quote>warm up</quote> a table for later use, you might want
+          to use it immediately after start up by issuing a statement
+          such as <literal>SELECT 1 FROM
+          <replaceable>tbl_name</replaceable> LIMIT 1</literal>.
+        </para>
+      </listitem>
+
+    </itemizedlist>
+
+    <formalpara role="mnmas">
+
+      <title>MySQL Enterprise</title>
+
+      <para>
+        For optimization recommendations geared to your specific
+        circumstances subscribe to the MySQL Enterprise Monitor. For
+        more information see
+        <ulink url="&base-url-enterprise;advisors.html"/>.
+      </para>
+
+    </formalpara>
+
+    <section id="innodb-monitor">
+
+      <title><literal>SHOW ENGINE INNODB STATUS</literal> and the
+        <literal>InnoDB</literal> Monitors</title>
+
+      <para>
+        <literal>InnoDB</literal> includes <literal>InnoDB</literal>
+        Monitors that print information about the
+        <literal>InnoDB</literal> internal state. You can use the
+        <literal>SHOW ENGINE INNODB STATUS</literal> SQL statement at
+        any time to fetch the output of the standard
+        <literal>InnoDB</literal> Monitor to your SQL client. This
+        information is useful in performance tuning. (If you are using
+        the <command>mysql</command> interactive SQL client, the output
+        is more readable if you replace the usual semicolon statement
+        terminator with <literal>\G</literal>.) For a discussion of
+        <literal>InnoDB</literal> lock modes, see
+        <xref linkend="innodb-lock-modes"/>.
+      </para>
+
+<programlisting>
+mysql&gt; <userinput>SHOW ENGINE INNODB STATUS\G</userinput>
+</programlisting>
+
+      <para>
+        Another way to use <literal>InnoDB</literal> Monitors is to let
+        them periodically write data to the standard output of the
+        <command>mysqld</command> server. In this case, no output is
+        sent to clients. When switched on, <literal>InnoDB</literal>
+        Monitors print data about every 15 seconds. Server output
+        usually is directed to the <filename>.err</filename> log in the
+        MySQL data directory. This data is useful in performance tuning.
+        On Windows, you must start the server from a command prompt in a
+        console window with the <option>--console</option> option if you
+        want to direct the output to the window rather than to the error
+        log.
+      </para>
+
+      <para>
+        Monitor output includes the following types of information:
+      </para>
+
+      <itemizedlist>
+
+        <listitem>
+          <para>
+            Table and record locks held by each active transaction
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Lock waits of a transactions
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Semaphore waits of threads
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Pending file I/O requests
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Buffer pool statistics
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Purge and insert buffer merge activity of the main
+            <literal>InnoDB</literal> thread
+          </para>
+        </listitem>
+
+      </itemizedlist>
+
+      <para>
+        To cause the standard <literal>InnoDB</literal> Monitor to write
+        to the standard output of <command>mysqld</command>, use the
+        following SQL statement:
+      </para>
+
+<programlisting>
+CREATE TABLE innodb_monitor (a INT) ENGINE=INNODB;
+</programlisting>
+
+      <para>
+        The monitor can be stopped by issuing the following statement:
+      </para>
+
+<programlisting>
+DROP TABLE innodb_monitor;
+</programlisting>
+
+      <para>
+        The <literal>CREATE TABLE</literal> syntax is just a way to pass
+        a command to the <literal>InnoDB</literal> engine through
+        MySQL's SQL parser: The only things that matter are the table
+        name <literal>innodb_monitor</literal> and that it be an
+        <literal>InnoDB</literal> table. The structure of the table is
+        not relevant at all for the <literal>InnoDB</literal> Monitor.
+        If you shut down the server, the monitor does not restart
+        automatically when you restart the server. You must drop the
+        monitor table and issue a new <literal>CREATE TABLE</literal>
+        statement to start the monitor. (This syntax may change in a
+        future release.)
+      </para>
+
+      <para>
+        You can use <literal>innodb_lock_monitor</literal> in a similar
+        fashion. This is the same as <literal>innodb_monitor</literal>,
+        except that it also provides a great deal of lock information. A
+        separate <literal>innodb_tablespace_monitor</literal> prints a
+        list of created file segments existing in the tablespace and
+        validates the tablespace allocation data structures. In
+        addition, there is <literal>innodb_table_monitor</literal> with
+        which you can print the contents of the
+        <literal>InnoDB</literal> internal data dictionary.
+      </para>
+
+      <para>
+        A sample of <literal>InnoDB</literal> Monitor output:
+      </para>
+
+<programlisting>
+mysql&gt; <userinput>SHOW ENGINE INNODB STATUS\G</userinput>
+*************************** 1. row ***************************
+Status:
+=====================================
+030709 13:00:59 INNODB MONITOR OUTPUT
+=====================================
+Per second averages calculated from the last 18 seconds
+----------
+SEMAPHORES
+----------
+OS WAIT ARRAY INFO: reservation count 413452, signal count 378357
+--Thread 32782 has waited at btr0sea.c line 1477 for 0.00 seconds the
+semaphore: X-lock on RW-latch at 41a28668 created in file btr0sea.c line 135
+a writer (thread id 32782) has reserved it in mode wait exclusive
+number of readers 1, waiters flag 1
+Last time read locked in file btr0sea.c line 731
+Last time write locked in file btr0sea.c line 1347
+Mutex spin waits 0, rounds 0, OS waits 0
+RW-shared spins 108462, OS waits 37964; RW-excl spins 681824, OS waits
+375485
+------------------------
+LATEST FOREIGN KEY ERROR
+------------------------
+030709 13:00:59 Transaction:
+TRANSACTION 0 290328284, ACTIVE 0 sec, process no 3195, OS thread id 34831
+inserting
+15 lock struct(s), heap size 2496, undo log entries 9
+MySQL thread id 25, query id 4668733 localhost heikki update
+insert into ibtest11a (D, B, C) values (5, 'khDk' ,'khDk')
+Foreign key constraint fails for table test/ibtest11a:
+,
+  CONSTRAINT `0_219242` FOREIGN KEY (`A`, `D`) REFERENCES `ibtest11b` (`A`,
+  `D`) ON DELETE CASCADE ON UPDATE CASCADE
+Trying to add in child table, in index PRIMARY tuple:
+ 0: len 4; hex 80000101; asc ....;; 1: len 4; hex 80000005; asc ....;; 2:
+ len 4; hex 6b68446b; asc khDk;; 3: len 6; hex 0000114e0edc; asc ...N..;; 4:
+ len 7; hex 00000000c3e0a7; asc .......;; 5: len 4; hex 6b68446b; asc khDk;;
+But in parent table test/ibtest11b, in index PRIMARY,
+the closest match we can find is record:
+RECORD: info bits 0 0: len 4; hex 8000015b; asc ...[;; 1: len 4; hex
+80000005; asc ....;; 2: len 3; hex 6b6864; asc khd;; 3: len 6; hex
+0000111ef3eb; asc ......;; 4: len 7; hex 800001001e0084; asc .......;; 5:
+len 3; hex 6b6864; asc khd;;
+------------------------
+LATEST DETECTED DEADLOCK
+------------------------
+030709 12:59:58
+*** (1) TRANSACTION:
+TRANSACTION 0 290252780, ACTIVE 1 sec, process no 3185, OS thread id 30733
+inserting
+LOCK WAIT 3 lock struct(s), heap size 320, undo log entries 146
+MySQL thread id 21, query id 4553379 localhost heikki update
+INSERT INTO alex1 VALUES(86, 86, 794,'aA35818','bb','c79166','d4766t',
+'e187358f','g84586','h794',date_format('2001-04-03 12:54:22','%Y-%m-%d
+%H:%i'),7
+*** (1) WAITING FOR THIS LOCK TO BE GRANTED:
+RECORD LOCKS space id 0 page no 48310 n bits 568 table test/alex1 index
+symbole trx id 0 290252780 lock mode S waiting
+Record lock, heap no 324 RECORD: info bits 0 0: len 7; hex 61613335383138;
+asc aa35818;; 1:
+*** (2) TRANSACTION:
+TRANSACTION 0 290251546, ACTIVE 2 sec, process no 3190, OS thread id 32782
+inserting
+130 lock struct(s), heap size 11584, undo log entries 437
+MySQL thread id 23, query id 4554396 localhost heikki update
+REPLACE INTO alex1 VALUES(NULL, 32, NULL,'aa3572','','c3572','d6012t','',
+NULL,'h396', NULL, NULL, 7.31,7.31,7.31,200)
+*** (2) HOLDS THE LOCK(S):
+RECORD LOCKS space id 0 page no 48310 n bits 568 table test/alex1 index
+symbole trx id 0 290251546 lock_mode X locks rec but not gap
+Record lock, heap no 324 RECORD: info bits 0 0: len 7; hex 61613335383138;
+asc aa35818;; 1:
+*** (2) WAITING FOR THIS LOCK TO BE GRANTED:
+RECORD LOCKS space id 0 page no 48310 n bits 568 table test/alex1 index
+symbole trx id 0 290251546 lock_mode X locks gap before rec insert intention
+waiting
+Record lock, heap no 82 RECORD: info bits 0 0: len 7; hex 61613335373230;
+asc aa35720;; 1:
+*** WE ROLL BACK TRANSACTION (1)
+------------
+TRANSACTIONS
+------------
+Trx id counter 0 290328385
+Purge done for trx's n:o &lt; 0 290315608 undo n:o &lt; 0 17
+Total number of lock structs in row lock hash table 70
+LIST OF TRANSACTIONS FOR EACH SESSION:
+---TRANSACTION 0 0, not started, process no 3491, OS thread id 42002
+MySQL thread id 32, query id 4668737 localhost heikki
+show innodb status
+---TRANSACTION 0 290328384, ACTIVE 0 sec, process no 3205, OS thread id
+38929 inserting
+1 lock struct(s), heap size 320
+MySQL thread id 29, query id 4668736 localhost heikki update
+insert into speedc values (1519229,1, 'hgjhjgghggjgjgjgjgjggjgjgjgjgjgggjgjg
+jlhhgghggggghhjhghgggggghjhghghghghghhhhghghghjhhjghjghjkghjghjghjghjfhjfh
+---TRANSACTION 0 290328383, ACTIVE 0 sec, process no 3180, OS thread id
+28684 committing
+1 lock struct(s), heap size 320, undo log entries 1
+MySQL thread id 19, query id 4668734 localhost heikki update
+insert into speedcm values (1603393,1, 'hgjhjgghggjgjgjgjgjggjgjgjgjgjgggjgj
+gjlhhgghggggghhjhghgggggghjhghghghghghhhhghghghjhhjghjghjkghjghjghjghjfhjf
+---TRANSACTION 0 290328327, ACTIVE 0 sec, process no 3200, OS thread id
+36880 starting index read
+LOCK WAIT 2 lock struct(s), heap size 320
+MySQL thread id 27, query id 4668644 localhost heikki Searching rows for
+update
+update ibtest11a set B = 'kHdkkkk' where A = 89572
+------- TRX HAS BEEN WAITING 0 SEC FOR THIS LOCK TO BE GRANTED:
+RECORD LOCKS space id 0 page no 65556 n bits 232 table test/ibtest11a index
+PRIMARY trx id 0 290328327 lock_mode X waiting
+Record lock, heap no 1 RECORD: info bits 0 0: len 9; hex 73757072656d756d00;
+asc supremum.;;
+------------------
+---TRANSACTION 0 290328284, ACTIVE 0 sec, process no 3195, OS thread id
+34831 rollback of SQL statement
+ROLLING BACK 14 lock struct(s), heap size 2496, undo log entries 9
+MySQL thread id 25, query id 4668733 localhost heikki update
+insert into ibtest11a (D, B, C) values (5, 'khDk' ,'khDk')
+---TRANSACTION 0 290327208, ACTIVE 1 sec, process no 3190, OS thread id
+32782
+58 lock struct(s), heap size 5504, undo log entries 159
+MySQL thread id 23, query id 4668732 localhost heikki update
+REPLACE INTO alex1 VALUES(86, 46, 538,'aa95666','bb','c95666','d9486t',
+'e200498f','g86814','h538',date_format('2001-04-03 12:54:22','%Y-%m-%d
+%H:%i'),
+---TRANSACTION 0 290323325, ACTIVE 3 sec, process no 3185, OS thread id
+30733 inserting
+4 lock struct(s), heap size 1024, undo log entries 165
+MySQL thread id 21, query id 4668735 localhost heikki update
+INSERT INTO alex1 VALUES(NULL, 49, NULL,'aa42837','','c56319','d1719t','',
+NULL,'h321', NULL, NULL, 7.31,7.31,7.31,200)
+--------
+FILE I/O
+--------
+I/O thread 0 state: waiting for i/o request (insert buffer thread)
+I/O thread 1 state: waiting for i/o request (log thread)
+I/O thread 2 state: waiting for i/o request (read thread)
+I/O thread 3 state: waiting for i/o request (write thread)
+Pending normal aio reads: 0, aio writes: 0,
+ ibuf aio reads: 0, log i/o's: 0, sync i/o's: 0
+Pending flushes (fsync) log: 0; buffer pool: 0
+151671 OS file reads, 94747 OS file writes, 8750 OS fsyncs
+25.44 reads/s, 18494 avg bytes/read, 17.55 writes/s, 2.33 fsyncs/s
+-------------------------------------
+INSERT BUFFER AND ADAPTIVE HASH INDEX
+-------------------------------------
+Ibuf for space 0: size 1, free list len 19, seg size 21,
+85004 inserts, 85004 merged recs, 26669 merges
+Hash table size 207619, used cells 14461, node heap has 16 buffer(s)
+1877.67 hash searches/s, 5121.10 non-hash searches/s
+---
+LOG
+---
+Log sequence number 18 1212842764
+Log flushed up to   18 1212665295
+Last checkpoint at  18 1135877290
+0 pending log writes, 0 pending chkp writes
+4341 log i/o's done, 1.22 log i/o's/second
+----------------------
+BUFFER POOL AND MEMORY
+----------------------
+Total memory allocated 84966343; in additional pool allocated 1402624
+Buffer pool size   3200
+Free buffers       110
+Database pages     3074
+Modified db pages  2674
+Pending reads 0
+Pending writes: LRU 0, flush list 0, single page 0
+Pages read 171380, created 51968, written 194688
+28.72 reads/s, 20.72 creates/s, 47.55 writes/s
+Buffer pool hit rate 999 / 1000
+--------------
+ROW OPERATIONS
+--------------
+0 queries inside InnoDB, 0 queries in queue
+Main thread process no. 3004, id 7176, state: purging
+Number of rows inserted 3738558, updated 127415, deleted 33707, read 755779
+1586.13 inserts/s, 50.89 updates/s, 28.44 deletes/s, 107.88 reads/s
+----------------------------
+END OF INNODB MONITOR OUTPUT
+============================
+</programlisting>
+
+      <para>
+        Some notes on the output:
+      </para>
+
+      <itemizedlist>
+
+        <listitem>
+          <para>
+            If the <literal>TRANSACTIONS</literal> section reports lock
+            waits, your applications may have lock contention. The
+            output can also help to trace the reasons for transaction
+            deadlocks.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            The <literal>SEMAPHORES</literal> section reports threads
+            waiting for a semaphore and statistics on how many times
+            threads have needed a spin or a wait on a mutex or a rw-lock
+            semaphore. A large number of threads waiting for semaphores
+            may be a result of disk I/O, or contention problems inside
+            <literal>InnoDB</literal>. Contention can be due to heavy
+            parallelism of queries or problems in operating system
+            thread scheduling. Setting
+            <literal>innodb_thread_concurrency</literal> smaller than
+            the default value can help in such situations.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            The <literal>BUFFER POOL AND MEMORY</literal> section gives
+            you statistics on pages read and written. You can calculate
+            from these numbers how many data file I/O operations your
+            queries currently are doing.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            The <literal>ROW OPERATIONS</literal> section shows what the
+            main thread is doing.
+          </para>
+        </listitem>
+
+      </itemizedlist>
+
+      <para>
+        <literal>InnoDB</literal> sends diagnostic output to
+        <literal>stderr</literal> or to files rather than to
+        <literal>stdout</literal> or fixed-size memory buffers, to avoid
+        potential buffer overflows. As a side effect, the output of
+        <literal>SHOW ENGINE INNODB STATUS</literal> is written to a
+        status file in the MySQL data directory every fifteen seconds.
+        The name of the file is
+        <filename>innodb_status.<replaceable>pid</replaceable></filename>,
+        where <replaceable>pid</replaceable> is the server process ID.
+        <literal>InnoDB</literal> removes the file for a normal
+        shutdown. If abnormal shutdowns have occurred, instances of
+        these status files may be present and must be removed manually.
+        Before removing them, you might want to examine them to see
+        whether they contain useful information about the cause of
+        abnormal shutdowns. The
+        <filename>innodb_status.<replaceable>pid</replaceable></filename>
+        file is created only if the configuration option
+        <literal>innodb_status_file=1</literal> is set.
+      </para>
+
+    </section>
+
+  </section>
+
+  <section id="innodb-multi-versioning">
+
+    <title>Implementation of Multi-Versioning</title>
+
+    <para>
+      Because <literal>InnoDB</literal> is a multi-versioned storage
+      engine, it must keep information about old versions of rows in the
+      tablespace. This information is stored in a data structure called
+      a <firstterm>rollback segment</firstterm> (after an analogous data
+      structure in Oracle).
+    </para>
+
+    <para>
+      Internally, <literal>InnoDB</literal> adds two fields to each row
+      stored in the database. A 6-byte field indicates the transaction
+      identifier for the last transaction that inserted or updated the
+      row. Also, a deletion is treated internally as an update where a
+      special bit in the row is set to mark it as deleted. Each row also
+      contains a 7-byte field called the roll pointer. The roll pointer
+      points to an undo log record written to the rollback segment. If
+      the row was updated, the undo log record contains the information
+      necessary to rebuild the content of the row before it was updated.
+    </para>
+
+    <para>
+      <literal>InnoDB</literal> uses the information in the rollback
+      segment to perform the undo operations needed in a transaction
+      rollback. It also uses the information to build earlier versions
+      of a row for a consistent read.
+    </para>
+
+    <para>
+      Undo logs in the rollback segment are divided into insert and
+      update undo logs. Insert undo logs are needed only in transaction
+      rollback and can be discarded as soon as the transaction commits.
+      Update undo logs are used also in consistent reads, but they can
+      be discarded only after there is no transaction present for which
+      <literal>InnoDB</literal> has assigned a snapshot that in a
+      consistent read could need the information in the update undo log
+      to build an earlier version of a database row.
+    </para>
+
+    <para>
+      You must remember to commit your transactions regularly, including
+      those transactions that issue only consistent reads. Otherwise,
+      <literal>InnoDB</literal> cannot discard data from the update undo
+      logs, and the rollback segment may grow too big, filling up your
+      tablespace.
+    </para>
+
+    <para>
+      The physical size of an undo log record in the rollback segment is
+      typically smaller than the corresponding inserted or updated row.
+      You can use this information to calculate the space need for your
+      rollback segment.
+    </para>
+
+    <para>
+      In the <literal>InnoDB</literal> multi-versioning scheme, a row is
+      not physically removed from the database immediately when you
+      delete it with an SQL statement. Only when
+      <literal>InnoDB</literal> can discard the update undo log record
+      written for the deletion can it also physically remove the
+      corresponding row and its index records from the database. This
+      removal operation is called a purge, and it is quite fast, usually
+      taking the same order of time as the SQL statement that did the
+      deletion.
+    </para>
+
+    <para>
+      In a scenario where the user inserts and deletes rows in smallish
+      batches at about the same rate in the table, it is possible that
+      the purge thread starts to lag behind, and the table grows bigger
+      and bigger, making everything disk-bound and very slow. Even if
+      the table carries just 10MB of useful data, it may grow to occupy
+      10GB with all the <quote>dead</quote> rows. In such a case, it
+      would be good to throttle new row operations, and allocate more
+      resources to the purge thread. The
+      <literal>innodb_max_purge_lag</literal> system variable exists for
+      exactly this purpose. See <xref linkend="innodb-parameters"/>, for
+      more information.
+    </para>
+
+  </section>
+
+  <section id="innodb-table-and-index">
+
+    <title><literal>InnoDB</literal> Table and Index Structures</title>
+
+    <para>
+      MySQL stores its data dictionary information for tables in
+      <filename>.frm</filename> files in database directories. This is
+      true for all MySQL storage engines. But every
+      <literal>InnoDB</literal> table also has its own entry in the
+      <literal>InnoDB</literal> internal data dictionary inside the
+      tablespace. When MySQL drops a table or a database, it has to
+      delete both an <filename>.frm</filename> file or files, and the
+      corresponding entries inside the <literal>InnoDB</literal> data
+      dictionary. This is the reason why you cannot move
+      <literal>InnoDB</literal> tables between databases simply by
+      moving the <filename>.frm</filename> files.
+    </para>
+
+    <para>
+      Every <literal>InnoDB</literal> table has a special index called
+      the <firstterm>clustered index</firstterm> where the data for the
+      rows is stored. If you define a <literal>PRIMARY KEY</literal> on
+      your table, the index of the primary key is the clustered index.
+    </para>
+
+    <para>
+      If you do not define a <literal>PRIMARY KEY</literal> for your
+      table, MySQL picks the first <literal>UNIQUE</literal> index that
+      has only <literal>NOT NULL</literal> columns as the primary key
+      and <literal>InnoDB</literal> uses it as the clustered index. If
+      there is no such index in the table, <literal>InnoDB</literal>
+      internally generates a clustered index where the rows are ordered
+      by the row ID that <literal>InnoDB</literal> assigns to the rows
+      in such a table. The row ID is a 6-byte field that increases
+      monotonically as new rows are inserted. Thus, the rows ordered by
+      the row ID are physically in insertion order.
+    </para>
+
+    <para>
+      Accessing a row through the clustered index is fast because the
+      row data is on the same page where the index search leads. If a
+      table is large, the clustered index architecture often saves a
+      disk I/O when compared to the traditional solution. (In many
+      database systems, data storage uses a different page from the
+      index record.)
+    </para>
+
+    <para>
+      In <literal>InnoDB</literal>, the records in non-clustered indexes
+      (also called secondary indexes) contain the primary key value for
+      the row. <literal>InnoDB</literal> uses this primary key value to
+      search for the row from the clustered index. Note that if the
+      primary key is long, the secondary indexes use more space.
+    </para>
+
+    <para>
+      <literal>InnoDB</literal> compares <literal>CHAR</literal> and
+      <literal>VARCHAR</literal> strings of different lengths such that
+      the remaining length in the shorter string is treated as if padded
+      with spaces.
+    </para>
+
+    <section id="innodb-physical-structure">
+
+      <title>Physical Structure of an Index</title>
+
+      <para>
+        All <literal>InnoDB</literal> indexes are B-trees where the
+        index records are stored in the leaf pages of the tree. The
+        default size of an index page is 16KB. When new records are
+        inserted, <literal>InnoDB</literal> tries to leave 1/16 of the
+        page free for future insertions and updates of the index
+        records.
+      </para>
+
+      <para>
+        If index records are inserted in a sequential order (ascending
+        or descending), the resulting index pages are about 15/16 full.
+        If records are inserted in a random order, the pages are from
+        1/2 to 15/16 full. If the fill factor of an index page drops
+        below 1/2, <literal>InnoDB</literal> tries to contract the index
+        tree to free the page.
+      </para>
+
+    </section>
+
+    <section id="innodb-insert-buffering">
+
+      <title>Insert Buffering</title>
+
+      <para>
+        It is a common situation in database applications that the
+        primary key is a unique identifier and new rows are inserted in
+        the ascending order of the primary key. Thus, the insertions to
+        the clustered index do not require random reads from a disk.
+      </para>
+
+      <para>
+        On the other hand, secondary indexes are usually non-unique, and
+        insertions into secondary indexes happen in a relatively random
+        order. This would cause a lot of random disk I/O operations
+        without a special mechanism used in <literal>InnoDB</literal>.
+      </para>
+
+      <para>
+        If an index record should be inserted to a non-unique secondary
+        index, <literal>InnoDB</literal> checks whether the secondary
+        index page is in the buffer pool. If that is the case,
+        <literal>InnoDB</literal> does the insertion directly to the
+        index page. If the index page is not found in the buffer pool,
+        <literal>InnoDB</literal> inserts the record to a special insert
+        buffer structure. The insert buffer is kept so small that it
+        fits entirely in the buffer pool, and insertions can be done
+        very fast.
+      </para>
+
+      <para>
+        Periodically, the insert buffer is merged into the secondary
+        index trees in the database. Often it is possible to merge
+        several insertions to the same page of the index tree, saving
+        disk I/O operations. It has been measured that the insert buffer
+        can speed up insertions into a table up to 15 times.
+      </para>
+
+      <para>
+        The insert buffer merging may continue to happen
+        <emphasis>after</emphasis> the inserting transaction has been
+        committed. In fact, it may continue to happen after a server
+        shutdown and restart (see <xref linkend="forcing-recovery"/>).
+      </para>
+
+      <para>
+        The insert buffer merging may take many hours, when many
+        secondary indexes must be updated, and many rows have been
+        inserted. During this time, disk I/O will be increased, which
+        can cause significant slowdown on disk-bound queries. Another
+        significant background I/O operation is the purge thread (see
+        <xref linkend="innodb-multi-versioning"/>).
+      </para>
+
+    </section>
+
+    <section id="innodb-adaptive-hash">
+
+      <title>Adaptive Hash Indexes</title>
+
+      <para>
+        If a table fits almost entirely in main memory, the fastest way
+        to perform queries on it is to use hash indexes.
+        <literal>InnoDB</literal> has a mechanism that monitors index
+        searches made to the indexes defined for a table. If
+        <literal>InnoDB</literal> notices that queries could benefit
+        from building a hash index, it does so automatically.
+      </para>
+
+      <para>
+        Note that the hash index is always built based on an existing
+        B-tree index on the table. <literal>InnoDB</literal> can build a
+        hash index on a prefix of any length of the key defined for the
+        B-tree, depending on the pattern of searches that
+        <literal>InnoDB</literal> observes for the B-tree index. A hash
+        index can be partial: It is not required that the whole B-tree
+        index is cached in the buffer pool. <literal>InnoDB</literal>
+        builds hash indexes on demand for those pages of the index that
+        are often accessed.
+      </para>
+
+      <para>
+        In a sense, <literal>InnoDB</literal> tailors itself through the
+        adaptive hash index mechanism to ample main memory, coming
+        closer to the architecture of main-memory databases.
+      </para>
+
+    </section>
+
+    <section id="innodb-physical-record">
+
+      <title>Physical Row Structure</title>
+
+      <para>
+        The physical record structure for InnoDB tables is dependent on
+        the row format specified when the table was created. By default
+        InnoDB uses the <literal>COMPACT</literal> format, but the
+        <literal>REDUNDANT</literal> format is available to retain
+        compatibility with older versions of MySQL.
+      </para>
+
+      <para>
+        Records in InnoDB <literal>ROW_FORMAT=REDUNDANT</literal> tables
+        have the following characteristics:
+      </para>
+
+      <itemizedlist>
+
+        <listitem>
+          <para>
+            Each index record contains a six-byte header. The header is
+            used to link together consecutive records, and also in
+            row-level locking.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Records in the clustered index contain fields for all
+            user-defined columns. In addition, there is a six-byte field
+            for the transaction ID and a seven-byte field for the roll
+            pointer.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            If no primary key was defined for a table, each clustered
+            index record also contains a six-byte row ID field.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Each secondary index record contains also all the fields
+            defined for the clustered index key.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            A record contains also a pointer to each field of the
+            record. If the total length of the fields in a record is
+            less than 128 bytes, the pointer is one byte; otherwise, two
+            bytes. The array of these pointers is called the record
+            directory. The area where these pointers point is called the
+            data part of the record.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Internally, InnoDB stores fixed-length character columns
+            such as <literal>CHAR(10)</literal> in a fixed-length
+            format. InnoDB truncates trailing spaces from
+            <literal>VARCHAR</literal> columns.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            An SQL <literal>NULL</literal> value reserves 1 or 2 bytes
+            in the record directory. Besides that, an SQL
+            <literal>NULL</literal> value reserves zero bytes in the
+            data part of the record if stored in a variable length
+            column. In a fixed-length column, it reserves the fixed
+            length of the column in the data part of the record. The
+            motivation behind reserving the fixed space for
+            <literal>NULL</literal> values is that it enables an update
+            of the column from <literal>NULL</literal> to a
+            non-<literal>NULL</literal> value to be done in place
+            without causing fragmentation of the index page.
+          </para>
+        </listitem>
+
+      </itemizedlist>
+
+      <para>
+        Records in InnoDB <literal>ROW_FORMAT=COMPACT</literal> tables
+        have the following characteristics:
+      </para>
+
+      <itemizedlist>
+
+        <listitem>
+          <para>
+            Each index record contains a five-byte header that may be
+            preceded by a variable-length header. The header is used to
+            link together consecutive records, and also in row-level
+            locking.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            The record header contains a bit vector for indicating
+            <literal>NULL</literal> columns. The bit vector occupies
+            (<literal>n_nullable</literal>+7)/8 bytes. Columns that are
+            <literal>NULL</literal> will not occupy other space than the
+            bit in this vector.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            For each non-<literal>NULL</literal> variable-length field,
+            the record header contains the length of the column in one
+            or two bytes. Two bytes will only be needed if part of the
+            column is stored externally or the maximum length exceeds
+            255 bytes and the actual length exceeds 127 bytes.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            The record header is followed by the data contents of the
+            columns. Columns that are <literal>NULL</literal> are
+            omitted.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Records in the clustered index contain fields for all
+            user-defined columns. In addition, there is a six-byte field
+            for the transaction ID and a seven-byte field for the roll
+            pointer.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            If no primary key was defined for a table, each clustered
+            index record also contains a six-byte row ID field.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Each secondary index record contains also all the fields
+            defined for the clustered index key.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Internally, InnoDB stores fixed-length, fixed-width
+            character columns such as <literal>CHAR(10)</literal> in a
+            fixed-length format. InnoDB truncates trailing spaces from
+            <literal>VARCHAR</literal> columns.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            Internally, InnoDB attempts to store UTF-8
+            <literal>CHAR(<literal>n</literal>)</literal> columns in
+            <literal>n</literal> bytes by trimming trailing spaces. In
+            <literal>ROW_FORMAT=REDUNDANT</literal>, such columns occupy
+            3*<literal>n</literal> bytes. The motivation behind
+            reserving the minimum space <literal>n</literal> is that it
+            in many cases enables an update of the column to be done in
+            place without causing fragmentation of the index page.
+          </para>
+        </listitem>
+
+      </itemizedlist>
+
+      <para>
+        The presence of the compact row format decreases row storage
+        space by about 20% at the cost of increasing CPU use for some
+        operations. If your workload is a typical one that is limited by
+        cache hit rates and disk speed it is likely to be faster. If it
+        is a rare case that is limited by CPU speed, it might be slower.
+      </para>
+
+    </section>
+
+  </section>
+
+  <section id="file-space-management">
+
+    <title><literal>InnoDB</literal> File Space Management and Disk I/O</title>
+
+    <section id="innodb-disk-io">
+
+      <title><literal>InnoDB</literal> Disk I/O</title>
+
+      <para>
+        <literal>InnoDB</literal> uses simulated asynchronous disk I/O:
+        <literal>InnoDB</literal> creates a number of threads to take
+        care of I/O operations, such as read-ahead.
+      </para>
+
+      <para>
+        There are two read-ahead heuristics in
+        <literal>InnoDB</literal>:
+      </para>
+
+      <itemizedlist>
+
+        <listitem>
+          <para>
+            In sequential read-ahead, if <literal>InnoDB</literal>
+            notices that the access pattern to a segment in the
+            tablespace is sequential, it posts in advance a batch of
+            reads of database pages to the I/O system.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            In random read-ahead, if <literal>InnoDB</literal> notices
+            that some area in a tablespace seems to be in the process of
+            being fully read into the buffer pool, it posts the
+            remaining reads to the I/O system.
+          </para>
+        </listitem>
+
+      </itemizedlist>
+
+      <para>
+        <literal>InnoDB</literal> uses a novel file flush technique
+        called <firstterm>doublewrite</firstterm>. It adds safety to
+        recovery following an operating system crash or a power outage,
+        and improves performance on most varieties of Unix by reducing
+        the need for <literal>fsync()</literal> operations.
+      </para>
+
+      <para>
+        Doublewrite means that before writing pages to a data file,
+        <literal>InnoDB</literal> first writes them to a contiguous
+        tablespace area called the doublewrite buffer. Only after the
+        write and the flush to the doublewrite buffer has completed does
+        <literal>InnoDB</literal> write the pages to their proper
+        positions in the data file. If the operating system crashes in
+        the middle of a page write, <literal>InnoDB</literal> can later
+        find a good copy of the page from the doublewrite buffer during
+        recovery.
+      </para>
+
+    </section>
+
+    <section id="innodb-file-space">
+
+      <title>File Space Management</title>
+
+      <para>
+        The data files that you define in the configuration file form
+        the tablespace of <literal>InnoDB</literal>. The files are
+        simply concatenated to form the tablespace. There is no striping
+        in use. Currently, you cannot define where within the tablespace
+        your tables are allocated. However, in a newly created
+        tablespace, <literal>InnoDB</literal> allocates space starting
+        from the first data file.
+      </para>
+
+      <para>
+        The tablespace consists of database pages with a default size of
+        16KB. The pages are grouped into extents of 64 consecutive
+        pages. The <quote>files</quote> inside a tablespace are called
+        <firstterm>segments</firstterm> in <literal>InnoDB</literal>.
+        The term <quote>rollback segment</quote> is somewhat confusing
+        because it actually contains many tablespace segments.
+      </para>
+
+      <para>
+        Two segments are allocated for each index in
+        <literal>InnoDB</literal>. One is for non-leaf nodes of the
+        B-tree, the other is for the leaf nodes. The idea here is to
+        achieve better sequentiality for the leaf nodes, which contain
+        the data.
+      </para>
+
+      <para>
+        When a segment grows inside the tablespace,
+        <literal>InnoDB</literal> allocates the first 32 pages to it
+        individually. After that <literal>InnoDB</literal> starts to
+        allocate whole extents to the segment. <literal>InnoDB</literal>
+        can add to a large segment up to 4 extents at a time to ensure
+        good sequentiality of data.
+      </para>
+
+      <para>
+        Some pages in the tablespace contain bitmaps of other pages, and
+        therefore a few extents in an <literal>InnoDB</literal>
+        tablespace cannot be allocated to segments as a whole, but only
+        as individual pages.
+      </para>
+
+      <para>
+        When you ask for available free space in the tablespace by
+        issuing a <literal>SHOW TABLE STATUS</literal> statement,
+        <literal>InnoDB</literal> reports the extents that are
+        definitely free in the tablespace. <literal>InnoDB</literal>
+        always reserves some extents for cleanup and other internal
+        purposes; these reserved extents are not included in the free
+        space.
+      </para>
+
+      <para>
+        When you delete data from a table, <literal>InnoDB</literal>
+        contracts the corresponding B-tree indexes. Whether the freed
+        space becomes available for other users depends on whether the
+        pattern of deletes frees individual pages or extents to the
+        tablespace. Dropping a table or deleting all rows from it is
+        guaranteed to release the space to other users, but remember
+        that deleted rows are physically removed only in an (automatic)
+        purge operation after they are no longer needed for transaction
+        rollbacks or consistent reads. (See
+        <xref linkend="innodb-multi-versioning"/>.)
+      </para>
+
+    </section>
+
+    <section id="innodb-file-defragmenting">
+
+      <title>Defragmenting a Table</title>
+
+      <para>
+        If there are random insertions into or deletions from the
+        indexes of a table, the indexes may become fragmented.
+        Fragmentation means that the physical ordering of the index
+        pages on the disk is not close to the index ordering of the
+        records on the pages, or that there are many unused pages in the
+        64-page blocks that were allocated to the index.
+      </para>
+
+      <para>
+        A symptom of fragmentation is that a table takes more space than
+        it <quote>should</quote> take. How much that is exactly, is
+        difficult to determine. All <literal>InnoDB</literal> data and
+        indexes are stored in B-trees, and their fill factor may vary
+        from 50% to 100%. Another symptom of fragmentation is that a
+        table scan such as this takes more time than it
+        <quote>should</quote> take:
+      </para>
+
+<programlisting>
+SELECT COUNT(*) FROM t WHERE a_non_indexed_column &lt;&gt; 12345;
+</programlisting>
+
+      <para>
+        (In the preceding query, we are <quote>fooling</quote> the SQL
+        optimizer into scanning the clustered index, rather than a
+        secondary index.) Most disks can read 10 to 50MB/s, which can be
+        used to estimate how fast a table scan should run.
+      </para>
+
+      <para>
+        It can speed up index scans if you periodically perform a
+        <quote>null</quote> <literal>ALTER TABLE</literal> operation:
+      </para>
+
+<programlisting>
+ALTER TABLE <replaceable>tbl_name</replaceable> ENGINE=INNODB
+</programlisting>
+
+      <para>
+        That causes MySQL to rebuild the table. Another way to perform a
+        defragmentation operation is to use <command>mysqldump</command>
+        to dump the table to a text file, drop the table, and reload it
+        from the dump file.
+      </para>
+
+      <para>
+        If the insertions to an index are always ascending and records
+        are deleted only from the end, the <literal>InnoDB</literal>
+        filespace management algorithm guarantees that fragmentation in
+        the index does not occur.
+      </para>
+
+    </section>
+
+  </section>
+
+  <section id="innodb-error-handling">
+
+    <title><literal>InnoDB</literal> Error Handling</title>
+
+    <para>
+      Error handling in <literal>InnoDB</literal> is not always the same
+      as specified in the SQL standard. According to the standard, any
+      error during an SQL statement should cause the rollback of that
+      statement. <literal>InnoDB</literal> sometimes rolls back only
+      part of the statement, or the whole transaction. The following
+      items describe how <literal>InnoDB</literal> performs error
+      handling:
+    </para>
+
+    <itemizedlist>
+
+      <listitem>
+        <para>
+          If you run out of file space in the tablespace, a MySQL
+          <literal>Table is full</literal> error occurs and
+          <literal>InnoDB</literal> rolls back the SQL statement.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          A transaction deadlock causes <literal>InnoDB</literal> to
+          roll back the entire transaction. In the case of a lock wait
+          timeout, <literal>InnoDB</literal> rolls back only the most
+          recent SQL statement.
+        </para>
+
+        <para>
+          When a transaction rollback occurs due to a deadlock or lock
+          wait timeout, it cancels the effect of the statements within
+          the transaction. But if the start-transaction statement was
+          <literal>START TRANSACTION</literal> or
+          <literal>BEGIN</literal> statement, rollback does not cancel
+          that statement. Further SQL statements become part of the
+          transaction until the occurrence of <literal>COMMIT</literal>,
+          <literal>ROLLBACK</literal>, or some SQL statement that causes
+          an implicit commit.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          A duplicate-key error rolls back the SQL statement, if you
+          have not specified the <literal>IGNORE</literal> option in
+          your statement.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          A <literal>row too long error</literal> rolls back the SQL
+          statement.
+        </para>
+      </listitem>
+
+      <listitem>
+        <para>
+          Other errors are mostly detected by the MySQL layer of code
+          (above the <literal>InnoDB</literal> storage engine level),
+          and they roll back the corresponding SQL statement. Locks are
+          not released in a rollback of a single SQL statement.
+        </para>
+      </listitem>
+
+    </itemizedlist>
+
+    <para>
+      During implicit rollbacks, as well as during the execution of an
+      explicit <literal>ROLLBACK</literal> SQL statement, <literal>SHOW
+      PROCESSLIST</literal> displays <literal>Rolling back</literal> in
+      the <literal>State</literal> column for the relevant connection.
+    </para>
+
+    <section id="innodb-error-codes">
+
+      <title><literal>InnoDB</literal> Error Codes</title>
+
+      <para>
+        The following is a non-exhaustive list of common
+        <literal>InnoDB</literal>-specific errors that you may
+        encounter, with information about why each occurs and how to
+        resolve the problem.
+      </para>
+
+      <itemizedlist>
+
+        <listitem>
+          <para>
+            <literal>1005 (ER_CANT_CREATE_TABLE)</literal>
+          </para>
+
+          <para>
+            Cannot create table. If the error message refers to
+            <literal>errno</literal> 150, table creation failed because
+            a foreign key constraint was not correctly formed. If the
+            error message refers to <literal>errno</literal> -1, table
+            creation probably failed because the table includes a column
+            name that matched the name of an internal InnoDB table.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>1016 (ER_CANT_OPEN_FILE)</literal>
+          </para>
+
+          <para>
+            Cannot find the <literal>InnoDB</literal> table from the
+            <literal>InnoDB</literal> data files, although the
+            <filename>.frm</filename> file for the table exists. See
+            <xref linkend="innodb-troubleshooting-datadict"/>.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>1114 (ER_RECORD_FILE_FULL)</literal>
+          </para>
+
+          <para>
+            <literal>InnoDB</literal> has run out of free space in the
+            tablespace. You should reconfigure the tablespace to add a
+            new data file.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>1205 (ER_LOCK_WAIT_TIMEOUT)</literal>
+          </para>
+
+          <para>
+            Lock wait timeout expired. Transaction was rolled back.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>1213 (ER_LOCK_DEADLOCK)</literal>
+          </para>
+
+          <para>
+            Transaction deadlock. You should rerun the transaction.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>1216 (ER_NO_REFERENCED_ROW)</literal>
+          </para>
+
+          <para>
+            You are trying to add a row but there is no parent row, and
+            a foreign key constraint fails. You should add the parent
+            row first.
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>1217 (ER_ROW_IS_REFERENCED)</literal>
+          </para>
+
+          <para>
+            You are trying to delete a parent row that has children, and
+            a foreign key constraint fails. You should delete the
+            children first.
+          </para>
+        </listitem>
+
+      </itemizedlist>
+
+    </section>
+
+    <section id="operating-system-error-codes">
+
+      <title>Operating System Error Codes</title>
+
+      <para>
+        To print the meaning of an operating system error number, use
+        the <command>perror</command> program that comes with the MySQL
+        distribution.
+      </para>
+
+      <remark role="todo">
+        CHECK URL
+      </remark>
+
+      <para>
+        The following table provides a list of some common Linux system
+        error codes. For a more complete list, see
+        <ulink url="http://www.iglu.org.il/lxr/source/include/asm-i386/errno.h">Linux
+        source code</ulink>.
+      </para>
+
+      <itemizedlist>
+
+        <listitem>
+          <para>
+            <literal>1 (EPERM)</literal>
+          </para>
+
+          <para>
+            Operation not permitted
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>2 (ENOENT)</literal>
+          </para>
+
+          <para>
+            No such file or directory
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>3 (ESRCH)</literal>
+          </para>
+
+          <para>
+            No such process
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>4 (EINTR)</literal>
+          </para>
+
+          <para>
+            Interrupted system call
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>5 (EIO)</literal>
+          </para>
+
+          <para>
+            I/O error
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>6 (ENXIO)</literal>
+          </para>
+
+          <para>
+            No such device or address
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>7 (E2BIG)</literal>
+          </para>
+
+          <para>
+            Arg list too long
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>8 (ENOEXEC)</literal>
+          </para>
+
+          <para>
+            Exec format error
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>9 (EBADF)</literal>
+          </para>
+
+          <para>
+            Bad file number
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>10 (ECHILD)</literal>
+          </para>
+
+          <para>
+            No child processes
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>11 (EAGAIN)</literal>
+          </para>
+
+          <para>
+            Try again
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>12 (ENOMEM)</literal>
+          </para>
+
+          <para>
+            Out of memory
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>13 (EACCES)</literal>
+          </para>
+
+          <para>
+            Permission denied
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>14 (EFAULT)</literal>
+          </para>
+
+          <para>
+            Bad address
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>15 (ENOTBLK)</literal>
+          </para>
+
+          <para>
+            Block device required
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>16 (EBUSY)</literal>
+          </para>
+
+          <para>
+            Device or resource busy
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>17 (EEXIST)</literal>
+          </para>
+
+          <para>
+            File exists
+          </para>
+        </listitem>
+
+        <listitem>
+          <para>
+            <literal>18 (EXDEV)</literal>
+          </para>
+
+          <para>
+            Cross-device link
+          </para>
+        </listitem>
+