Author: jstephens
Date: 2009-09-25 11:34:22 +0200 (Fri, 25 Sep 2009)
New Revision: 16805
Log:
More work on WL #4556 (Cluster chapter reorg):
Moved Disk Data into Management section (5.1)
Moved High-Speed Interconnects into Configuration section (all)
Nuked standalone section files
Rebuilt dependencies
Removed:
trunk/refman-4.1/mysql-cluster-interconnects.xml
trunk/refman-5.0/mysql-cluster-interconnects.xml
trunk/refman-5.1/mysql-cluster-disk-data.xml
trunk/refman-5.1/mysql-cluster-interconnects.xml
Modified:
trunk/ndbapi/Makefile.depends
trunk/refman-4.1/Makefile.depends
trunk/refman-4.1/mysql-cluster-configuration-core.xml
trunk/refman-4.1/mysql-cluster.xml
trunk/refman-5.0/Makefile.depends
trunk/refman-5.0/mysql-cluster-configuration-core.xml
trunk/refman-5.0/mysql-cluster.xml
trunk/refman-5.1-maria/Makefile.depends
trunk/refman-5.1/Makefile.depends
trunk/refman-5.1/mysql-cluster-configuration-core.xml
trunk/refman-5.1/mysql-cluster-management.xml
trunk/refman-5.1/mysql-cluster.xml
trunk/refman-5.4/Makefile.depends
trunk/topic-guides/topics-5.1/Makefile.depends
Modified: trunk/ndbapi/Makefile.depends
===================================================================
--- trunk/ndbapi/Makefile.depends 2009-09-25 07:18:52 UTC (rev 16804)
+++ trunk/ndbapi/Makefile.depends 2009-09-25 09:34:22 UTC (rev 16805)
Changed blocks: 5, Lines Added: 2, Lines Deleted: 6; 2392 bytes
@@ -165,7 +165,7 @@
images/published/NdbDictionary-Object-LogfileGroup-class.png
class_logfilegroup_SOURCES = class-logfilegroup.xml $(class_logfilegroup_INCLUDES)
class_logfilegroup_IDMAPS = \
- ../refman-5.1/metadata/mysql-cluster-disk-data.idmap \
+ ../refman-5.1/metadata/mysql-cluster-management.idmap \
metadata/class-dictionary.idmap \
metadata/class-logfilegroup.idmap \
metadata/class-ndbdictionary.idmap \
@@ -821,7 +821,7 @@
../refman-5.1/metadata/errors-problems-core.idmap \
../refman-5.1/metadata/functions-core.idmap \
../refman-5.1/metadata/mysql-cluster-configuration-core.idmap \
- ../refman-5.1/metadata/mysql-cluster-disk-data.idmap \
+ ../refman-5.1/metadata/mysql-cluster-management.idmap \
../refman-5.1/metadata/partitioning.idmap \
../refman-5.1/metadata/sql-syntax-data-definition.idmap \
metadata/class-column.idmap \
@@ -1032,7 +1032,6 @@
ndb_internals_SOURCES = ndb-internals.xml $(ndb_internals_INCLUDES)
ndb_internals_IDMAPS = \
../refman-5.1/metadata/mysql-cluster-configuration-core.idmap \
- ../refman-5.1/metadata/mysql-cluster-disk-data.idmap \
../refman-5.1/metadata/mysql-cluster-management.idmap \
../refman-5.1/metadata/mysql-cluster-programs-core.idmap \
../refman-5.1/metadata/mysql-cluster-replication.idmap \
@@ -1188,9 +1187,7 @@
../refman-5.1/metadata/errors-problems-core.idmap \
../refman-5.1/metadata/functions-core.idmap \
../refman-5.1/metadata/mysql-cluster-configuration-core.idmap \
- ../refman-5.1/metadata/mysql-cluster-disk-data.idmap \
../refman-5.1/metadata/mysql-cluster-glossary.idmap \
- ../refman-5.1/metadata/mysql-cluster-interconnects.idmap \
../refman-5.1/metadata/mysql-cluster-management.idmap \
../refman-5.1/metadata/mysql-cluster-programs-core.idmap \
../refman-5.1/metadata/mysql-cluster-replication.idmap \
@@ -1260,7 +1257,6 @@
overview_IDMAPS = \
../refman-5.1/metadata/mysql-cluster-configuration-core.idmap \
../refman-5.1/metadata/mysql-cluster-glossary.idmap \
- ../refman-5.1/metadata/mysql-cluster-interconnects.idmap \
../refman-5.1/metadata/mysql-cluster-programs-core.idmap \
../refman-5.1/metadata/partitioning.idmap \
metadata/class-column.idmap \
Modified: trunk/refman-4.1/Makefile.depends
===================================================================
--- trunk/refman-4.1/Makefile.depends 2009-09-25 07:18:52 UTC (rev 16804)
+++ trunk/refman-4.1/Makefile.depends 2009-09-25 09:34:22 UTC (rev 16805)
Changed blocks: 11, Lines Added: 0, Lines Deleted: 31; 5383 bytes
@@ -1015,7 +1015,6 @@
metadata/dba-mysqld-server-core.idmap \
metadata/installing.idmap \
metadata/mysql-cluster-configuration-core.idmap \
- metadata/mysql-cluster-interconnects.idmap \
metadata/mysql-cluster-limitations.idmap \
metadata/mysql-cluster-management.idmap \
metadata/mysql-cluster-optvar-core.idmap \
@@ -1833,7 +1832,6 @@
mysql-cluster-configuration-core.xml \
mysql-cluster-faq.xml \
mysql-cluster-glossary.xml \
- mysql-cluster-interconnects.xml \
mysql-cluster-limitations.xml \
mysql-cluster-management.xml \
mysql-cluster-multi-computer.xml \
@@ -2005,7 +2003,6 @@
../refman-5.0/metadata/introduction.idmap \
../refman-5.0/metadata/sql-syntax-data-manipulation.idmap \
../refman-5.1/metadata/introduction.idmap \
- ../refman-5.1/metadata/mysql-cluster-disk-data.idmap \
../refman-5.1/metadata/mysql-cluster-management.idmap \
../refman-5.1/metadata/mysql-cluster-replication.idmap \
../refman-5.1/metadata/mysql-cluster-roadmap.idmap \
@@ -2069,7 +2066,6 @@
metadata/mysql-cluster-configuration-core.idmap \
metadata/mysql-cluster-faq.idmap \
metadata/mysql-cluster-glossary.idmap \
- metadata/mysql-cluster-interconnects.idmap \
metadata/mysql-cluster-limitations.idmap \
metadata/mysql-cluster-management.idmap \
metadata/mysql-cluster-multi-computer.idmap \
@@ -2139,13 +2135,11 @@
mysql_cluster_faq_IDMAPS = \
../ndbapi/metadata/ndb-internals.idmap \
../ndbapi/metadata/overview.idmap \
- ../refman-5.1/metadata/mysql-cluster-disk-data.idmap \
../refman-5.1/metadata/mysql-cluster-management.idmap \
../refman-5.1/metadata/mysql-cluster-replication.idmap \
../refman-5.1/metadata/mysql-cluster-roadmap.idmap \
metadata/data-types.idmap \
metadata/mysql-cluster-configuration-core.idmap \
- metadata/mysql-cluster-interconnects.idmap \
metadata/mysql-cluster-limitations.idmap \
metadata/mysql-cluster-management.idmap \
metadata/mysql-cluster-multi-computer.idmap \
@@ -2173,7 +2167,6 @@
mysql_cluster_glossary_SOURCES = mysql-cluster-glossary.xml $(mysql_cluster_glossary_INCLUDES)
mysql_cluster_glossary_IDMAPS = \
metadata/mysql-cluster-configuration-core.idmap \
- metadata/mysql-cluster-interconnects.idmap \
metadata/mysql-cluster-management.idmap \
metadata/mysql-cluster-overview.idmap \
metadata/mysql-cluster-programs-core.idmap
@@ -2186,26 +2179,6 @@
mysql-cluster-glossary-manprepped.xml: $(mysql_cluster_glossary_SOURCES) $(mysql_cluster_glossary_IDMAPS)
mysql-cluster-glossary-remprepped.xml: $(mysql_cluster_glossary_SOURCES) $(mysql_cluster_glossary_IDMAPS)
-mysql_cluster_interconnects_INCLUDES = \
- ../common/fixedchars.ent \
- ../common/phrases.ent \
- ../refman-common/urls.ent \
- all-entities.ent \
- versions.ent
-mysql_cluster_interconnects_IMAGES =
-mysql_cluster_interconnects_SOURCES = mysql-cluster-interconnects.xml $(mysql_cluster_interconnects_INCLUDES)
-mysql_cluster_interconnects_IDMAPS = \
- metadata/mysql-cluster-configuration-core.idmap \
- metadata/mysql-cluster-interconnects.idmap
-mysql-cluster-interconnects.validpure: $(mysql_cluster_interconnects_SOURCES)
-mysql-cluster-interconnects.titles: $(mysql_cluster_interconnects_SOURCES)
-mysql-cluster-interconnects.useless: $(mysql_cluster_interconnects_SOURCES)
-mysql-cluster-interconnects.valid: $(mysql_cluster_interconnects_SOURCES) $(mysql_cluster_interconnects_IDMAPS)
-mysql-cluster-interconnects.validwarn: $(mysql_cluster_interconnects_SOURCES) $(mysql_cluster_interconnects_IDMAPS)
-mysql-cluster-interconnects-prepped.xml: $(mysql_cluster_interconnects_SOURCES) $(mysql_cluster_interconnects_IDMAPS)
-mysql-cluster-interconnects-manprepped.xml: $(mysql_cluster_interconnects_SOURCES) $(mysql_cluster_interconnects_IDMAPS)
-mysql-cluster-interconnects-remprepped.xml: $(mysql_cluster_interconnects_SOURCES) $(mysql_cluster_interconnects_IDMAPS)
-
mysql_cluster_limitations_INCLUDES = \
../common/fixedchars.ent \
../common/phrases.ent \
@@ -2325,7 +2298,6 @@
../ndbapi/metadata/mgm-api.idmap \
metadata/dba-mysqld-max.idmap \
metadata/mysql-cluster-configuration-core.idmap \
- metadata/mysql-cluster-interconnects.idmap \
metadata/mysql-cluster-limitations.idmap \
metadata/mysql-cluster-overview.idmap \
metadata/mysql-cluster-security.idmap
@@ -2412,7 +2384,6 @@
mysql-cluster-configuration-core.xml \
mysql-cluster-faq.xml \
mysql-cluster-glossary.xml \
- mysql-cluster-interconnects.xml \
mysql-cluster-limitations.xml \
mysql-cluster-management.xml \
mysql-cluster-multi-computer.xml \
@@ -2439,7 +2410,6 @@
../ndbapi/metadata/ndb-errors.idmap \
../ndbapi/metadata/ndb-internals.idmap \
../ndbapi/metadata/overview.idmap \
- ../refman-5.1/metadata/mysql-cluster-disk-data.idmap \
../refman-5.1/metadata/mysql-cluster-management.idmap \
../refman-5.1/metadata/mysql-cluster-replication.idmap \
../refman-5.1/metadata/mysql-cluster-roadmap.idmap \
@@ -2454,7 +2424,6 @@
metadata/mysql-cluster-configuration-core.idmap \
metadata/mysql-cluster-faq.idmap \
metadata/mysql-cluster-glossary.idmap \
- metadata/mysql-cluster-interconnects.idmap \
metadata/mysql-cluster-limitations.idmap \
metadata/mysql-cluster-management.idmap \
metadata/mysql-cluster-multi-computer.idmap \
Modified: trunk/refman-4.1/mysql-cluster-configuration-core.xml
===================================================================
--- trunk/refman-4.1/mysql-cluster-configuration-core.xml 2009-09-25 07:18:52 UTC (rev 16804)
+++ trunk/refman-4.1/mysql-cluster-configuration-core.xml 2009-09-25 09:34:22 UTC (rev 16805)
Changed blocks: 2, Lines Added: 511, Lines Deleted: 194; 30848 bytes
@@ -5119,6 +5119,269 @@
</section>
+ <section id="mysql-cluster-config-lcp-params">
+
+ <title>Configuring MySQL Cluster Parameters for Local Checkpoints</title>
+
+ <indexterm>
+ <primary>DataMemory</primary>
+ </indexterm>
+
+ <indexterm>
+ <primary>IndexMemory</primary>
+ </indexterm>
+
+ <indexterm>
+ <primary>NoOfDiskPagesToDiskAfterRestartACC</primary>
+ <secondary>calculating</secondary>
+ </indexterm>
+
+ <indexterm>
+ <primary>NoOfDiskPagesToDiskAfterRestartTUP</primary>
+ <secondary>calculating</secondary>
+ </indexterm>
+
+ <indexterm>
+ <primary>NoOfFragmentLogFiles</primary>
+ <secondary>calculating</secondary>
+ </indexterm>
+
+ <indexterm>
+ <primary>local checkpoints (MySQL Cluster)</primary>
+ </indexterm>
+
+ <indexterm>
+ <primary>MySQL Cluster</primary>
+ <secondary>configuration</secondary>
+ </indexterm>
+
+ <remark role="note">
+ [js] Adapted from an item in Mikael Ronström's blog,
+ 2006-07-11.
+ </remark>
+
+ <para>
+ The parameters discussed in
+ <link linkend="mysql-cluster-logging-and-checkpointing">Logging
+ and Checkpointing</link> and in
+ <link linkend="mysql-cluster-data-memory-index-memory-string-memory">Data
+ Memory, Index Memory, and String Memory</link> that are used to
+ configure local checkpoints for a MySQL Cluster do not exist in
+ isolation, but rather are very much interdepedent on each other.
+ In this section, we illustrate how these parameters —
+ including <literal>DataMemory</literal>,
+ <literal>IndexMemory</literal>,
+ <literal>NoOfDiskPagesToDiskAfterRestartTUP</literal>,
+ <literal>NoOfDiskPagesToDiskAfterRestartACC</literal>, and
+ <literal>NoOfFragmentLogFiles</literal> — relate to one
+ another in a working Cluster.
+ </para>
+
+ <para>
+ In this example, we assume that our application performs the
+ following numbers of types of operations per hour:
+ </para>
+
+ <itemizedlist>
+
+ <listitem>
+ <para>
+ 50000 selects
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ 15000 inserts
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ 15000 updates
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ 15000 deletes
+ </para>
+ </listitem>
+
+ </itemizedlist>
+
+ <para>
+ We also make the following assumptions about the data used in
+ the application:
+ </para>
+
+ <itemizedlist>
+
+ <listitem>
+ <para>
+ We are working with a single table having 40 columns.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Each column can hold up to 32 bytes of data.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ A typical <literal role="stmt">UPDATE</literal> run by the
+ application affects the values of 5 columns.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ No <literal>NULL</literal> values are inserted by the
+ application.
+ </para>
+ </listitem>
+
+ </itemizedlist>
+
+ <para>
+ A good starting point is to determine the amount of time that
+ should elapse between local checkpoints (LCPs). It is worth
+ noting that, in the event of a system restart, it takes 40-60
+ percent of this interval to execute the REDO log — for
+ example, if the time between LCPs is 5 minutes (300 seconds),
+ then it should take 2 to 3 minutes (120 to 180 seconds) for the
+ REDO log to be read.
+ </para>
+
+ <para>
+ The maximum amount of data per node can be assumed to be the
+ size of the <literal>DataMemory</literal> parameter. In this
+ example, we assume that this is 2 GB. The
+ <literal>NoOfDiskPagesToDiskAfterRestartTUP</literal> parameter
+ represents the amount of data to be checkpointed per unit time
+ — however, this parameter is actually expressed as the
+ number of 8K memory pages to be checkpointed per 100
+ milliseconds. 2 GB per 300 seconds is approximately 6.8 MB per
+ second, or 700 KB per 100 milliseconds, which works out to
+ roughly 85 pages per 100 milliseconds.
+ </para>
+
+ <para>
+ Similarly, we can calculate
+ <literal>NoOfDiskPagesToDiskAfterRestartACC</literal> in terms
+ of the time for local checkpoints and the amount of memory
+ required for indexes — that is, the
+ <literal>IndexMemory</literal>. Assuming that we allow 512 MB
+ for indexes, this works out to approximately 20 8-KB pages per
+ 100 milliseconds for this parameter.
+ </para>
+
+ <para>
+ Next, we need to determine the number of REDO log files required
+ — that is, fragment log files — the corresponding
+ parameter being <literal>NoOfFragmentLogFiles</literal>. We need
+ to make sure that there are sufficient REDO log files for
+ keeping records for at least 3 local checkpoints. In a
+ production setting, there are always uncertainties — for
+ instance, we cannot be sure that disks always operate at top
+ speed or with maximum throughput. For this reason, it is best to
+ err on the side of caution, so we double our requirement and
+ calculate a number of fragment log files which should be enough
+ to keep records covering 6 local checkpoints.
+ </para>
+
+ <para>
+ It is also important to remember that the disk also handles
+ writes to the REDO log and UNDO log, so if you find that the
+ amount of data being written to disk as determined by the values
+ of <literal>NoOfDiskPagesToDiskAfterRestartACC</literal> and
+ <literal>NoOfDiskPagesToDiskAfterRestartTUP</literal> is
+ approaching the amount of disk bandwidth available, you may wish
+ to increase the time between local checkpoints.
+ </para>
+
+ <para>
+ Given 5 minutes (300 seconds) per local checkpoint, this means
+ that we need to support writing log records at maximum speed for
+ 6 * 300 = 1800 seconds. The size of a REDO log record is 72
+ bytes plus 4 bytes per updated column value plus the maximum
+ size of the updated column, and there is one REDO log record for
+ each table record updated in a transaction, on each node where
+ the data reside. Using the numbers of operations set out
+ previously in this section, we derive the following:
+ </para>
+
+ <itemizedlist>
+
+ <listitem>
+ <para>
+ 50000 select operations per hour yields 0 log records (and
+ thus 0 bytes), since <literal role="stmt">SELECT</literal>
+ statements are not recorded in the REDO log.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ 15000 <literal role="stmt">DELETE</literal> statements per
+ hour is approximately 5 delete operations per second. (Since
+ we wish to be conservative in our estimate, we round up here
+ and in the following calculations.) No columns are updated
+ by deletes, so these statements consume only 5 operations *
+ 72 bytes per operation = 360 bytes per second.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ 15000 <literal role="stmt">UPDATE</literal> statements per
+ hour is roughly the same as 5 updates per second. Each
+ update uses 72 bytes, plus 4 bytes per column * 5 columns
+ updated, plus 32 bytes per column * 5 columns — this
+ works out to 72 + 20 + 160 = 252 bytes per operation, and
+ multiplying this by 5 operation per second yields 1260 bytes
+ per second.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ 15000 <literal role="stmt">INSERT</literal> statements per
+ hour is equivalent to 5 insert operations per second. Each
+ insert requires REDO log space of 72 bytes, plus 4 bytes per
+ record * 40 columns, plus 32 bytes per column * 40 columns,
+ which is 72 + 160 + 1280 = 1512 bytes per operation. This
+ times 5 operations per second yields 7560 bytes per second.
+ </para>
+ </listitem>
+
+ </itemizedlist>
+
+ <para>
+ So the total number of REDO log bytes being written per second
+ is approximately 0 + 360 + 1260 + 7560 = 9180 bytes. Multiplied
+ by 1800 seconds, this yields 16524000 bytes required for REDO
+ logging, or approximately 15.75 MB. The unit used for
+ <literal>NoOfFragmentLogFiles</literal> represents a set of 4
+ 16-MB log files — that is, 64 MB. Thus, the minimum value
+ (3) for this parameter is sufficient for the scenario envisioned
+ in this example, since 3 times 64 = 192 MB, or about 12 times
+ what is required; the default value of 8 (or 512 MB) is more
+ than ample in this case.
+ </para>
+
+ <para>
+ A copy of each altered table record is kept in the UNDO log. In
+ the scenario discussed above, the UNDO log would not require any
+ more space than what is provided by the default seetings.
+ However, given the size of disks, it is sensible to allocate at
+ least 1 GB for it.
+ </para>
+
+ </section>
+
</section>
<section id="mysql-cluster-params-overview">
@@ -5424,264 +5687,318 @@
</section>
- <section id="mysql-cluster-config-lcp-params">
+ <section id="mysql-cluster-interconnects">
- <title>Configuring MySQL Cluster Parameters for Local Checkpoints</title>
+ <title>Using High-Speed Interconnects with MySQL Cluster</title>
<indexterm>
- <primary>DataMemory</primary>
+ <primary>MySQL Cluster</primary>
+ <secondary>network transporters</secondary>
</indexterm>
<indexterm>
- <primary>IndexMemory</primary>
- </indexterm>
-
- <indexterm>
- <primary>NoOfDiskPagesToDiskAfterRestartACC</primary>
- <secondary>calculating</secondary>
- </indexterm>
-
- <indexterm>
- <primary>NoOfDiskPagesToDiskAfterRestartTUP</primary>
- <secondary>calculating</secondary>
- </indexterm>
-
- <indexterm>
- <primary>NoOfFragmentLogFiles</primary>
- <secondary>calculating</secondary>
- </indexterm>
-
- <indexterm>
- <primary>local checkpoints (MySQL Cluster)</primary>
- </indexterm>
-
- <indexterm>
<primary>MySQL Cluster</primary>
- <secondary>configuration</secondary>
+ <secondary>interconnects</secondary>
</indexterm>
- <remark role="note">
- [js] Adapted from an item in Mikael Ronström's blog, 2006-07-11.
- </remark>
-
<para>
- The parameters discussed in
- <link linkend="mysql-cluster-logging-and-checkpointing">Logging
- and Checkpointing</link> and in
- <link linkend="mysql-cluster-data-memory-index-memory-string-memory">Data
- Memory, Index Memory, and String Memory</link> that are used to
- configure local checkpoints for a MySQL Cluster do not exist in
- isolation, but rather are very much interdepedent on each other.
- In this section, we illustrate how these parameters —
- including <literal>DataMemory</literal>,
- <literal>IndexMemory</literal>,
- <literal>NoOfDiskPagesToDiskAfterRestartTUP</literal>,
- <literal>NoOfDiskPagesToDiskAfterRestartACC</literal>, and
- <literal>NoOfFragmentLogFiles</literal> — relate to one
- another in a working Cluster.
+ Even before design of <literal role="se">NDBCLUSTER</literal>
+ began in 1996, it was evident that one of the major problems to be
+ encountered in building parallel databases would be communication
+ between the nodes in the network. For this reason,
+ <literal role="se">NDBCLUSTER</literal> was designed from the very
+ beginning to allow for the use of a number of different data
+ transport mechanisms. In this Manual, we use the term
+ <firstterm>transporter</firstterm> for these.
</para>
<para>
- In this example, we assume that our application performs the
- following numbers of types of operations per hour:
+ The MySQL Cluster codebase includes support for four different
+ transporters:
</para>
<itemizedlist>
<listitem>
<para>
- 50000 selects
+ <emphasis>TCP/IP using 100 Mbps or gigabit
+ Ethernet</emphasis>, as discussed in
+ <xref linkend="mysql-cluster-tcp-definition"/>.
</para>
</listitem>
<listitem>
<para>
- 15000 inserts
+ <emphasis>Direct (machine-to-machine) TCP/IP</emphasis>;
+ although this transporter uses the same TCP/IP protocol as
+ mentioned in the previous item, it requires setting up the
+ hardware differently and is configured differently as well.
+ For this reason, it is considered a separate transport
+ mechanism for MySQL Cluster. See
+ <xref linkend="mysql-cluster-tcp-definition-direct"/>, for
+ details.
</para>
</listitem>
<listitem>
<para>
- 15000 updates
+ <emphasis>Shared memory (SHM)</emphasis>. For more information
+ about SHM, see <xref linkend="mysql-cluster-shm-definition"/>.
</para>
</listitem>
<listitem>
<para>
- 15000 deletes
+ <emphasis>Scalable Coherent Interface (SCI)</emphasis>, as
+ described in the next section of this chapter,
+ <xref linkend="mysql-cluster-sci-definition"/>.
</para>
</listitem>
</itemizedlist>
<para>
- We also make the following assumptions about the data used in the
- application:
+ Most users today employ TCP/IP over Ethernet because it is
+ ubiquitous. TCP/IP is also by far the best-tested transporter for
+ use with MySQL Cluster.
</para>
- <itemizedlist>
+ <para>
+ We are working to make sure that communication with the
+ <command>ndbd</command> process is made in <quote>chunks</quote>
+ that are as large as possible because this benefits all types of
+ data transmission.
+ </para>
- <listitem>
- <para>
- We are working with a single table having 40 columns.
- </para>
- </listitem>
+ <para>
+ For users who desire it, it is also possible to use cluster
+ interconnects to enhance performance even further. There are two
+ ways to achieve this: Either a custom transporter can be designed
+ to handle this case, or you can use socket implementations that
+ bypass the TCP/IP stack to one extent or another. We have
+ experimented with both of these techniques using the SCI (Scalable
+ Coherent Interface) technology developed by
+ <ulink url="http://www.dolphinics.com/">Dolphin</ulink>.
+ </para>
- <listitem>
- <para>
- Each column can hold up to 32 bytes of data.
- </para>
- </listitem>
+ <section id="mysql-cluster-sci-sockets">
- <listitem>
- <para>
- A typical <literal role="stmt">UPDATE</literal> run by the
- application affects the values of 5 columns.
- </para>
- </listitem>
+ <title>Configuring MySQL Cluster to use SCI Sockets</title>
- <listitem>
+ <indexterm>
+ <primary>MySQL Cluster</primary>
+ <secondary>network transporters</secondary>
+ </indexterm>
+
+ <indexterm>
+ <primary>MySQL Cluster</primary>
+ <secondary>SCI (Scalable Coherent Interface)</secondary>
+ </indexterm>
+
+ <indexterm>
+ <primary>SCI (Scalable Coherent Interface)</primary>
+ <see>MySQL Cluster</see>
+ </indexterm>
+
+ <remark role="NOTE">
+ [js] Update following para to mention Windows when 6.4 becomes
+ available.
+ </remark>
+
+ <para>
+ It is possible employing Scalable Coherent Interface (SCI)
+ technology to achieve a significant increase in connection
+ speeds and throughput between MySQL Cluster data and SQL nodes.
+ To use SCI, it is necessary to obtain and install Dolphin SCI
+ network cards and to use the drivers and other software supplied
+ by Dolphin. You can get information on obtaining these, from
+ <ulink url="http://www.dolphinics.com/">Dolphin Interconnect
+ Solutions</ulink>. SCI SuperSocket or SCI Transporter support is
+ available for 32-bit and 64-bit Linux, Solaris, and other
+ platforms. See the Dolphin documentation referenced later in
+ this section for more detailed information regarding platforms
+ supported for SCI.
+ </para>
+
+ <note>
<para>
- No <literal>NULL</literal> values are inserted by the
- application.
+ Prior to MySQL 4.1.24, there were issues with building MySQL
+ Cluster with SCI support (see Bug#25470), but these have been
+ resolved due to work contributed by Dolphin. SCI Sockets are
+ now correctly supported for MySQL Cluster hosts running recent
+ versions of Linux using the <literal>-max</literal> builds,
+ and versions of MySQL Cluster with SCI Transporter support can
+ be built using either of
+ <command>compile-amd64-max-sci</command> or
+ <command>compile-pentium64-max-sci</command>. Both of these
+ build scripts can be found in the <filename>BUILD</filename>
+ directory of the MySQL Cluster source trees; it should not be
+ difficult to adapt them for other platforms. Generally, all
+ that is necessary is to compile MySQL Cluster with SCI
+ Transporter support is to configure the MySQL Cluster build
+ using <option>--with-ndb-sci=/opt/DIS</option>.
</para>
- </listitem>
+ </note>
- </itemizedlist>
+ <para>
+ Once you have acquired the required Dolphin hardware and
+ software, you can obtain detailed information on how to adapt a
+ MySQL Cluster configured for normal TCP/IP communication to use
+ SCI from the <citetitle>Dolphin Express for MySQL Installation
+ and Reference Guide</citetitle>, available for download at
+ <ulink url="http://docsrva.mysql.com/public/DIS_install_guide_book.pdf"/>
+ (PDF file, 94 pages, 753 KB). This document provides
+ instructions for installing the SCI hardware and software, as
+ well as information concerning network topology and
+ configuration.
+ </para>
- <para>
- A good starting point is to determine the amount of time that
- should elapse between local checkpoints (LCPs). It is worth noting
- that, in the event of a system restart, it takes 40-60 percent of
- this interval to execute the REDO log — for example, if the
- time between LCPs is 5 minutes (300 seconds), then it should take
- 2 to 3 minutes (120 to 180 seconds) for the REDO log to be read.
- </para>
+ </section>
- <para>
- The maximum amount of data per node can be assumed to be the size
- of the <literal>DataMemory</literal> parameter. In this example,
- we assume that this is 2 GB. The
- <literal>NoOfDiskPagesToDiskAfterRestartTUP</literal> parameter
- represents the amount of data to be checkpointed per unit time
- — however, this parameter is actually expressed as the
- number of 8K memory pages to be checkpointed per 100 milliseconds.
- 2 GB per 300 seconds is approximately 6.8 MB per second, or 700 KB
- per 100 milliseconds, which works out to roughly 85 pages per 100
- milliseconds.
- </para>
+ <section id="mysql-cluster-interconnects-performance">
- <para>
- Similarly, we can calculate
- <literal>NoOfDiskPagesToDiskAfterRestartACC</literal> in terms of
- the time for local checkpoints and the amount of memory required
- for indexes — that is, the <literal>IndexMemory</literal>.
- Assuming that we allow 512 MB for indexes, this works out to
- approximately 20 8-KB pages per 100 milliseconds for this
- parameter.
- </para>
+ <title>MySQL Cluster Interconnects and Performance</title>
- <para>
- Next, we need to determine the number of REDO log files required
- — that is, fragment log files — the corresponding
- parameter being <literal>NoOfFragmentLogFiles</literal>. We need
- to make sure that there are sufficient REDO log files for keeping
- records for at least 3 local checkpoints. In a production setting,
- there are always uncertainties — for instance, we cannot be
- sure that disks always operate at top speed or with maximum
- throughput. For this reason, it is best to err on the side of
- caution, so we double our requirement and calculate a number of
- fragment log files which should be enough to keep records covering
- 6 local checkpoints.
- </para>
+ <indexterm>
+ <primary>MySQL Cluster</primary>
+ <secondary>performance</secondary>
+ </indexterm>
- <para>
- It is also important to remember that the disk also handles writes
- to the REDO log and UNDO log, so if you find that the amount of
- data being written to disk as determined by the values of
- <literal>NoOfDiskPagesToDiskAfterRestartACC</literal> and
- <literal>NoOfDiskPagesToDiskAfterRestartTUP</literal> is
- approaching the amount of disk bandwidth available, you may wish
- to increase the time between local checkpoints.
- </para>
+ <indexterm>
+ <primary>MySQL Cluster</primary>
+ <secondary>benchmarks</secondary>
+ </indexterm>
- <para>
- Given 5 minutes (300 seconds) per local checkpoint, this means
- that we need to support writing log records at maximum speed for 6
- * 300 = 1800 seconds. The size of a REDO log record is 72 bytes
- plus 4 bytes per updated column value plus the maximum size of the
- updated column, and there is one REDO log record for each table
- record updated in a transaction, on each node where the data
- reside. Using the numbers of operations set out previously in this
- section, we derive the following:
- </para>
+ <para>
+ The <command>ndbd</command> process has a number of simple
+ constructs which are used to access the data in a MySQL Cluster.
+ We have created a very simple benchmark to check the performance
+ of each of these and the effects which various interconnects
+ have on their performance.
+ </para>
- <itemizedlist>
+ <para>
+ There are four access methods:
+ </para>
- <listitem>
- <para>
- 50000 select operations per hour yields 0 log records (and
- thus 0 bytes), since <literal role="stmt">SELECT</literal>
- statements are not recorded in the REDO log.
- </para>
- </listitem>
+ <itemizedlist>
- <listitem>
- <para>
- 15000 <literal role="stmt">DELETE</literal> statements per
- hour is approximately 5 delete operations per second. (Since
- we wish to be conservative in our estimate, we round up here
- and in the following calculations.) No columns are updated by
- deletes, so these statements consume only 5 operations * 72
- bytes per operation = 360 bytes per second.
- </para>
- </listitem>
+ <listitem>
+ <formalpara>
- <listitem>
- <para>
- 15000 <literal role="stmt">UPDATE</literal> statements per
- hour is roughly the same as 5 updates per second. Each update
- uses 72 bytes, plus 4 bytes per column * 5 columns updated,
- plus 32 bytes per column * 5 columns — this works out to
- 72 + 20 + 160 = 252 bytes per operation, and multiplying this
- by 5 operation per second yields 1260 bytes per second.
- </para>
- </listitem>
+ <title>Primary key access</title>
- <listitem>
- <para>
- 15000 <literal role="stmt">INSERT</literal> statements per
- hour is equivalent to 5 insert operations per second. Each
- insert requires REDO log space of 72 bytes, plus 4 bytes per
- record * 40 columns, plus 32 bytes per column * 40 columns,
- which is 72 + 160 + 1280 = 1512 bytes per operation. This
- times 5 operations per second yields 7560 bytes per second.
- </para>
- </listitem>
+ <para>
+ This is access of a record through its primary key. In the
+ simplest case, only one record is accessed at a time,
+ which means that the full cost of setting up a number of
+ TCP/IP messages and a number of costs for context
+ switching are borne by this single request. In the case
+ where multiple primary key accesses are sent in one batch,
+ those accesses share the cost of setting up the necessary
+ TCP/IP messages and context switches. If the TCP/IP
+ messages are for different destinations, additional TCP/IP
+ messages need to be set up.
+ </para>
- </itemizedlist>
+ </formalpara>
+ </listitem>
- <para>
- So the total number of REDO log bytes being written per second is
- approximately 0 + 360 + 1260 + 7560 = 9180 bytes. Multiplied by
- 1800 seconds, this yields 16524000 bytes required for REDO
- logging, or approximately 15.75 MB. The unit used for
- <literal>NoOfFragmentLogFiles</literal> represents a set of 4
- 16-MB log files — that is, 64 MB. Thus, the minimum value
- (3) for this parameter is sufficient for the scenario envisioned
- in this example, since 3 times 64 = 192 MB, or about 12 times what
- is required; the default value of 8 (or 512 MB) is more than ample
- in this case.
- </para>
+ <listitem>
+ <formalpara>
- <para>
- A copy of each altered table record is kept in the UNDO log. In
- the scenario discussed above, the UNDO log would not require any
- more space than what is provided by the default seetings. However,
- given the size of disks, it is sensible to allocate at least 1 GB
- for it.
- </para>
+ <title>Unique key access</title>
+ <para>
+ Unique key accesses are similar to primary key accesses,
+ except that a unique key access is executed as a read on
+ an index table followed by a primary key access on the
+ table. However, only one request is sent from the MySQL
+ Server, and the read of the index table is handled by
+ <command>ndbd</command>. Such requests also benefit from
+ batching.
+ </para>
+
+ </formalpara>
+ </listitem>
+
+ <listitem>
+ <formalpara>
+
+ <title>Full table scan</title>
+
+ <para>
+ When no indexes exist for a lookup on a table, a full
+ table scan is performed. This is sent as a single request
+ to the <command>ndbd</command> process, which then divides
+ the table scan into a set of parallel scans on all cluster
+ <command>ndbd</command> processes. In future versions of
+ MySQL Cluster, an SQL node will be able to filter some of
+ these scans.
+ </para>
+
+ </formalpara>
+ </listitem>
+
+ <listitem>
+ <para>
+ <emphasis role="bold">Range scan using ordered
+ index</emphasis>
+ </para>
+
+ <para>
+ When an ordered index is used, it performs a scan in the
+ same manner as the full table scan, except that it scans
+ only those records which are in the range used by the query
+ transmitted by the MySQL server (SQL node). All partitions
+ are scanned in parallel when all bound index attributes
+ include all attributes in the partitioning key.
+ </para>
+ </listitem>
+
+ </itemizedlist>
+
+ <para>
+ With benchmarks developed internally by MySQL for testing simple
+ and batched primary and unique key accesses, we have found that
+ using SCI sockets improves performance by approximately 100%
+ over TCP/IP, except in rare instances when communication
+ performance is not an issue. This can occur when scan filters
+ make up most of processing time or when very large batches of
+ primary key accesses are achieved. In that case, the CPU
+ processing in the <command>ndbd</command> processes becomes a
+ fairly large part of the overhead.
+ </para>
+
+ <para>
+ Using the SCI transporter instead of SCI Sockets is only of
+ interest in communicating between <command>ndbd</command>
+ processes. Using the SCI transporter is also only of interest if
+ a CPU can be dedicated to the <command>ndbd</command> process
+ because the SCI transporter ensures that this process will never
+ go to sleep. It is also important to ensure that the
+ <command>ndbd</command> process priority is set in such a way
+ that the process does not lose priority due to running for an
+ extended period of time, as can be done by locking processes to
+ CPUs in Linux 2.6. If such a configuration is possible, the
+ <command>ndbd</command> process will benefit by 10−70% as
+ compared with using SCI sockets. (The larger figures will be
+ seen when performing updates and probably on parallel scan
+ operations as well.)
+ </para>
+
+ <para>
+ There are several other optimized socket implementations for
+ computer clusters, including Myrinet, Gigabit Ethernet,
+ Infiniband and the VIA interface. However, we have tested MySQL
+ Cluster so far only with SCI sockets. See
+ <xref linkend="mysql-cluster-sci-sockets"/>, for information on
+ how to set up SCI sockets using ordinary TCP/IP for MySQL
+ Cluster.
+ </para>
+
+ </section>
+
</section>
</section>
Modified: trunk/refman-4.1/mysql-cluster.xml
===================================================================
--- trunk/refman-4.1/mysql-cluster.xml 2009-09-25 07:18:52 UTC (rev 16804)
+++ trunk/refman-4.1/mysql-cluster.xml 2009-09-25 09:34:22 UTC (rev 16805)
Changed blocks: 1, Lines Added: 0, Lines Deleted: 2; 657 bytes
@@ -155,8 +155,6 @@
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="mysql-cluster-security.xml"/>
- <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="mysql-cluster-interconnects.xml"/>
-
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="mysql-cluster-limitations.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="mysql-cluster-faq.xml"/>
Modified: trunk/refman-5.0/Makefile.depends
===================================================================
--- trunk/refman-5.0/Makefile.depends 2009-09-25 07:18:52 UTC (rev 16804)
+++ trunk/refman-5.0/Makefile.depends 2009-09-25 09:34:22 UTC (rev 16805)
Changed blocks: 14, Lines Added: 2, Lines Deleted: 36; 6386 bytes
@@ -1211,7 +1211,6 @@
dynxml_local_faqs_IDMAPS = \
../ndbapi/metadata/ndb-internals.idmap \
../ndbapi/metadata/overview.idmap \
- ../refman-5.1/metadata/mysql-cluster-disk-data.idmap \
../refman-5.1/metadata/mysql-cluster-management.idmap \
../refman-5.1/metadata/mysql-cluster-replication.idmap \
../refman-5.1/metadata/mysql-cluster-roadmap.idmap \
@@ -1235,7 +1234,6 @@
metadata/installing-core.idmap \
metadata/internationalization.idmap \
metadata/mysql-cluster-configuration-core.idmap \
- metadata/mysql-cluster-interconnects.idmap \
metadata/mysql-cluster-limitations.idmap \
metadata/mysql-cluster-management.idmap \
metadata/mysql-cluster-multi-computer.idmap \
@@ -1834,7 +1832,6 @@
metadata/dba-mysqld-server-core.idmap \
metadata/installing-core.idmap \
metadata/mysql-cluster-configuration-core.idmap \
- metadata/mysql-cluster-interconnects.idmap \
metadata/mysql-cluster-limitations.idmap \
metadata/mysql-cluster-management.idmap \
metadata/mysql-cluster-optvar-core.idmap \
@@ -1948,7 +1945,6 @@
metadata/internationalization.idmap \
metadata/introduction.idmap \
metadata/mysql-cluster-configuration-core.idmap \
- metadata/mysql-cluster-interconnects.idmap \
metadata/mysql-cluster-management.idmap \
metadata/mysql-cluster-programs-core.idmap \
metadata/news-5.0-core.idmap \
@@ -3381,7 +3377,6 @@
monitor-refman-chapter-base.xml \
mysql-cluster-configuration-core.xml \
mysql-cluster-glossary.xml \
- mysql-cluster-interconnects.xml \
mysql-cluster-limitations.xml \
mysql-cluster-management.xml \
mysql-cluster-multi-computer.xml \
@@ -3690,7 +3685,6 @@
../refman-4.1/metadata/sql-syntax-data-definition.idmap \
../refman-5.1/metadata/errors-problems-core.idmap \
../refman-5.1/metadata/introduction.idmap \
- ../refman-5.1/metadata/mysql-cluster-disk-data.idmap \
../refman-5.1/metadata/mysql-cluster-management.idmap \
../refman-5.1/metadata/mysql-cluster-replication.idmap \
../refman-5.1/metadata/mysql-cluster-roadmap.idmap \
@@ -3766,7 +3760,6 @@
metadata/language-structure-core.idmap \
metadata/mysql-cluster-configuration-core.idmap \
metadata/mysql-cluster-glossary.idmap \
- metadata/mysql-cluster-interconnects.idmap \
metadata/mysql-cluster-limitations.idmap \
metadata/mysql-cluster-management.idmap \
metadata/mysql-cluster-multi-computer.idmap \
@@ -4241,9 +4234,8 @@
mysql_cluster_glossary_IMAGES =
mysql_cluster_glossary_SOURCES = mysql-cluster-glossary.xml $(mysql_cluster_glossary_INCLUDES)
mysql_cluster_glossary_IDMAPS = \
- ../refman-5.1/metadata/mysql-cluster-disk-data.idmap \
+ ../refman-5.1/metadata/mysql-cluster-management.idmap \
metadata/mysql-cluster-configuration-core.idmap \
- metadata/mysql-cluster-interconnects.idmap \
metadata/mysql-cluster-management.idmap \
metadata/mysql-cluster-overview.idmap \
metadata/mysql-cluster-programs-core.idmap
@@ -4256,28 +4248,6 @@
mysql-cluster-glossary-manprepped.xml: $(mysql_cluster_glossary_SOURCES) $(mysql_cluster_glossary_IDMAPS)
mysql-cluster-glossary-remprepped.xml: $(mysql_cluster_glossary_SOURCES) $(mysql_cluster_glossary_IDMAPS)
-mysql_cluster_interconnects_INCLUDES = \
- ../common/fixedchars.ent \
- ../common/phrases.ent \
- ../mysql-monitor-2.0/version.ent \
- ../mysql-monitor-common/enterprise.ent \
- ../refman-common/urls.ent \
- all-entities.ent \
- versions.ent
-mysql_cluster_interconnects_IMAGES =
-mysql_cluster_interconnects_SOURCES = mysql-cluster-interconnects.xml $(mysql_cluster_interconnects_INCLUDES)
-mysql_cluster_interconnects_IDMAPS = \
- metadata/mysql-cluster-configuration-core.idmap \
- metadata/mysql-cluster-interconnects.idmap
-mysql-cluster-interconnects.validpure: $(mysql_cluster_interconnects_SOURCES)
-mysql-cluster-interconnects.titles: $(mysql_cluster_interconnects_SOURCES)
-mysql-cluster-interconnects.useless: $(mysql_cluster_interconnects_SOURCES)
-mysql-cluster-interconnects.valid: $(mysql_cluster_interconnects_SOURCES) $(mysql_cluster_interconnects_IDMAPS)
-mysql-cluster-interconnects.validwarn: $(mysql_cluster_interconnects_SOURCES) $(mysql_cluster_interconnects_IDMAPS)
-mysql-cluster-interconnects-prepped.xml: $(mysql_cluster_interconnects_SOURCES) $(mysql_cluster_interconnects_IDMAPS)
-mysql-cluster-interconnects-manprepped.xml: $(mysql_cluster_interconnects_SOURCES) $(mysql_cluster_interconnects_IDMAPS)
-mysql-cluster-interconnects-remprepped.xml: $(mysql_cluster_interconnects_SOURCES) $(mysql_cluster_interconnects_IDMAPS)
-
mysql_cluster_limitations_INCLUDES = \
../common/fixedchars.ent \
../common/phrases.ent \
@@ -4404,7 +4374,6 @@
../ndbapi/metadata/getting-started.idmap \
../ndbapi/metadata/mgm-api.idmap \
metadata/mysql-cluster-configuration-core.idmap \
- metadata/mysql-cluster-interconnects.idmap \
metadata/mysql-cluster-limitations.idmap \
metadata/mysql-cluster-overview.idmap \
metadata/mysql-cluster-security.idmap
@@ -4517,7 +4486,6 @@
dynxml-local-mysql-cluster-programs.xml \
mysql-cluster-configuration-core.xml \
mysql-cluster-glossary.xml \
- mysql-cluster-interconnects.xml \
mysql-cluster-limitations.xml \
mysql-cluster-management.xml \
mysql-cluster-multi-computer.xml \
@@ -4545,7 +4513,7 @@
../ndbapi/metadata/ndb-errors.idmap \
../ndbapi/metadata/ndb-internals.idmap \
../refman-4.1/metadata/mysql-cluster-programs-core.idmap \
- ../refman-5.1/metadata/mysql-cluster-disk-data.idmap \
+ ../refman-5.1/metadata/mysql-cluster-management.idmap \
../refman-5.1/metadata/mysql-cluster.idmap \
../refman-common/metadata/bug-reports.idmap \
metadata/dba-mysqld-server-core.idmap \
@@ -4554,7 +4522,6 @@
metadata/installing-core.idmap \
metadata/mysql-cluster-configuration-core.idmap \
metadata/mysql-cluster-glossary.idmap \
- metadata/mysql-cluster-interconnects.idmap \
metadata/mysql-cluster-limitations.idmap \
metadata/mysql-cluster-management.idmap \
metadata/mysql-cluster-multi-computer.idmap \
@@ -4731,7 +4698,6 @@
metadata/internationalization.idmap \
metadata/introduction.idmap \
metadata/mysql-cluster-configuration-core.idmap \
- metadata/mysql-cluster-interconnects.idmap \
metadata/mysql-cluster-management.idmap \
metadata/mysql-cluster-programs-core.idmap \
metadata/news-5.0-core.idmap \
Modified: trunk/refman-5.0/mysql-cluster-configuration-core.xml
===================================================================
--- trunk/refman-5.0/mysql-cluster-configuration-core.xml 2009-09-25 07:18:52 UTC (rev 16804)
+++ trunk/refman-5.0/mysql-cluster-configuration-core.xml 2009-09-25 09:34:22 UTC (rev 16805)
Changed blocks: 2, Lines Added: 512, Lines Deleted: 194; 30877 bytes
@@ -5165,6 +5165,269 @@
</section>
+ <section id="mysql-cluster-config-lcp-params">
+
+ <title>Configuring MySQL Cluster Parameters for Local Checkpoints</title>
+
+ <indexterm>
+ <primary>DataMemory</primary>
+ </indexterm>
+
+ <indexterm>
+ <primary>IndexMemory</primary>
+ </indexterm>
+
+ <indexterm>
+ <primary>NoOfDiskPagesToDiskAfterRestartACC</primary>
+ <secondary>calculating</secondary>
+ </indexterm>
+
+ <indexterm>
+ <primary>NoOfDiskPagesToDiskAfterRestartTUP</primary>
+ <secondary>calculating</secondary>
+ </indexterm>
+
+ <indexterm>
+ <primary>NoOfFragmentLogFiles</primary>
+ <secondary>calculating</secondary>
+ </indexterm>
+
+ <indexterm>
+ <primary>local checkpoints (MySQL Cluster)</primary>
+ </indexterm>
+
+ <indexterm>
+ <primary>MySQL Cluster</primary>
+ <secondary>configuration</secondary>
+ </indexterm>
+
+ <remark role="note">
+ [js] Adapted from an item in Mikael Ronström's blog,
+ 2006-07-11.
+ </remark>
+
+ <para>
+ The parameters discussed in
+ <link linkend="mysql-cluster-logging-and-checkpointing">Logging
+ and Checkpointing</link> and in
+ <link linkend="mysql-cluster-data-memory-index-memory-string-memory">Data
+ Memory, Index Memory, and String Memory</link> that are used to
+ configure local checkpoints for a MySQL Cluster do not exist in
+ isolation, but rather are very much interdepedent on each other.
+ In this section, we illustrate how these parameters —
+ including <literal>DataMemory</literal>,
+ <literal>IndexMemory</literal>,
+ <literal>NoOfDiskPagesToDiskAfterRestartTUP</literal>,
+ <literal>NoOfDiskPagesToDiskAfterRestartACC</literal>, and
+ <literal>NoOfFragmentLogFiles</literal> — relate to one
+ another in a working Cluster.
+ </para>
+
+ <para>
+ In this example, we assume that our application performs the
+ following numbers of types of operations per hour:
+ </para>
+
+ <itemizedlist>
+
+ <listitem>
+ <para>
+ 50000 selects
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ 15000 inserts
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ 15000 updates
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ 15000 deletes
+ </para>
+ </listitem>
+
+ </itemizedlist>
+
+ <para>
+ We also make the following assumptions about the data used in
+ the application:
+ </para>
+
+ <itemizedlist>
+
+ <listitem>
+ <para>
+ We are working with a single table having 40 columns.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Each column can hold up to 32 bytes of data.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ A typical <literal role="stmt">UPDATE</literal> run by the
+ application affects the values of 5 columns.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ No <literal>NULL</literal> values are inserted by the
+ application.
+ </para>
+ </listitem>
+
+ </itemizedlist>
+
+ <para>
+ A good starting point is to determine the amount of time that
+ should elapse between local checkpoints (LCPs). It is worth
+ noting that, in the event of a system restart, it takes 40-60
+ percent of this interval to execute the REDO log — for
+ example, if the time between LCPs is 5 minutes (300 seconds),
+ then it should take 2 to 3 minutes (120 to 180 seconds) for the
+ REDO log to be read.
+ </para>
+
+ <para>
+ The maximum amount of data per node can be assumed to be the
+ size of the <literal>DataMemory</literal> parameter. In this
+ example, we assume that this is 2 GB. The
+ <literal>NoOfDiskPagesToDiskAfterRestartTUP</literal> parameter
+ represents the amount of data to be checkpointed per unit time
+ — however, this parameter is actually expressed as the
+ number of 8K memory pages to be checkpointed per 100
+ milliseconds. 2 GB per 300 seconds is approximately 6.8 MB per
+ second, or 700 KB per 100 milliseconds, which works out to
+ roughly 85 pages per 100 milliseconds.
+ </para>
+
+ <para>
+ Similarly, we can calculate
+ <literal>NoOfDiskPagesToDiskAfterRestartACC</literal> in terms
+ of the time for local checkpoints and the amount of memory
+ required for indexes — that is, the
+ <literal>IndexMemory</literal>. Assuming that we allow 512 MB
+ for indexes, this works out to approximately 20 8-KB pages per
+ 100 milliseconds for this parameter.
+ </para>
+
+ <para>
+ Next, we need to determine the number of REDO log files required
+ — that is, fragment log files — the corresponding
+ parameter being <literal>NoOfFragmentLogFiles</literal>. We need
+ to make sure that there are sufficient REDO log files for
+ keeping records for at least 3 local checkpoints. In a
+ production setting, there are always uncertainties — for
+ instance, we cannot be sure that disks always operate at top
+ speed or with maximum throughput. For this reason, it is best to
+ err on the side of caution, so we double our requirement and
+ calculate a number of fragment log files which should be enough
+ to keep records covering 6 local checkpoints.
+ </para>
+
+ <para>
+ It is also important to remember that the disk also handles
+ writes to the REDO log and UNDO log, so if you find that the
+ amount of data being written to disk as determined by the values
+ of <literal>NoOfDiskPagesToDiskAfterRestartACC</literal> and
+ <literal>NoOfDiskPagesToDiskAfterRestartTUP</literal> is
+ approaching the amount of disk bandwidth available, you may wish
+ to increase the time between local checkpoints.
+ </para>
+
+ <para>
+ Given 5 minutes (300 seconds) per local checkpoint, this means
+ that we need to support writing log records at maximum speed for
+ 6 * 300 = 1800 seconds. The size of a REDO log record is 72
+ bytes plus 4 bytes per updated column value plus the maximum
+ size of the updated column, and there is one REDO log record for
+ each table record updated in a transaction, on each node where
+ the data reside. Using the numbers of operations set out
+ previously in this section, we derive the following:
+ </para>
+
+ <itemizedlist>
+
+ <listitem>
+ <para>
+ 50000 select operations per hour yields 0 log records (and
+ thus 0 bytes), since <literal role="stmt">SELECT</literal>
+ statements are not recorded in the REDO log.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ 15000 <literal role="stmt">DELETE</literal> statements per
+ hour is approximately 5 delete operations per second. (Since
+ we wish to be conservative in our estimate, we round up here
+ and in the following calculations.) No columns are updated
+ by deletes, so these statements consume only 5 operations *
+ 72 bytes per operation = 360 bytes per second.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ 15000 <literal role="stmt">UPDATE</literal> statements per
+ hour is roughly the same as 5 updates per second. Each
+ update uses 72 bytes, plus 4 bytes per column * 5 columns
+ updated, plus 32 bytes per column * 5 columns — this
+ works out to 72 + 20 + 160 = 252 bytes per operation, and
+ multiplying this by 5 operation per second yields 1260 bytes
+ per second.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ 15000 <literal role="stmt">INSERT</literal> statements per
+ hour is equivalent to 5 insert operations per second. Each
+ insert requires REDO log space of 72 bytes, plus 4 bytes per
+ record * 40 columns, plus 32 bytes per column * 40 columns,
+ which is 72 + 160 + 1280 = 1512 bytes per operation. This
+ times 5 operations per second yields 7560 bytes per second.
+ </para>
+ </listitem>
+
+ </itemizedlist>
+
+ <para>
+ So the total number of REDO log bytes being written per second
+ is approximately 0 + 360 + 1260 + 7560 = 9180 bytes. Multiplied
+ by 1800 seconds, this yields 16524000 bytes required for REDO
+ logging, or approximately 15.75 MB. The unit used for
+ <literal>NoOfFragmentLogFiles</literal> represents a set of 4
+ 16-MB log files — that is, 64 MB. Thus, the minimum value
+ (3) for this parameter is sufficient for the scenario envisioned
+ in this example, since 3 times 64 = 192 MB, or about 12 times
+ what is required; the default value of 8 (or 512 MB) is more
+ than ample in this case.
+ </para>
+
+ <para>
+ A copy of each altered table record is kept in the UNDO log. In
+ the scenario discussed above, the UNDO log would not require any
+ more space than what is provided by the default seetings.
+ However, given the size of disks, it is sensible to allocate at
+ least 1 GB for it.
+ </para>
+
+ </section>
+
</section>
<section id="mysql-cluster-params-overview">
@@ -5470,264 +5733,319 @@
</section>
- <section id="mysql-cluster-config-lcp-params">
+ <section id="mysql-cluster-interconnects">
- <title>Configuring MySQL Cluster Parameters for Local Checkpoints</title>
+ <title>Using High-Speed Interconnects with MySQL Cluster</title>
<indexterm>
- <primary>DataMemory</primary>
+ <primary>MySQL Cluster</primary>
+ <secondary>network transporters</secondary>
</indexterm>
<indexterm>
- <primary>IndexMemory</primary>
- </indexterm>
-
- <indexterm>
- <primary>NoOfDiskPagesToDiskAfterRestartACC</primary>
- <secondary>calculating</secondary>
- </indexterm>
-
- <indexterm>
- <primary>NoOfDiskPagesToDiskAfterRestartTUP</primary>
- <secondary>calculating</secondary>
- </indexterm>
-
- <indexterm>
- <primary>NoOfFragmentLogFiles</primary>
- <secondary>calculating</secondary>
- </indexterm>
-
- <indexterm>
- <primary>local checkpoints (MySQL Cluster)</primary>
- </indexterm>
-
- <indexterm>
<primary>MySQL Cluster</primary>
- <secondary>configuration</secondary>
+ <secondary>interconnects</secondary>
</indexterm>
- <remark role="note">
- [js] Adapted from an item in Mikael Ronström's blog, 2006-07-11.
- </remark>
-
<para>
- The parameters discussed in
- <link linkend="mysql-cluster-logging-and-checkpointing">Logging
- and Checkpointing</link> and in
- <link linkend="mysql-cluster-data-memory-index-memory-string-memory">Data
- Memory, Index Memory, and String Memory</link> that are used to
- configure local checkpoints for a MySQL Cluster do not exist in
- isolation, but rather are very much interdepedent on each other.
- In this section, we illustrate how these parameters —
- including <literal>DataMemory</literal>,
- <literal>IndexMemory</literal>,
- <literal>NoOfDiskPagesToDiskAfterRestartTUP</literal>,
- <literal>NoOfDiskPagesToDiskAfterRestartACC</literal>, and
- <literal>NoOfFragmentLogFiles</literal> — relate to one
- another in a working Cluster.
+ Even before design of <literal role="se">NDBCLUSTER</literal>
+ began in 1996, it was evident that one of the major problems to be
+ encountered in building parallel databases would be communication
+ between the nodes in the network. For this reason,
+ <literal role="se">NDBCLUSTER</literal> was designed from the very
+ beginning to allow for the use of a number of different data
+ transport mechanisms. In this Manual, we use the term
+ <firstterm>transporter</firstterm> for these.
</para>
<para>
- In this example, we assume that our application performs the
- following numbers of types of operations per hour:
+ The MySQL Cluster codebase includes support for four different
+ transporters:
</para>
<itemizedlist>
<listitem>
<para>
- 50000 selects
+ <emphasis>TCP/IP using 100 Mbps or gigabit
+ Ethernet</emphasis>, as discussed in
+ <xref linkend="mysql-cluster-tcp-definition"/>.
</para>
</listitem>
<listitem>
<para>
- 15000 inserts
+ <emphasis>Direct (machine-to-machine) TCP/IP</emphasis>;
+ although this transporter uses the same TCP/IP protocol as
+ mentioned in the previous item, it requires setting up the
+ hardware differently and is configured differently as well.
+ For this reason, it is considered a separate transport
+ mechanism for MySQL Cluster. See
+ <xref linkend="mysql-cluster-tcp-definition-direct"/>, for
+ details.
</para>
</listitem>
<listitem>
<para>
- 15000 updates
+ <emphasis>Shared memory (SHM)</emphasis>. For more information
+ about SHM, see <xref linkend="mysql-cluster-shm-definition"/>.
</para>
</listitem>
<listitem>
<para>
- 15000 deletes
+ <emphasis>Scalable Coherent Interface (SCI)</emphasis>, as
+ described in the next section of this chapter,
+ <xref linkend="mysql-cluster-sci-definition"/>.
</para>
</listitem>
</itemizedlist>
<para>
- We also make the following assumptions about the data used in the
- application:
+ Most users today employ TCP/IP over Ethernet because it is
+ ubiquitous. TCP/IP is also by far the best-tested transporter for
+ use with MySQL Cluster.
</para>
- <itemizedlist>
+ <para>
+ We are working to make sure that communication with the
+ <command>ndbd</command> process is made in <quote>chunks</quote>
+ that are as large as possible because this benefits all types of
+ data transmission.
+ </para>
- <listitem>
- <para>
- We are working with a single table having 40 columns.
- </para>
- </listitem>
+ <para>
+ For users who desire it, it is also possible to use cluster
+ interconnects to enhance performance even further. There are two
+ ways to achieve this: Either a custom transporter can be designed
+ to handle this case, or you can use socket implementations that
+ bypass the TCP/IP stack to one extent or another. We have
+ experimented with both of these techniques using the SCI (Scalable
+ Coherent Interface) technology developed by
+ <ulink url="http://www.dolphinics.com/">Dolphin Interconnect
+ Solutions</ulink>.
+ </para>
- <listitem>
- <para>
- Each column can hold up to 32 bytes of data.
- </para>
- </listitem>
+ <section id="mysql-cluster-sci-sockets">
- <listitem>
- <para>
- A typical <literal role="stmt">UPDATE</literal> run by the
- application affects the values of 5 columns.
- </para>
- </listitem>
+ <title>Configuring MySQL Cluster to use SCI Sockets</title>
- <listitem>
+ <indexterm>
+ <primary>MySQL Cluster</primary>
+ <secondary>network transporters</secondary>
+ </indexterm>
+
+ <indexterm>
+ <primary>MySQL Cluster</primary>
+ <secondary>SCI (Scalable Coherent Interface)</secondary>
+ </indexterm>
+
+ <indexterm>
+ <primary>SCI (Scalable Coherent Interface)</primary>
+ <see>MySQL Cluster</see>
+ </indexterm>
+
+ <remark role="NOTE">
+ [js] Update following para to mention Windows when 6.4 becomes
+ available.
+ </remark>
+
+ <para>
+ It is possible employing Scalable Coherent Interface (SCI)
+ technology to achieve a significant increase in connection
+ speeds and throughput between MySQL Cluster data and SQL nodes.
+ To use SCI, it is necessary to obtain and install Dolphin SCI
+ network cards and to use the drivers and other software supplied
+ by Dolphin. You can get information on obtaining these, from
+ <ulink url="http://www.dolphinics.com/">Dolphin Interconnect
+ Solutions</ulink>. SCI SuperSocket or SCI Transporter support is
+ available for 32-bit and 64-bit Linux, Solaris, and other
+ platforms. See the Dolphin documentation referenced later in
+ this section for more detailed information regarding platforms
+ supported for SCI.
+ </para>
+
+ <note>
<para>
- No <literal>NULL</literal> values are inserted by the
- application.
+ Prior to MySQL 5.0.66, there were issues with building MySQL
+ Cluster with SCI support (see Bug#25470), but these have been
+ resolved due to work contributed by Dolphin. SCI Sockets are
+ now correctly supported for MySQL Cluster hosts running recent
+ versions of Linux using the <literal>-max</literal> builds,
+ and versions of MySQL Cluster with SCI Transporter support can
+ be built using either of
+ <command>compile-amd64-max-sci</command> or
+ <command>compile-pentium64-max-sci</command>. Both of these
+ build scripts can be found in the <filename>BUILD</filename>
+ directory of the MySQL Cluster source trees; it should not be
+ difficult to adapt them for other platforms. Generally, all
+ that is necessary is to compile MySQL Cluster with SCI
+ Transporter support is to configure the MySQL Cluster build
+ using <option>--with-ndb-sci=/opt/DIS</option>.
</para>
- </listitem>
+ </note>
- </itemizedlist>
+ <para>
+ Once you have acquired the required Dolphin hardware and
+ software, you can obtain detailed information on how to adapt a
+ MySQL Cluster configured for normal TCP/IP communication to use
+ SCI from the <citetitle>Dolphin Express for MySQL Installation
+ and Reference Guide</citetitle>, available for download at
+ <ulink url="http://docsrva.mysql.com/public/DIS_install_guide_book.pdf"/>
+ (PDF file, 94 pages, 753 KB). This document provides
+ instructions for installing the SCI hardware and software, as
+ well as information concerning network topology and
+ configuration.
+ </para>
- <para>
- A good starting point is to determine the amount of time that
- should elapse between local checkpoints (LCPs). It is worth noting
- that, in the event of a system restart, it takes 40-60 percent of
- this interval to execute the REDO log — for example, if the
- time between LCPs is 5 minutes (300 seconds), then it should take
- 2 to 3 minutes (120 to 180 seconds) for the REDO log to be read.
- </para>
+ </section>
- <para>
- The maximum amount of data per node can be assumed to be the size
- of the <literal>DataMemory</literal> parameter. In this example,
- we assume that this is 2 GB. The
- <literal>NoOfDiskPagesToDiskAfterRestartTUP</literal> parameter
- represents the amount of data to be checkpointed per unit time
- — however, this parameter is actually expressed as the
- number of 8K memory pages to be checkpointed per 100 milliseconds.
- 2 GB per 300 seconds is approximately 6.8 MB per second, or 700 KB
- per 100 milliseconds, which works out to roughly 85 pages per 100
- milliseconds.
- </para>
+ <section id="mysql-cluster-interconnects-performance">
- <para>
- Similarly, we can calculate
- <literal>NoOfDiskPagesToDiskAfterRestartACC</literal> in terms of
- the time for local checkpoints and the amount of memory required
- for indexes — that is, the <literal>IndexMemory</literal>.
- Assuming that we allow 512 MB for indexes, this works out to
- approximately 20 8-KB pages per 100 milliseconds for this
- parameter.
- </para>
+ <title>MySQL Cluster Interconnects and Performance</title>
- <para>
- Next, we need to determine the number of REDO log files required
- — that is, fragment log files — the corresponding
- parameter being <literal>NoOfFragmentLogFiles</literal>. We need
- to make sure that there are sufficient REDO log files for keeping
- records for at least 3 local checkpoints. In a production setting,
- there are always uncertainties — for instance, we cannot be
- sure that disks always operate at top speed or with maximum
- throughput. For this reason, it is best to err on the side of
- caution, so we double our requirement and calculate a number of
- fragment log files which should be enough to keep records covering
- 6 local checkpoints.
- </para>
+ <indexterm>
+ <primary>MySQL Cluster</primary>
+ <secondary>performance</secondary>
+ </indexterm>
- <para>
- It is also important to remember that the disk also handles writes
- to the REDO log and UNDO log, so if you find that the amount of
- data being written to disk as determined by the values of
- <literal>NoOfDiskPagesToDiskAfterRestartACC</literal> and
- <literal>NoOfDiskPagesToDiskAfterRestartTUP</literal> is
- approaching the amount of disk bandwidth available, you may wish
- to increase the time between local checkpoints.
- </para>
+ <indexterm>
+ <primary>MySQL Cluster</primary>
+ <secondary>benchmarks</secondary>
+ </indexterm>
- <para>
- Given 5 minutes (300 seconds) per local checkpoint, this means
- that we need to support writing log records at maximum speed for 6
- * 300 = 1800 seconds. The size of a REDO log record is 72 bytes
- plus 4 bytes per updated column value plus the maximum size of the
- updated column, and there is one REDO log record for each table
- record updated in a transaction, on each node where the data
- reside. Using the numbers of operations set out previously in this
- section, we derive the following:
- </para>
+ <para>
+ The <command>ndbd</command> process has a number of simple
+ constructs which are used to access the data in a MySQL Cluster.
+ We have created a very simple benchmark to check the performance
+ of each of these and the effects which various interconnects
+ have on their performance.
+ </para>
- <itemizedlist>
+ <para>
+ There are four access methods:
+ </para>
- <listitem>
- <para>
- 50000 select operations per hour yields 0 log records (and
- thus 0 bytes), since <literal role="stmt">SELECT</literal>
- statements are not recorded in the REDO log.
- </para>
- </listitem>
+ <itemizedlist>
- <listitem>
- <para>
- 15000 <literal role="stmt">DELETE</literal> statements per
- hour is approximately 5 delete operations per second. (Since
- we wish to be conservative in our estimate, we round up here
- and in the following calculations.) No columns are updated by
- deletes, so these statements consume only 5 operations * 72
- bytes per operation = 360 bytes per second.
- </para>
- </listitem>
+ <listitem>
+ <formalpara>
- <listitem>
- <para>
- 15000 <literal role="stmt">UPDATE</literal> statements per
- hour is roughly the same as 5 updates per second. Each update
- uses 72 bytes, plus 4 bytes per column * 5 columns updated,
- plus 32 bytes per column * 5 columns — this works out to
- 72 + 20 + 160 = 252 bytes per operation, and multiplying this
- by 5 operation per second yields 1260 bytes per second.
- </para>
- </listitem>
+ <title>Primary key access</title>
- <listitem>
- <para>
- 15000 <literal role="stmt">INSERT</literal> statements per
- hour is equivalent to 5 insert operations per second. Each
- insert requires REDO log space of 72 bytes, plus 4 bytes per
- record * 40 columns, plus 32 bytes per column * 40 columns,
- which is 72 + 160 + 1280 = 1512 bytes per operation. This
- times 5 operations per second yields 7560 bytes per second.
- </para>
- </listitem>
+ <para>
+ This is access of a record through its primary key. In the
+ simplest case, only one record is accessed at a time,
+ which means that the full cost of setting up a number of
+ TCP/IP messages and a number of costs for context
+ switching are borne by this single request. In the case
+ where multiple primary key accesses are sent in one batch,
+ those accesses share the cost of setting up the necessary
+ TCP/IP messages and context switches. If the TCP/IP
+ messages are for different destinations, additional TCP/IP
+ messages need to be set up.
+ </para>
- </itemizedlist>
+ </formalpara>
+ </listitem>
- <para>
- So the total number of REDO log bytes being written per second is
- approximately 0 + 360 + 1260 + 7560 = 9180 bytes. Multiplied by
- 1800 seconds, this yields 16524000 bytes required for REDO
- logging, or approximately 15.75 MB. The unit used for
- <literal>NoOfFragmentLogFiles</literal> represents a set of 4
- 16-MB log files — that is, 64 MB. Thus, the minimum value
- (3) for this parameter is sufficient for the scenario envisioned
- in this example, since 3 times 64 = 192 MB, or about 12 times what
- is required; the default value of 8 (or 512 MB) is more than ample
- in this case.
- </para>
+ <listitem>
+ <formalpara>
- <para>
- A copy of each altered table record is kept in the UNDO log. In
- the scenario discussed above, the UNDO log would not require any
- more space than what is provided by the default seetings. However,
- given the size of disks, it is sensible to allocate at least 1 GB
- for it.
- </para>
+ <title>Unique key access</title>
+ <para>
+ Unique key accesses are similar to primary key accesses,
+ except that a unique key access is executed as a read on
+ an index table followed by a primary key access on the
+ table. However, only one request is sent from the MySQL
+ Server, and the read of the index table is handled by
+ <command>ndbd</command>. Such requests also benefit from
+ batching.
+ </para>
+
+ </formalpara>
+ </listitem>
+
+ <listitem>
+ <formalpara>
+
+ <title>Full table scan</title>
+
+ <para>
+ When no indexes exist for a lookup on a table, a full
+ table scan is performed. This is sent as a single request
+ to the <command>ndbd</command> process, which then divides
+ the table scan into a set of parallel scans on all cluster
+ <command>ndbd</command> processes. In future versions of
+ MySQL Cluster, an SQL node will be able to filter some of
+ these scans.
+ </para>
+
+ </formalpara>
+ </listitem>
+
+ <listitem>
+ <para>
+ <emphasis role="bold">Range scan using ordered
+ index</emphasis>
+ </para>
+
+ <para>
+ When an ordered index is used, it performs a scan in the
+ same manner as the full table scan, except that it scans
+ only those records which are in the range used by the query
+ transmitted by the MySQL server (SQL node). All partitions
+ are scanned in parallel when all bound index attributes
+ include all attributes in the partitioning key.
+ </para>
+ </listitem>
+
+ </itemizedlist>
+
+ <para>
+ With benchmarks developed internally by MySQL for testing simple
+ and batched primary and unique key accesses, we have found that
+ using SCI sockets improves performance by approximately 100%
+ over TCP/IP, except in rare instances when communication
+ performance is not an issue. This can occur when scan filters
+ make up most of processing time or when very large batches of
+ primary key accesses are achieved. In that case, the CPU
+ processing in the <command>ndbd</command> processes becomes a
+ fairly large part of the overhead.
+ </para>
+
+ <para>
+ Using the SCI transporter instead of SCI Sockets is only of
+ interest in communicating between <command>ndbd</command>
+ processes. Using the SCI transporter is also only of interest if
+ a CPU can be dedicated to the <command>ndbd</command> process
+ because the SCI transporter ensures that this process will never
+ go to sleep. It is also important to ensure that the
+ <command>ndbd</command> process priority is set in such a way
+ that the process does not lose priority due to running for an
+ extended period of time, as can be done by locking processes to
+ CPUs in Linux 2.6. If such a configuration is possible, the
+ <command>ndbd</command> process will benefit by 10−70% as
+ compared with using SCI sockets. (The larger figures will be
+ seen when performing updates and probably on parallel scan
+ operations as well.)
+ </para>
+
+ <para>
+ There are several other optimized socket implementations for
+ computer clusters, including Myrinet, Gigabit Ethernet,
+ Infiniband and the VIA interface. However, we have tested MySQL
+ Cluster so far only with SCI sockets. See
+ <xref linkend="mysql-cluster-sci-sockets"/>, for information on
+ how to set up SCI sockets using ordinary TCP/IP for MySQL
+ Cluster.
+ </para>
+
+ </section>
+
</section>
</section>
Modified: trunk/refman-5.0/mysql-cluster.xml
===================================================================
--- trunk/refman-5.0/mysql-cluster.xml 2009-09-25 07:18:52 UTC (rev 16804)
+++ trunk/refman-5.0/mysql-cluster.xml 2009-09-25 09:34:22 UTC (rev 16805)
Changed blocks: 1, Lines Added: 0, Lines Deleted: 2; 661 bytes
@@ -152,8 +152,6 @@
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="mysql-cluster-security.xml"/>
- <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="mysql-cluster-interconnects.xml"/>
-
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="mysql-cluster-limitations.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="mysql-cluster-roadmap.xml"/>
Modified: trunk/refman-5.1/Makefile.depends
===================================================================
--- trunk/refman-5.1/Makefile.depends 2009-09-25 07:18:52 UTC (rev 16804)
+++ trunk/refman-5.1/Makefile.depends 2009-09-25 09:34:22 UTC (rev 16805)
Changed blocks: 18, Lines Added: 3, Lines Deleted: 73; 9320 bytes
@@ -1467,8 +1467,6 @@
metadata/installing-core.idmap \
metadata/internationalization.idmap \
metadata/mysql-cluster-configuration-core.idmap \
- metadata/mysql-cluster-disk-data.idmap \
- metadata/mysql-cluster-interconnects.idmap \
metadata/mysql-cluster-limitations.idmap \
metadata/mysql-cluster-management.idmap \
metadata/mysql-cluster-multi-computer.idmap \
@@ -2108,8 +2106,6 @@
metadata/dba-mysqld-server-core.idmap \
metadata/installing-core.idmap \
metadata/mysql-cluster-configuration-core.idmap \
- metadata/mysql-cluster-disk-data.idmap \
- metadata/mysql-cluster-interconnects.idmap \
metadata/mysql-cluster-limitations.idmap \
metadata/mysql-cluster-management.idmap \
metadata/mysql-cluster-optvar-core.idmap \
@@ -2291,7 +2287,6 @@
../ndbapi/metadata/ndb-errors.idmap \
../ndbapi/metadata/ndb-internals.idmap \
metadata/mysql-cluster-configuration-core.idmap \
- metadata/mysql-cluster-disk-data.idmap \
metadata/mysql-cluster-glossary.idmap \
metadata/mysql-cluster-limitations.idmap \
metadata/mysql-cluster-management.idmap \
@@ -2345,7 +2340,6 @@
metadata/introduction.idmap \
metadata/language-structure-core.idmap \
metadata/mysql-cluster-configuration-core.idmap \
- metadata/mysql-cluster-interconnects.idmap \
metadata/mysql-cluster-management.idmap \
metadata/mysql-cluster-multi-computer.idmap \
metadata/mysql-cluster-news-core.idmap \
@@ -2973,7 +2967,7 @@
metadata/information-schema.idmap \
metadata/internationalization.idmap \
metadata/language-structure-core.idmap \
- metadata/mysql-cluster-disk-data.idmap \
+ metadata/mysql-cluster-management.idmap \
metadata/mysql-cluster-programs-core.idmap \
metadata/news-5.1-core.idmap \
metadata/partitioning.idmap \
@@ -3086,7 +3080,7 @@
metadata/internationalization.idmap \
metadata/introduction.idmap \
metadata/language-structure-core.idmap \
- metadata/mysql-cluster-disk-data.idmap \
+ metadata/mysql-cluster-management.idmap \
metadata/mysql-cluster-replication.idmap \
metadata/mysql-cluster-roadmap.idmap \
metadata/mysql-cluster.idmap \
@@ -3944,9 +3938,7 @@
monitor-refman-chapter-aspec.xml.mem-replication.none.section.xml \
monitor-refman-chapter-base.xml \
mysql-cluster-configuration-core.xml \
- mysql-cluster-disk-data.xml \
mysql-cluster-glossary.xml \
- mysql-cluster-interconnects.xml \
mysql-cluster-limitations.xml \
mysql-cluster-management.xml \
mysql-cluster-multi-computer.xml \
@@ -4376,9 +4368,7 @@
metadata/introduction.idmap \
metadata/language-structure-core.idmap \
metadata/mysql-cluster-configuration-core.idmap \
- metadata/mysql-cluster-disk-data.idmap \
metadata/mysql-cluster-glossary.idmap \
- metadata/mysql-cluster-interconnects.idmap \
metadata/mysql-cluster-limitations.idmap \
metadata/mysql-cluster-management.idmap \
metadata/mysql-cluster-multi-computer.idmap \
@@ -4849,33 +4839,6 @@
mysql-cluster-configuration-core-manprepped.xml: $(mysql_cluster_configuration_core_SOURCES) $(mysql_cluster_configuration_core_IDMAPS)
mysql-cluster-configuration-core-remprepped.xml: $(mysql_cluster_configuration_core_SOURCES) $(mysql_cluster_configuration_core_IDMAPS)
-mysql_cluster_disk_data_INCLUDES = \
- ../common/fixedchars.ent \
- ../common/phrases.ent \
- ../gui-common/gui-common.ent \
- ../mysql-monitor-2.0/version.ent \
- ../mysql-monitor-common/enterprise.ent \
- ../refman-common/urls.ent \
- all-entities.ent \
- versions.ent
-mysql_cluster_disk_data_IMAGES =
-mysql_cluster_disk_data_SOURCES = mysql-cluster-disk-data.xml $(mysql_cluster_disk_data_INCLUDES)
-mysql_cluster_disk_data_IDMAPS = \
- metadata/data-types.idmap \
- metadata/information-schema.idmap \
- metadata/mysql-cluster-configuration-core.idmap \
- metadata/mysql-cluster-disk-data.idmap \
- metadata/mysql-cluster-programs-core.idmap \
- metadata/sql-syntax-data-definition.idmap
-mysql-cluster-disk-data.validpure: $(mysql_cluster_disk_data_SOURCES)
-mysql-cluster-disk-data.titles: $(mysql_cluster_disk_data_SOURCES)
-mysql-cluster-disk-data.useless: $(mysql_cluster_disk_data_SOURCES)
-mysql-cluster-disk-data.valid: $(mysql_cluster_disk_data_SOURCES) $(mysql_cluster_disk_data_IDMAPS)
-mysql-cluster-disk-data.validwarn: $(mysql_cluster_disk_data_SOURCES) $(mysql_cluster_disk_data_IDMAPS)
-mysql-cluster-disk-data-prepped.xml: $(mysql_cluster_disk_data_SOURCES) $(mysql_cluster_disk_data_IDMAPS)
-mysql-cluster-disk-data-manprepped.xml: $(mysql_cluster_disk_data_SOURCES) $(mysql_cluster_disk_data_IDMAPS)
-mysql-cluster-disk-data-remprepped.xml: $(mysql_cluster_disk_data_SOURCES) $(mysql_cluster_disk_data_IDMAPS)
-
mysql_cluster_glossary_INCLUDES = \
../common/fixedchars.ent \
../common/phrases.ent \
@@ -4889,8 +4852,6 @@
mysql_cluster_glossary_SOURCES = mysql-cluster-glossary.xml $(mysql_cluster_glossary_INCLUDES)
mysql_cluster_glossary_IDMAPS = \
metadata/mysql-cluster-configuration-core.idmap \
- metadata/mysql-cluster-disk-data.idmap \
- metadata/mysql-cluster-interconnects.idmap \
metadata/mysql-cluster-management.idmap \
metadata/mysql-cluster-overview.idmap \
metadata/mysql-cluster-programs-core.idmap
@@ -4903,29 +4864,6 @@
mysql-cluster-glossary-manprepped.xml: $(mysql_cluster_glossary_SOURCES) $(mysql_cluster_glossary_IDMAPS)
mysql-cluster-glossary-remprepped.xml: $(mysql_cluster_glossary_SOURCES) $(mysql_cluster_glossary_IDMAPS)
-mysql_cluster_interconnects_INCLUDES = \
- ../common/fixedchars.ent \
- ../common/phrases.ent \
- ../gui-common/gui-common.ent \
- ../mysql-monitor-2.0/version.ent \
- ../mysql-monitor-common/enterprise.ent \
- ../refman-common/urls.ent \
- all-entities.ent \
- versions.ent
-mysql_cluster_interconnects_IMAGES =
-mysql_cluster_interconnects_SOURCES = mysql-cluster-interconnects.xml $(mysql_cluster_interconnects_INCLUDES)
-mysql_cluster_interconnects_IDMAPS = \
- metadata/mysql-cluster-configuration-core.idmap \
- metadata/mysql-cluster-interconnects.idmap
-mysql-cluster-interconnects.validpure: $(mysql_cluster_interconnects_SOURCES)
-mysql-cluster-interconnects.titles: $(mysql_cluster_interconnects_SOURCES)
-mysql-cluster-interconnects.useless: $(mysql_cluster_interconnects_SOURCES)
-mysql-cluster-interconnects.valid: $(mysql_cluster_interconnects_SOURCES) $(mysql_cluster_interconnects_IDMAPS)
-mysql-cluster-interconnects.validwarn: $(mysql_cluster_interconnects_SOURCES) $(mysql_cluster_interconnects_IDMAPS)
-mysql-cluster-interconnects-prepped.xml: $(mysql_cluster_interconnects_SOURCES) $(mysql_cluster_interconnects_IDMAPS)
-mysql-cluster-interconnects-manprepped.xml: $(mysql_cluster_interconnects_SOURCES) $(mysql_cluster_interconnects_IDMAPS)
-mysql-cluster-interconnects-remprepped.xml: $(mysql_cluster_interconnects_SOURCES) $(mysql_cluster_interconnects_IDMAPS)
-
mysql_cluster_limitations_INCLUDES = \
../common/fixedchars.ent \
../common/phrases.ent \
@@ -4941,7 +4879,6 @@
../refman-common/metadata/bug-reports.idmap \
metadata/data-types.idmap \
metadata/mysql-cluster-configuration-core.idmap \
- metadata/mysql-cluster-disk-data.idmap \
metadata/mysql-cluster-limitations.idmap \
metadata/mysql-cluster-management.idmap \
metadata/mysql-cluster-multi-computer.idmap \
@@ -4974,6 +4911,7 @@
../ndbapi/metadata/mgm-api.idmap \
../ndbapi/metadata/ndb-errors.idmap \
../ndbapi/metadata/ndb-internals.idmap \
+ metadata/data-types.idmap \
metadata/dba-mysqld-server-core.idmap \
metadata/information-schema.idmap \
metadata/mysql-cluster-configuration-core.idmap \
@@ -5091,8 +5029,6 @@
../ndbapi/metadata/getting-started.idmap \
../ndbapi/metadata/mgm-api.idmap \
metadata/mysql-cluster-configuration-core.idmap \
- metadata/mysql-cluster-disk-data.idmap \
- metadata/mysql-cluster-interconnects.idmap \
metadata/mysql-cluster-limitations.idmap \
metadata/mysql-cluster-management.idmap \
metadata/mysql-cluster-overview.idmap \
@@ -5185,7 +5121,6 @@
../ndbapi/metadata/ndb-internals.idmap \
metadata/installing-core.idmap \
metadata/mysql-cluster-configuration-core.idmap \
- metadata/mysql-cluster-disk-data.idmap \
metadata/mysql-cluster-limitations.idmap \
metadata/mysql-cluster-management.idmap \
metadata/mysql-cluster-news-core.idmap \
@@ -5284,9 +5219,7 @@
dynxml-local-mysql-cluster-optvar.xml \
dynxml-local-mysql-cluster-programs.xml \
mysql-cluster-configuration-core.xml \
- mysql-cluster-disk-data.xml \
mysql-cluster-glossary.xml \
- mysql-cluster-interconnects.xml \
mysql-cluster-limitations.xml \
mysql-cluster-management.xml \
mysql-cluster-multi-computer.xml \
@@ -5347,9 +5280,7 @@
metadata/installing-core.idmap \
metadata/introduction.idmap \
metadata/mysql-cluster-configuration-core.idmap \
- metadata/mysql-cluster-disk-data.idmap \
metadata/mysql-cluster-glossary.idmap \
- metadata/mysql-cluster-interconnects.idmap \
metadata/mysql-cluster-limitations.idmap \
metadata/mysql-cluster-management.idmap \
metadata/mysql-cluster-multi-computer.idmap \
@@ -6420,7 +6351,6 @@
metadata/internationalization.idmap \
metadata/introduction.idmap \
metadata/language-structure-core.idmap \
- metadata/mysql-cluster-disk-data.idmap \
metadata/mysql-cluster-limitations.idmap \
metadata/mysql-cluster-management.idmap \
metadata/mysql-cluster.idmap \
Modified: trunk/refman-5.1/mysql-cluster-configuration-core.xml
===================================================================
--- trunk/refman-5.1/mysql-cluster-configuration-core.xml 2009-09-25 07:18:52 UTC (rev 16804)
+++ trunk/refman-5.1/mysql-cluster-configuration-core.xml 2009-09-25 09:34:22 UTC (rev 16805)
Changed blocks: 2, Lines Added: 658, Lines Deleted: 341; 44739 bytes
@@ -7847,6 +7847,443 @@
</section>
+ <section id="mysql-cluster-config-lcp-params">
+
+ <title>Configuring MySQL Cluster Parameters for Local Checkpoints</title>
+
+ <indexterm>
+ <primary>DataMemory</primary>
+ </indexterm>
+
+ <indexterm>
+ <primary>IndexMemory</primary>
+ </indexterm>
+
+ <indexterm>
+ <primary>NoOfDiskPagesToDiskAfterRestartACC</primary>
+ <secondary>calculating</secondary>
+ </indexterm>
+
+ <indexterm>
+ <primary>NoOfDiskPagesToDiskAfterRestartTUP</primary>
+ <secondary>calculating</secondary>
+ </indexterm>
+
+ <indexterm>
+ <primary>NoOfFragmentLogFiles</primary>
+ <secondary>calculating</secondary>
+ </indexterm>
+
+ <indexterm>
+ <primary>local checkpoints (MySQL Cluster)</primary>
+ </indexterm>
+
+ <indexterm>
+ <primary>MySQL Cluster</primary>
+ <secondary>configuration</secondary>
+ </indexterm>
+
+ <remark role="note">
+ [js] Adapted from an item in Mikael Ronström's blog,
+ 2006-07-11.
+ </remark>
+
+ <para>
+ The parameters discussed in
+ <link linkend="mysql-cluster-logging-and-checkpointing">Logging
+ and Checkpointing</link> and in
+ <link linkend="mysql-cluster-data-memory-index-memory-string-memory">Data
+ Memory, Index Memory, and String Memory</link> that are used to
+ configure local checkpoints for a MySQL Cluster do not exist in
+ isolation, but rather are very much interdepedent on each other.
+ In this section, we illustrate how these parameters —
+ including <literal>DataMemory</literal>,
+ <literal>IndexMemory</literal>,
+ <literal>NoOfDiskPagesToDiskAfterRestartTUP</literal>,
+ <literal>NoOfDiskPagesToDiskAfterRestartACC</literal>, and
+ <literal>NoOfFragmentLogFiles</literal> — relate to one
+ another in a working Cluster.
+ </para>
+
+ <important>
+ <para>
+ The parameters
+ <literal>NoOfDiskPagesToDiskAfterRestartTUP</literal> and
+ <literal>NoOfDiskPagesToDiskAfterRestartACC</literal> were
+ deprecated in MySQL 5.1.6. From MySQL 5.1.6 through 5.1.11,
+ disk writes during LCPs took place at the maximum speed
+ possible. Beginning with MySQL 5.1.12, the speed and
+ throughput for LCPs are controlled using the parameters
+ <literal>DiskSyncSize</literal>,
+ <literal>DiskCheckpointSpeed</literal>, and
+ <literal>DiskCheckpointSpeedInRestart</literal>. See
+ <xref linkend="mysql-cluster-ndbd-definition"/>.
+ </para>
+ </important>
+
+ <para>
+ In this example, we assume that our application performs the
+ following numbers of types of operations per hour:
+ </para>
+
+ <itemizedlist>
+
+ <listitem>
+ <para>
+ 50000 selects
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ 15000 inserts
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ 15000 updates
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ 15000 deletes
+ </para>
+ </listitem>
+
+ </itemizedlist>
+
+ <para>
+ We also make the following assumptions about the data used in
+ the application:
+ </para>
+
+ <itemizedlist>
+
+ <listitem>
+ <para>
+ We are working with a single table having 40 columns.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Each column can hold up to 32 bytes of data.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ A typical <literal role="stmt">UPDATE</literal> run by the
+ application affects the values of 5 columns.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ No <literal>NULL</literal> values are inserted by the
+ application.
+ </para>
+ </listitem>
+
+ </itemizedlist>
+
+ <para>
+ A good starting point is to determine the amount of time that
+ should elapse between local checkpoints (LCPs). It is worth
+ noting that, in the event of a system restart, it takes 40-60
+ percent of this interval to execute the REDO log — for
+ example, if the time between LCPs is 5 minutes (300 seconds),
+ then it should take 2 to 3 minutes (120 to 180 seconds) for the
+ REDO log to be read.
+ </para>
+
+ <para>
+ The maximum amount of data per node can be assumed to be the
+ size of the <literal>DataMemory</literal> parameter. In this
+ example, we assume that this is 2 GB. The
+ <literal>NoOfDiskPagesToDiskAfterRestartTUP</literal> parameter
+ represents the amount of data to be checkpointed per unit time
+ — however, this parameter is actually expressed as the
+ number of 8K memory pages to be checkpointed per 100
+ milliseconds. 2 GB per 300 seconds is approximately 6.8 MB per
+ second, or 700 KB per 100 milliseconds, which works out to
+ roughly 85 pages per 100 milliseconds.
+ </para>
+
+ <para>
+ Similarly, we can calculate
+ <literal>NoOfDiskPagesToDiskAfterRestartACC</literal> in terms
+ of the time for local checkpoints and the amount of memory
+ required for indexes — that is, the
+ <literal>IndexMemory</literal>. Assuming that we allow 512 MB
+ for indexes, this works out to approximately 20 8-KB pages per
+ 100 milliseconds for this parameter.
+ </para>
+
+ <para>
+ Next, we need to determine the number of REDO log files required
+ — that is, fragment log files — the corresponding
+ parameter being <literal>NoOfFragmentLogFiles</literal>. We need
+ to make sure that there are sufficient REDO log files for
+ keeping records for at least 3 local checkpoints (in MySQL
+ Cluster NDB 6.3.8 and later, we need only allow for 2 local
+ checkpoints). In a production setting, there are always
+ uncertainties — for instance, we cannot be sure that disks
+ always operate at top speed or with maximum throughput. For this
+ reason, it is best to err on the side of caution, so we double
+ our requirement and calculate a number of fragment log files
+ which should be enough to keep records covering 6 local
+ checkpoints (in MySQL Cluster NDB 6.3.8 and later, a number of
+ fragment log files accommodating 4 local checkpoints should be
+ sufficient).
+ </para>
+
+ <para>
+ It is also important to remember that the disk also handles
+ writes to the REDO log, so if you find that the amount of data
+ being written to disk as determined by the values of
+ <literal>NoOfDiskPagesToDiskAfterRestartACC</literal> and
+ <literal>NoOfDiskPagesToDiskAfterRestartTUP</literal> is
+ approaching the amount of disk bandwidth available, you may wish
+ to increase the time between local checkpoints.
+ </para>
+
+ <para>
+ Given 5 minutes (300 seconds) per local checkpoint, this means
+ that we need to support writing log records at maximum speed for
+ 6 * 300 = 1800 seconds (<emphasis>MySQL Cluster NDB 6.3.8 and
+ later</emphasis>: 4 * 300 = 1200 seconds). The size of a REDO
+ log record is 72 bytes plus 4 bytes per updated column value
+ plus the maximum size of the updated column, and there is one
+ REDO log record for each table record updated in a transaction,
+ on each node where the data reside. Using the numbers of
+ operations set out previously in this section, we derive the
+ following:
+ </para>
+
+ <itemizedlist>
+
+ <listitem>
+ <para>
+ 50000 select operations per hour yields 0 log records (and
+ thus 0 bytes), since <literal role="stmt">SELECT</literal>
+ statements are not recorded in the REDO log.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ 15000 <literal role="stmt">DELETE</literal> statements per
+ hour is approximately 5 delete operations per second. (Since
+ we wish to be conservative in our estimate, we round up here
+ and in the following calculations.) No columns are updated
+ by deletes, so these statements consume only 5 operations *
+ 72 bytes per operation = 360 bytes per second.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ 15000 <literal role="stmt">UPDATE</literal> statements per
+ hour is roughly the same as 5 updates per second. Each
+ update uses 72 bytes, plus 4 bytes per column * 5 columns
+ updated, plus 32 bytes per column * 5 columns — this
+ works out to 72 + 20 + 160 = 252 bytes per operation, and
+ multiplying this by 5 operation per second yields 1260 bytes
+ per second.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ 15000 <literal role="stmt">INSERT</literal> statements per
+ hour is equivalent to 5 insert operations per second. Each
+ insert requires REDO log space of 72 bytes, plus 4 bytes per
+ record * 40 columns, plus 32 bytes per column * 40 columns,
+ which is 72 + 160 + 1280 = 1512 bytes per operation. This
+ times 5 operations per second yields 7560 bytes per second.
+ </para>
+ </listitem>
+
+ </itemizedlist>
+
+ <para>
+ So the total number of REDO log bytes being written per second
+ is approximately 0 + 360 + 1260 + 7560 = 9180 bytes. Multiplied
+ by 1800 seconds, this yields 16524000 bytes required for REDO
+ logging, or approximately 15.75 MB. The unit used for
+ <literal>NoOfFragmentLogFiles</literal> represents a set of 4
+ 16-MB log files — that is, 64 MB. Thus, the minimum value
+ (3) for this parameter is sufficient for the scenario envisioned
+ in this example, since 3 times 64 = 192 MB, or about 12 times
+ what is required; the default value of 8 (or 512 MB) is more
+ than ample in this case.
+ </para>
+
+ </section>
+
+ <section id="mysql-cluster-config-send-buffers">
+
+ <title>Configuring MySQL Cluster Send Buffer Parameters</title>
+
+ <para>
+ Formerly, the NDB kernel used a send buffer whose size was fixed
+ at 2MB for every node in the cluster, which was allocated when
+ the node started. Because the size of this buffer could not be
+ changed after the cluster was started, it was necessary to make
+ it large enough in advance to accomodate the maximum possible
+ load on any transporter socket. However, this was an inefficient
+ use of memory, since much of it often went unused, and could
+ result in large amounts of resources being wasted when scaling
+ up to many API nodes.
+ </para>
+
+ <para>
+ Beginning with MySQL Cluster NDB 7.0, this problem is solved by
+ employing a unified send buffer whose memory is allocated
+ dynamically from a pool shared by all transporters. This means
+ that the size of the send buffer can be adjusted as necessary.
+ Configuration of the unified send buffer can accomplished by
+ setting the following parameters:
+ </para>
+
+ <itemizedlist>
+
+ <listitem>
+ <formalpara>
+
+ <title><literal>TotalSendBufferMemory</literal></title>
+
+ <para>
+ This parameter can be set for all types of MySQL Cluster
+ nodes — that is, it can be set in the
+ <literal>[ndbd]</literal>, <literal>[mgm]</literal>, and
+ <literal>[api]</literal> (or <literal>[mysql]</literal>)
+ sections of the <filename>config.ini</filename> file. It
+ represents the total amount of memory (in bytes) to be
+ allocated by each node for which it is set for use among
+ all configured transporters. If set, its minimum is 256K;
+ the maximum is 4294967039.
+ </para>
+
+ </formalpara>
+
+ <para>
+ In order to be backward-compatible with existing
+ configurations, this parameter takes as its default value
+ the sum of the maximum send buffer sizes of all configured
+ transporters, plus an additional 32KB (one page) per
+ transporter. The maximum depends on the type of transporter,
+ as shown in the following table:
+
+ <informaltable>
+ <tgroup cols="2">
+ <colspec colwidth="30*"/>
+ <colspec colwidth="70*"/>
+ <thead>
+ <row>
+ <entry>Transporter</entry>
+ <entry>Maxmimum Send Buffer Size (bytes)</entry>
+ </row>
+ </thead>
+ <tbody>
+ <row>
+ <entry>TCP</entry>
+ <entry><literal>SendBufferMemory</literal> (default = 2M)</entry>
+ </row>
+ <row>
+ <entry>SCI</entry>
+ <entry><literal>SendLimit</literal> (default = 8K) plus 16K</entry>
+ </row>
+ <row>
+ <entry>SHM</entry>
+ <entry>20K</entry>
+ </row>
+ </tbody>
+ </tgroup>
+ </informaltable>
+
+ This allows existing configurations to function in close to
+ the same way as they did with MySQL Cluster NDB 6.3 and
+ earlier, with the same amount of memory and send buffer
+ space available to each transporter. However, memory that is
+ unused by one transporter is not available to other
+ transporters.
+ </para>
+ </listitem>
+
+ <listitem>
+ <formalpara>
+
+ <title><literal>ReservedSendBufferMemory</literal></title>
+
+ <para>
+ This optional data node parameter, if set, gives an amount
+ of memory (in bytes) that is reserved for connections
+ between data nodes; this memory is not allocated to send
+ buffers used for communications with management servers or
+ API nodes. This provides a way to protect the cluster
+ against misbehaving API nodes that use excess send memory
+ and thus cause failures in communications internally in
+ the NDB kernel. If set, its the minimum permitted value
+ for this parameters is 256K; the maximum is 4294967039.
+ </para>
+
+ </formalpara>
+ </listitem>
+
+ <listitem>
+ <formalpara>
+
+ <title><literal>OverloadLimit</literal></title>
+
+ <para>
+ This parameter is used in the
+ <literal>config.ini</literal> file
+ <literal>[tcp]</literal> section, and denotes the amount
+ of unsent data (in bytes) that must be present in the send
+ buffer before the connection is considered overloaded.
+ When such an overload condition occurs, transactions that
+ affect the overloaded connection fail with NDB API Error
+ 1218 (<errortext>Send Buffers overloaded in NDB
+ kernel</errortext>) until the overload status passes. The
+ default value is 0; there is no defined maximum value for
+ this parameter.
+ </para>
+
+ </formalpara>
+ </listitem>
+
+ <listitem>
+ <formalpara>
+
+ <title><literal>SendBufferMemory</literal></title>
+
+ <para>
+ In MySQL Cluster NDB 6.3 and earlier, this TCP
+ configuration parameter represented the amount of memory
+ allocated at startup for each configured TCP connection.
+ Beginning with MySQL Cluster NDB 7.0, this value denotes a
+ hard limit for how much memory (out of the total available
+ — that is, <literal>TotalSendBufferMemory</literal>)
+ that may be used by a single transporter. However, the sum
+ of <literal>TotalSendBufferMemory</literal> for all
+ configured transporters may be greater than
+ <literal>SendBufferMemory</literal>. This is a way to save
+ memory when many nodes are in use, as long as the maximum
+ amount of memory is never required by all transporters at
+ the same time.
+ </para>
+
+ </formalpara>
+ </listitem>
+
+ </itemizedlist>
+
+ </section>
+
</section>
<section id="mysql-cluster-params-overview">
@@ -8173,434 +8610,314 @@
</section>
- <section id="mysql-cluster-config-lcp-params">
+ <section id="mysql-cluster-interconnects">
- <title>Configuring MySQL Cluster Parameters for Local Checkpoints</title>
+ <title>Using High-Speed Interconnects with MySQL Cluster</title>
<indexterm>
- <primary>DataMemory</primary>
+ <primary>MySQL Cluster</primary>
+ <secondary>network transporters</secondary>
</indexterm>
<indexterm>
- <primary>IndexMemory</primary>
- </indexterm>
-
- <indexterm>
- <primary>NoOfDiskPagesToDiskAfterRestartACC</primary>
- <secondary>calculating</secondary>
- </indexterm>
-
- <indexterm>
- <primary>NoOfDiskPagesToDiskAfterRestartTUP</primary>
- <secondary>calculating</secondary>
- </indexterm>
-
- <indexterm>
- <primary>NoOfFragmentLogFiles</primary>
- <secondary>calculating</secondary>
- </indexterm>
-
- <indexterm>
- <primary>local checkpoints (MySQL Cluster)</primary>
- </indexterm>
-
- <indexterm>
<primary>MySQL Cluster</primary>
- <secondary>configuration</secondary>
+ <secondary>interconnects</secondary>
</indexterm>
- <remark role="note">
- [js] Adapted from an item in Mikael Ronström's blog, 2006-07-11.
- </remark>
-
<para>
- The parameters discussed in
- <link linkend="mysql-cluster-logging-and-checkpointing">Logging
- and Checkpointing</link> and in
- <link linkend="mysql-cluster-data-memory-index-memory-string-memory">Data
- Memory, Index Memory, and String Memory</link> that are used to
- configure local checkpoints for a MySQL Cluster do not exist in
- isolation, but rather are very much interdepedent on each other.
- In this section, we illustrate how these parameters —
- including <literal>DataMemory</literal>,
- <literal>IndexMemory</literal>,
- <literal>NoOfDiskPagesToDiskAfterRestartTUP</literal>,
- <literal>NoOfDiskPagesToDiskAfterRestartACC</literal>, and
- <literal>NoOfFragmentLogFiles</literal> — relate to one
- another in a working Cluster.
+ Even before design of <literal role="se">NDBCLUSTER</literal>
+ began in 1996, it was evident that one of the major problems to be
+ encountered in building parallel databases would be communication
+ between the nodes in the network. For this reason,
+ <literal role="se">NDBCLUSTER</literal> was designed from the very
+ beginning to allow for the use of a number of different data
+ transport mechanisms. In this Manual, we use the term
+ <firstterm>transporter</firstterm> for these.
</para>
- <important>
- <para>
- The parameters
- <literal>NoOfDiskPagesToDiskAfterRestartTUP</literal> and
- <literal>NoOfDiskPagesToDiskAfterRestartACC</literal> were
- deprecated in MySQL 5.1.6. From MySQL 5.1.6 through 5.1.11, disk
- writes during LCPs took place at the maximum speed possible.
- Beginning with MySQL 5.1.12, the speed and throughput for LCPs
- are controlled using the parameters
- <literal>DiskSyncSize</literal>,
- <literal>DiskCheckpointSpeed</literal>, and
- <literal>DiskCheckpointSpeedInRestart</literal>. See
- <xref linkend="mysql-cluster-ndbd-definition"/>.
- </para>
- </important>
-
<para>
- In this example, we assume that our application performs the
- following numbers of types of operations per hour:
+ The MySQL Cluster codebase includes support for four different
+ transporters:
</para>
<itemizedlist>
<listitem>
<para>
- 50000 selects
+ <emphasis>TCP/IP using 100 Mbps or gigabit
+ Ethernet</emphasis>, as discussed in
+ <xref linkend="mysql-cluster-tcp-definition"/>.
</para>
</listitem>
<listitem>
<para>
- 15000 inserts
+ <emphasis>Direct (machine-to-machine) TCP/IP</emphasis>;
+ although this transporter uses the same TCP/IP protocol as
+ mentioned in the previous item, it requires setting up the
+ hardware differently and is configured differently as well.
+ For this reason, it is considered a separate transport
+ mechanism for MySQL Cluster. See
+ <xref linkend="mysql-cluster-tcp-definition-direct"/>, for
+ details.
</para>
</listitem>
<listitem>
<para>
- 15000 updates
+ <emphasis>Shared memory (SHM)</emphasis>. For more information
+ about SHM, see <xref linkend="mysql-cluster-shm-definition"/>.
</para>
</listitem>
<listitem>
<para>
- 15000 deletes
+ <emphasis>Scalable Coherent Interface (SCI)</emphasis>, as
+ described in the next section of this chapter,
+ <xref linkend="mysql-cluster-sci-definition"/>.
</para>
</listitem>
</itemizedlist>
<para>
- We also make the following assumptions about the data used in the
- application:
+ Most users today employ TCP/IP over Ethernet because it is
+ ubiquitous. TCP/IP is also by far the best-tested transporter for
+ use with MySQL Cluster.
</para>
- <itemizedlist>
-
- <listitem>
- <para>
- We are working with a single table having 40 columns.
- </para>
- </listitem>
-
- <listitem>
- <para>
- Each column can hold up to 32 bytes of data.
- </para>
- </listitem>
-
- <listitem>
- <para>
- A typical <literal role="stmt">UPDATE</literal> run by the
- application affects the values of 5 columns.
- </para>
- </listitem>
-
- <listitem>
- <para>
- No <literal>NULL</literal> values are inserted by the
- application.
- </para>
- </listitem>
-
- </itemizedlist>
-
<para>
- A good starting point is to determine the amount of time that
- should elapse between local checkpoints (LCPs). It is worth noting
- that, in the event of a system restart, it takes 40-60 percent of
- this interval to execute the REDO log — for example, if the
- time between LCPs is 5 minutes (300 seconds), then it should take
- 2 to 3 minutes (120 to 180 seconds) for the REDO log to be read.
+ We are working to make sure that communication with the
+ <command>ndbd</command> process is made in <quote>chunks</quote>
+ that are as large as possible because this benefits all types of
+ data transmission.
</para>
<para>
- The maximum amount of data per node can be assumed to be the size
- of the <literal>DataMemory</literal> parameter. In this example,
- we assume that this is 2 GB. The
- <literal>NoOfDiskPagesToDiskAfterRestartTUP</literal> parameter
- represents the amount of data to be checkpointed per unit time
- — however, this parameter is actually expressed as the
- number of 8K memory pages to be checkpointed per 100 milliseconds.
- 2 GB per 300 seconds is approximately 6.8 MB per second, or 700 KB
- per 100 milliseconds, which works out to roughly 85 pages per 100
- milliseconds.
+ For users who desire it, it is also possible to use cluster
+ interconnects to enhance performance even further. There are two
+ ways to achieve this: Either a custom transporter can be designed
+ to handle this case, or you can use socket implementations that
+ bypass the TCP/IP stack to one extent or another. We have
+ experimented with both of these techniques using the SCI (Scalable
+ Coherent Interface) technology developed by
+ <ulink url="http://www.dolphinics.com/">Dolphin Interconnect
+ Solutions</ulink>.
</para>
- <para>
- Similarly, we can calculate
- <literal>NoOfDiskPagesToDiskAfterRestartACC</literal> in terms of
- the time for local checkpoints and the amount of memory required
- for indexes — that is, the <literal>IndexMemory</literal>.
- Assuming that we allow 512 MB for indexes, this works out to
- approximately 20 8-KB pages per 100 milliseconds for this
- parameter.
- </para>
+ <section id="mysql-cluster-sci-sockets">
- <para>
- Next, we need to determine the number of REDO log files required
- — that is, fragment log files — the corresponding
- parameter being <literal>NoOfFragmentLogFiles</literal>. We need
- to make sure that there are sufficient REDO log files for keeping
- records for at least 3 local checkpoints (in MySQL Cluster NDB
- 6.3.8 and later, we need only allow for 2 local checkpoints). In a
- production setting, there are always uncertainties — for
- instance, we cannot be sure that disks always operate at top speed
- or with maximum throughput. For this reason, it is best to err on
- the side of caution, so we double our requirement and calculate a
- number of fragment log files which should be enough to keep
- records covering 6 local checkpoints (in MySQL Cluster NDB 6.3.8
- and later, a number of fragment log files accommodating 4 local
- checkpoints should be sufficient).
- </para>
+ <title>Configuring MySQL Cluster to use SCI Sockets</title>
- <para>
- It is also important to remember that the disk also handles writes
- to the REDO log, so if you find that the amount of data being
- written to disk as determined by the values of
- <literal>NoOfDiskPagesToDiskAfterRestartACC</literal> and
- <literal>NoOfDiskPagesToDiskAfterRestartTUP</literal> is
- approaching the amount of disk bandwidth available, you may wish
- to increase the time between local checkpoints.
- </para>
+ <indexterm>
+ <primary>MySQL Cluster</primary>
+ <secondary>network transporters</secondary>
+ </indexterm>
- <para>
- Given 5 minutes (300 seconds) per local checkpoint, this means
- that we need to support writing log records at maximum speed for 6
- * 300 = 1800 seconds (<emphasis>MySQL Cluster NDB 6.3.8 and
- later</emphasis>: 4 * 300 = 1200 seconds). The size of a REDO log
- record is 72 bytes plus 4 bytes per updated column value plus the
- maximum size of the updated column, and there is one REDO log
- record for each table record updated in a transaction, on each
- node where the data reside. Using the numbers of operations set
- out previously in this section, we derive the following:
- </para>
+ <indexterm>
+ <primary>MySQL Cluster</primary>
+ <secondary>SCI (Scalable Coherent Interface)</secondary>
+ </indexterm>
- <itemizedlist>
+ <indexterm>
+ <primary>SCI (Scalable Coherent Interface)</primary>
+ <see>MySQL Cluster</see>
+ </indexterm>
- <listitem>
- <para>
- 50000 select operations per hour yields 0 log records (and
- thus 0 bytes), since <literal role="stmt">SELECT</literal>
- statements are not recorded in the REDO log.
- </para>
- </listitem>
+ <para>
+ It is possible employing Scalable Coherent Interface (SCI)
+ technology to achieve a significant increase in connection
+ speeds and throughput between MySQL Cluster data and SQL nodes.
+ To use SCI, it is necessary to obtain and install Dolphin SCI
+ network cards and to use the drivers and other software supplied
+ by Dolphin. You can get information on obtaining these, from
+ <ulink url="http://www.dolphinics.com/">Dolphin Interconnect
+ Solutions</ulink>. SCI SuperSocket or SCI Transporter support is
+ available for 32-bit and 64-bit Linux, Solaris, Windows, and
+ other platforms. See the Dolphin documentation referenced later
+ in this section for more detailed information regarding
+ platforms supported for SCI.
+ </para>
- <listitem>
+ <note>
<para>
- 15000 <literal role="stmt">DELETE</literal> statements per
- hour is approximately 5 delete operations per second. (Since
- we wish to be conservative in our estimate, we round up here
- and in the following calculations.) No columns are updated by
- deletes, so these statements consume only 5 operations * 72
- bytes per operation = 360 bytes per second.
+ Prior to MySQL 5.1.20, there were issues with building MySQL
+ Cluster with SCI support (see Bug#25470), but these have been
+ resolved due to work contributed by Dolphin. SCI Sockets are
+ now correctly supported for MySQL Cluster hosts running recent
+ versions of Linux using the <literal>-max</literal> builds,
+ and versions of MySQL Cluster with SCI Transporter support can
+ be built using either of
+ <command>compile-amd64-max-sci</command> or
+ <command>compile-pentium64-max-sci</command>. Both of these
+ build scripts can be found in the <filename>BUILD</filename>
+ directory of the MySQL Cluster source trees; it should not be
+ difficult to adapt them for other platforms. Generally, all
+ that is necessary is to compile MySQL Cluster with SCI
+ Transporter support is to configure the MySQL Cluster build
+ using <option>--with-ndb-sci=/opt/DIS</option>.
</para>
- </listitem>
+ </note>
- <listitem>
- <para>
- 15000 <literal role="stmt">UPDATE</literal> statements per
- hour is roughly the same as 5 updates per second. Each update
- uses 72 bytes, plus 4 bytes per column * 5 columns updated,
- plus 32 bytes per column * 5 columns — this works out to
- 72 + 20 + 160 = 252 bytes per operation, and multiplying this
- by 5 operation per second yields 1260 bytes per second.
- </para>
- </listitem>
+ <para>
+ Once you have acquired the required Dolphin hardware and
+ software, you can obtain detailed information on how to adapt a
+ MySQL Cluster configured for normal TCP/IP communication to use
+ SCI from the <citetitle>Dolphin Express for MySQL Installation
+ and Reference Guide</citetitle>, available for download at
+ <ulink url="http://docsrva.mysql.com/public/DIS_install_guide_book.pdf"/>
+ (PDF file, 94 pages, 753 KB). This document provides
+ instructions for installing the SCI hardware and software, as
+ well as information concerning network topology and
+ configuration.
+ </para>
- <listitem>
- <para>
- 15000 <literal role="stmt">INSERT</literal> statements per
- hour is equivalent to 5 insert operations per second. Each
- insert requires REDO log space of 72 bytes, plus 4 bytes per
- record * 40 columns, plus 32 bytes per column * 40 columns,
- which is 72 + 160 + 1280 = 1512 bytes per operation. This
- times 5 operations per second yields 7560 bytes per second.
- </para>
- </listitem>
+ </section>
- </itemizedlist>
+ <section id="mysql-cluster-interconnects-performance">
- <para>
- So the total number of REDO log bytes being written per second is
- approximately 0 + 360 + 1260 + 7560 = 9180 bytes. Multiplied by
- 1800 seconds, this yields 16524000 bytes required for REDO
- logging, or approximately 15.75 MB. The unit used for
- <literal>NoOfFragmentLogFiles</literal> represents a set of 4
- 16-MB log files — that is, 64 MB. Thus, the minimum value
- (3) for this parameter is sufficient for the scenario envisioned
- in this example, since 3 times 64 = 192 MB, or about 12 times what
- is required; the default value of 8 (or 512 MB) is more than ample
- in this case.
- </para>
+ <title>MySQL Cluster Interconnects and Performance</title>
- </section>
+ <indexterm>
+ <primary>MySQL Cluster</primary>
+ <secondary>performance</secondary>
+ </indexterm>
- <section id="mysql-cluster-config-send-buffers">
+ <indexterm>
+ <primary>MySQL Cluster</primary>
+ <secondary>benchmarks</secondary>
+ </indexterm>
- <title>Configuring MySQL Cluster Send Buffer Parameters</title>
+ <para>
+ The <command>ndbd</command> process has a number of simple
+ constructs which are used to access the data in a MySQL Cluster.
+ We have created a very simple benchmark to check the performance
+ of each of these and the effects which various interconnects
+ have on their performance.
+ </para>
- <para>
- Formerly, the NDB kernel used a send buffer whose size was fixed
- at 2MB for every node in the cluster, which was allocated when the
- node started. Because the size of this buffer could not be changed
- after the cluster was started, it was necessary to make it large
- enough in advance to accomodate the maximum possible load on any
- transporter socket. However, this was an inefficient use of
- memory, since much of it often went unused, and could result in
- large amounts of resources being wasted when scaling up to many
- API nodes.
- </para>
+ <para>
+ There are four access methods:
+ </para>
- <para>
- Beginning with MySQL Cluster NDB 7.0, this problem is solved by
- employing a unified send buffer whose memory is allocated
- dynamically from a pool shared by all transporters. This means
- that the size of the send buffer can be adjusted as necessary.
- Configuration of the unified send buffer can accomplished by
- setting the following parameters:
- </para>
+ <itemizedlist>
- <itemizedlist>
+ <listitem>
+ <formalpara>
- <listitem>
- <formalpara>
+ <title>Primary key access</title>
- <title><literal>TotalSendBufferMemory</literal></title>
+ <para>
+ This is access of a record through its primary key. In the
+ simplest case, only one record is accessed at a time,
+ which means that the full cost of setting up a number of
+ TCP/IP messages and a number of costs for context
+ switching are borne by this single request. In the case
+ where multiple primary key accesses are sent in one batch,
+ those accesses share the cost of setting up the necessary
+ TCP/IP messages and context switches. If the TCP/IP
+ messages are for different destinations, additional TCP/IP
+ messages need to be set up.
+ </para>
- <para>
- This parameter can be set for all types of MySQL Cluster
- nodes — that is, it can be set in the
- <literal>[ndbd]</literal>, <literal>[mgm]</literal>, and
- <literal>[api]</literal> (or <literal>[mysql]</literal>)
- sections of the <filename>config.ini</filename> file. It
- represents the total amount of memory (in bytes) to be
- allocated by each node for which it is set for use among all
- configured transporters. If set, its minimum is 256K; the
- maximum is 4294967039.
- </para>
+ </formalpara>
+ </listitem>
- </formalpara>
+ <listitem>
+ <formalpara>
- <para>
- In order to be backward-compatible with existing
- configurations, this parameter takes as its default value the
- sum of the maximum send buffer sizes of all configured
- transporters, plus an additional 32KB (one page) per
- transporter. The maximum depends on the type of transporter,
- as shown in the following table:
+ <title>Unique key access</title>
- <informaltable>
- <tgroup cols="2">
- <colspec colwidth="30*"/>
- <colspec colwidth="70*"/>
- <thead>
- <row>
- <entry>Transporter</entry>
- <entry>Maxmimum Send Buffer Size (bytes)</entry>
- </row>
- </thead>
- <tbody>
- <row>
- <entry>TCP</entry>
- <entry><literal>SendBufferMemory</literal> (default = 2M)</entry>
- </row>
- <row>
- <entry>SCI</entry>
- <entry><literal>SendLimit</literal> (default = 8K) plus 16K</entry>
- </row>
- <row>
- <entry>SHM</entry>
- <entry>20K</entry>
- </row>
- </tbody>
- </tgroup>
- </informaltable>
+ <para>
+ Unique key accesses are similar to primary key accesses,
+ except that a unique key access is executed as a read on
+ an index table followed by a primary key access on the
+ table. However, only one request is sent from the MySQL
+ Server, and the read of the index table is handled by
+ <command>ndbd</command>. Such requests also benefit from
+ batching.
+ </para>
- This allows existing configurations to function in close to
- the same way as they did with MySQL Cluster NDB 6.3 and
- earlier, with the same amount of memory and send buffer space
- available to each transporter. However, memory that is unused
- by one transporter is not available to other transporters.
- </para>
- </listitem>
+ </formalpara>
+ </listitem>
- <listitem>
- <formalpara>
+ <listitem>
+ <formalpara>
- <title><literal>ReservedSendBufferMemory</literal></title>
+ <title>Full table scan</title>
+ <para>
+ When no indexes exist for a lookup on a table, a full
+ table scan is performed. This is sent as a single request
+ to the <command>ndbd</command> process, which then divides
+ the table scan into a set of parallel scans on all cluster
+ <command>ndbd</command> processes. In future versions of
+ MySQL Cluster, an SQL node will be able to filter some of
+ these scans.
+ </para>
+
+ </formalpara>
+ </listitem>
+
+ <listitem>
<para>
- This optional data node parameter, if set, gives an amount
- of memory (in bytes) that is reserved for connections
- between data nodes; this memory is not allocated to send
- buffers used for communications with management servers or
- API nodes. This provides a way to protect the cluster
- against misbehaving API nodes that use excess send memory
- and thus cause failures in communications internally in the
- NDB kernel. If set, its the minimum permitted value for this
- parameters is 256K; the maximum is 4294967039.
+ <emphasis role="bold">Range scan using ordered
+ index</emphasis>
</para>
- </formalpara>
- </listitem>
-
- <listitem>
- <formalpara>
-
- <title><literal>OverloadLimit</literal></title>
-
<para>
- This parameter is used in the <literal>config.ini</literal>
- file <literal>[tcp]</literal> section, and denotes the
- amount of unsent data (in bytes) that must be present in the
- send buffer before the connection is considered overloaded.
- When such an overload condition occurs, transactions that
- affect the overloaded connection fail with NDB API Error
- 1218 (<errortext>Send Buffers overloaded in NDB
- kernel</errortext>) until the overload status passes. The
- default value is 0; there is no defined maximum value for
- this parameter.
+ When an ordered index is used, it performs a scan in the
+ same manner as the full table scan, except that it scans
+ only those records which are in the range used by the query
+ transmitted by the MySQL server (SQL node). All partitions
+ are scanned in parallel when all bound index attributes
+ include all attributes in the partitioning key.
</para>
+ </listitem>
- </formalpara>
- </listitem>
+ </itemizedlist>
- <listitem>
- <formalpara>
+ <para>
+ With benchmarks developed internally by MySQL for testing simple
+ and batched primary and unique key accesses, we have found that
+ using SCI sockets improves performance by approximately 100%
+ over TCP/IP, except in rare instances when communication
+ performance is not an issue. This can occur when scan filters
+ make up most of processing time or when very large batches of
+ primary key accesses are achieved. In that case, the CPU
+ processing in the <command>ndbd</command> processes becomes a
+ fairly large part of the overhead.
+ </para>
- <title><literal>SendBufferMemory</literal></title>
+ <para>
+ Using the SCI transporter instead of SCI Sockets is only of
+ interest in communicating between <command>ndbd</command>
+ processes. Using the SCI transporter is also only of interest if
+ a CPU can be dedicated to the <command>ndbd</command> process
+ because the SCI transporter ensures that this process will never
+ go to sleep. It is also important to ensure that the
+ <command>ndbd</command> process priority is set in such a way
+ that the process does not lose priority due to running for an
+ extended period of time, as can be done by locking processes to
+ CPUs in Linux 2.6. If such a configuration is possible, the
+ <command>ndbd</command> process will benefit by 10−70% as
+ compared with using SCI sockets. (The larger figures will be
+ seen when performing updates and probably on parallel scan
+ operations as well.)
+ </para>
- <para>
- In MySQL Cluster NDB 6.3 and earlier, this TCP configuration
- parameter represented the amount of memory allocated at
- startup for each configured TCP connection. Beginning with
- MySQL Cluster NDB 7.0, this value denotes a hard limit for
- how much memory (out of the total available — that is,
- <literal>TotalSendBufferMemory</literal>) that may be used
- by a single transporter. However, the sum of
- <literal>TotalSendBufferMemory</literal> for all configured
- transporters may be greater than
- <literal>SendBufferMemory</literal>. This is a way to save
- memory when many nodes are in use, as long as the maximum
- amount of memory is never required by all transporters at
- the same time.
- </para>
+ <para>
+ There are several other optimized socket implementations for
+ computer clusters, including Myrinet, Gigabit Ethernet,
+ Infiniband and the VIA interface. However, we have tested MySQL
+ Cluster so far only with SCI sockets. See
+ <xref linkend="mysql-cluster-sci-sockets"/>, for information on
+ how to set up SCI sockets using ordinary TCP/IP for MySQL
+ Cluster.
+ </para>
- </formalpara>
- </listitem>
+ </section>
- </itemizedlist>
-
</section>
</section>
Modified: trunk/refman-5.1/mysql-cluster-management.xml
===================================================================
--- trunk/refman-5.1/mysql-cluster-management.xml 2009-09-25 07:18:52 UTC (rev 16804)
+++ trunk/refman-5.1/mysql-cluster-management.xml 2009-09-25 09:34:22 UTC (rev 16805)
Changed blocks: 1, Lines Added: 884, Lines Deleted: 0; 32759 bytes
@@ -8197,6 +8197,890 @@
</section>
+ <section id="mysql-cluster-disk-data">
+
+ <title>MySQL Cluster Disk Data Tables</title>
+
+ <indexterm>
+ <primary>MySQL Cluster Disk Data</primary>
+ </indexterm>
+
+ <indexterm>
+ <primary>MySQL Cluster</primary>
+ <secondary>Disk Data tables</secondary>
+ <see>MySQL Cluster Disk Data</see>
+ </indexterm>
+
+ <indexterm>
+ <primary>Disk Data tables (MySQL Cluster)</primary>
+ <see>MySQL Cluster Disk Data</see>
+ </indexterm>
+
+ <remark role="note">
+ Author: Jon Stephens, based on information supplied by Mikael
+ Ronström and Stewart Smith. Additional assistance provided by Jeb
+ Miller.
+ </remark>
+
+ <para>
+ Beginning with MySQL 5.1.6, it is possible to store the nonindexed
+ columns of <literal role="se">NDB</literal> tables on disk, rather
+ than in RAM as with previous versions of MySQL Cluster.
+ </para>
+
+ <para>
+ As part of implementing MySQL Cluster Disk Data work, a number of
+ improvements were made in MySQL Cluster for the efficient handling
+ of very large amounts (terabytes) of data during node recovery and
+ restart. These include a <quote>no-steal</quote> algorithm for
+ synchronising a starting node with very large data sets. For more
+ information, see the paper
+ <citetitle><ulink url="http://www.vldb2005.org/program/paper/wed/p1108-ronstrom.pdf">Recovery
+ Principles of MySQL Cluster 5.1</ulink></citetitle>, by MySQL
+ Cluster developers Mikael Ronström and Jonas Oreland.
+ </para>
+
+ <para>
+ MySQL Cluster Disk Data performance can be influenced by a number
+ of configuration parameters. For information about these
+ parameters and their effects, see
+ <citetitle><link linkend="mysql-cluster-ndbd-definition-disk-data-parameters">MySQL
+ Cluster Disk Data configuration parameters</link></citetitle> and
+ <citetitle><link linkend="mysql-cluster-ndbd-definition-gcp-stop-errors">MySQL
+ Cluster Disk Data storage and <errortext>GCP Stop</errortext>
+ errors</link></citetitle>
+ </para>
+
+ <para>
+ The performance of a MySQL Cluster that uses Disk Data storage can
+ also be greatly improved by separating data node file systems from
+ undo log files and tablespace data files, which can be done using
+ symbolic links. For more information, see
+ <xref linkend="mysql-cluster-disk-data-symlinks"/>.
+ </para>
+
+ <section id="mysql-cluster-disk-data-objects">
+
+ <title>MySQL Cluster Disk Data Objects</title>
+
+ <para>
+ MySQL Cluster Disk Data storage is implemented using a number of
+ <firstterm>Disk Data objects</firstterm>. These include the
+ following:
+ </para>
+
+ <itemizedlist>
+
+ <listitem>
+ <para>
+ <firstterm>Tablespaces</firstterm> act as containers for
+ other Disk Data objects.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ <firstterm>Undo log files</firstterm> undo information
+ required for rolling back transactions.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ One or more undo log files are assigned to a <firstterm>log
+ file group</firstterm>, which is then assigned to a
+ tablespace.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ <firstterm>Data files</firstterm> store Disk Data table
+ data. A data file is assigned directly to a tablespace.
+ </para>
+ </listitem>
+
+ </itemizedlist>
+
+ <para>
+ Undo log files and data files are actual files in the filesystem
+ of each data node; by default they are placed in
+ <literal>ndb_<replaceable>node_id</replaceable>_fs</literal> in
+ the <replaceable>DataDir</replaceable> specified in the MySQL
+ Cluster <filename>config.ini</filename> file, and where
+ <replaceable>node_id</replaceable> is the data node's node
+ ID. It is possible to place these elsewhere by specifying either
+ an absolute or relative path as part of the filename when
+ creating the undo log or data file. Statements that create these
+ files are shown later in this section.
+ </para>
+
+ <para>
+ MySQL Cluster tablespaces and log file groups are not
+ implemented as files.
+ </para>
+
+ <important>
+ <para>
+ Although not all Disk Data objects are implemented as files,
+ they all share the same namespace. This means that
+ <emphasis>each Disk Data object</emphasis> must be uniquely
+ named (and not merely each Disk Data object of a given type).
+ For example, you cannot have a tablespace and a log file group
+ both named <literal>dd1</literal>.
+ </para>
+ </important>
+
+ <para>
+ Assuming that you have already set up a MySQL Cluster with all
+ nodes (including management and SQL nodes) running MySQL 5.1.6
+ or newer, the basic steps for creating a Cluster table on disk
+ are as follows:
+ </para>
+
+ <indexterm>
+ <primary>MySQL Cluster Disk Data</primary>
+ <secondary>creating tables</secondary>
+ </indexterm>
+
+ <orderedlist>
+
+ <listitem>
+ <para>
+ Create a log file group, and assign one or more undo log
+ files to it (an undo log file is also sometimes referred to
+ as an <firstterm>undofile</firstterm>).
+ </para>
+
+ <note>
+ <para>
+ In MySQL 5.1 and later, undo log files are necessary only
+ for Disk Data tables. They are no longer used for
+ <literal role="se">NDBCLUSTER</literal> tables that are
+ stored only in memory.
+ </para>
+ </note>
+ </listitem>
+
+ <listitem>
+ <para>
+ Create a tablespace; assign the log file group, as well as
+ one or more data files, to the tablespace.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Create a Disk Data table that uses this tablespace for data
+ storage.
+ </para>
+ </listitem>
+
+ </orderedlist>
+
+ <para>
+ Each of these tasks can be accomplished using SQL statements in
+ the <command>mysql</command> client or other MySQL client
+ application, as shown in the example that follows.
+ </para>
+
+ <orderedlist>
+
+ <listitem>
+ <indexterm>
+ <primary>MySQL Cluster Disk Data</primary>
+ <secondary>creating log file groups</secondary>
+ </indexterm>
+
+ <para>
+ We create a log file group named <literal>lg_1</literal>
+ using <literal>CREATE LOGFILE GROUP</literal>. This log file
+ group is to be made up of two undo log files, which we name
+ <filename>undo_1.log</filename> and
+ <filename>undo_2.log</filename>, whose initial sizes are 16
+ MB and 12 MB, respectively. (The default initial size for an
+ undo log file is 128 MB.) Optionally, you can also specify a
+ size for the log file group's undo buffer, or allow it to
+ assume the default value of 8 MB. In this example, we set
+ the UNDO buffer's size at 2 MB. A log file group must be
+ created with an undo log file; so we add
+ <filename>undo_1.log</filename> to <literal>lg_1</literal>
+ in this <literal>CREATE LOGFILE GROUP</literal> statement:
+ </para>
+
+<programlisting>
+CREATE LOGFILE GROUP lg_1
+ ADD UNDOFILE 'undo_1.log'
+ INITIAL_SIZE 16M
+ UNDO_BUFFER_SIZE 2M
+ ENGINE NDBCLUSTER;
+</programlisting>
+
+ <para>
+ To add <filename>undo_2.log</filename> to the log file
+ group, use the following <literal>ALTER LOGFILE
+ GROUP</literal> statement:
+ </para>
+
+<programlisting>
+ALTER LOGFILE GROUP lg_1
+ ADD UNDOFILE 'undo_2.log'
+ INITIAL_SIZE 12M
+ ENGINE NDBCLUSTER;
+</programlisting>
+
+ <para>
+ Some items of note:
+ </para>
+
+ <itemizedlist>
+
+ <listitem>
+ <para>
+ The <filename>.log</filename> file extension used here
+ is not required. We use it merely to make the log files
+ easily recognisable.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Every <literal>CREATE LOGFILE GROUP</literal> and
+ <literal>ALTER LOGFILE GROUP</literal> statement must
+ include an <literal>ENGINE</literal> clause. In MySQL
+ 5.1 (including MySQL Cluster NDB 6.X and 7.X through
+ 7.1), the permitted values for this clause are
+ <literal role="se">NDBCLUSTER</literal> and
+ <literal role="se">NDB</literal>.
+ </para>
+
+ <important>
+ <para>
+ In MySQL 5.1.8 and later, there can exist only one log
+ file group in the same MySQL Cluster at any given
+ time.
+ </para>
+ </important>
+ </listitem>
+
+ <listitem>
+ <para>
+ When you add an undo log file to a log file group using
+ <literal>ADD UNDOFILE
+ '<replaceable>filename</replaceable>'</literal>, a file
+ with the name <replaceable>filename</replaceable> is
+ created in the
+ <literal>ndb_<replaceable>node_id</replaceable>_fs</literal>
+ directory within the <literal>DataDir</literal> of each
+ data node in the cluster, where
+ <replaceable>node_id</replaceable> is the node ID of the
+ data node. Each undo log file is of the size specified
+ in the SQL statement. For example, if a MySQL Cluster
+ has 4 data nodes, then the <literal>ALTER LOGFILE
+ GROUP</literal> statement just shown creates 4 undo log
+ files, 1 each on in the data directory of each of the 4
+ data nodes; each of these files is named
+ <filename>undo_2.log</filename> and each file is 12 MB
+ in size.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ <literal>UNDO_BUFFER_SIZE</literal> is limited by the
+ amount of system memory available.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ For more information about the <literal>CREATE LOGFILE
+ GROUP</literal> statement, see
+ <xref linkend="create-logfile-group"/>. For more
+ information about <literal>ALTER LOGFILE
+ GROUP</literal>, see
+ <xref linkend="alter-logfile-group"/>.
+ </para>
+ </listitem>
+
+ </itemizedlist>
+ </listitem>
+
+ <listitem>
+ <indexterm>
+ <primary>MySQL Cluster Disk Data</primary>
+ <secondary>creating tablespaces</secondary>
+ </indexterm>
+
+ <para>
+ Now we can create a tablespace, which contains files to be
+ used by MySQL Cluster Disk Data tables for storing their
+ data. A tablespace is also associated with a particular log
+ file group. When creating a new tablespace, you must specify
+ the log file group which it is to use for undo logging; you
+ must also specify a data file. You can add more data files
+ to the tablespace after the tablespace is created; it is
+ also possible to drop data files from a tablespace (an
+ example of dropping data files is provided later in this
+ section).
+ </para>
+
+ <para>
+ Assume that we wish to create a tablespace named
+ <literal>ts_1</literal> which uses <literal>lg_1</literal>
+ as its log file group. This tablespace is to contain two
+ data files named <filename>data_1.dat</filename> and
+ <filename>data_2.dat</filename>, whose initial sizes are 32
+ MB and 48 MB, respectively. (The default value for
+ <literal>INITIAL_SIZE</literal> is 128 MB.) We can do this
+ using two SQL statements, as shown here:
+ </para>
+
+<programlisting>
+CREATE TABLESPACE ts_1
+ ADD DATAFILE 'data_1.dat'
+ USE LOGFILE GROUP lg_1
+ INITIAL_SIZE 32M
+ ENGINE NDBCLUSTER;
+
+ALTER TABLESPACE ts_1
+ ADD DATAFILE 'data_2.dat'
+ INITIAL_SIZE 48M
+ ENGINE NDBCLUSTER;
+</programlisting>
+
+ <para>
+ The <literal>CREATE TABLESPACE</literal> statement creates a
+ tablespace <literal>ts_1</literal> with the data file
+ <filename>data_1.dat</filename>, and associates
+ <literal>ts_1</literal> with log file group
+ <literal>lg_1</literal>. The <literal>ALTER
+ TABLESPACE</literal> adds the second data file
+ (<filename>data_2.dat</filename>).
+ </para>
+
+ <para>
+ Some items of note:
+ </para>
+
+ <itemizedlist>
+
+ <listitem>
+ <para>
+ As is the case with the <filename>.log</filename> file
+ extension used in this example for undo log files, there
+ is no special significance for the
+ <filename>.dat</filename> file extension; it is used
+ merely for easy recognition of data files.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ When you add a data file to a tablespace using
+ <literal>ADD DATAFILE
+ '<replaceable>filename</replaceable>'</literal>, a file
+ with the name <replaceable>filename</replaceable> is
+ created in the
+ <literal>ndb_<replaceable>node_id</replaceable>_fs</literal>
+ directory within the <literal>DataDir</literal> of each
+ data node in the cluster, where
+ <replaceable>node_id</replaceable> is the node ID of the
+ data node. Each undo log file is of the size specified
+ in the SQL statement. For example, if a MySQL Cluster
+ has 4 data nodes, then the <literal>ALTER
+ TABLESPACE</literal> statement just shown creates 4 undo
+ log files, 1 each on in the data directory of each of
+ the 4 data nodes; each of these files is named
+ <filename>data_2.dat</filename> and each file is 48 MB
+ in size.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ All <literal>CREATE TABLESPACE</literal> and
+ <literal>ALTER TABLESPACE</literal> statements must
+ contain an <literal>ENGINE</literal> clause; only tables
+ using the same storage engine as the tablespace can be
+ created in the tablespace. In MySQL 5.1 (including MySQL
+ Cluster NDB 6.X and 7.X through 7.1), the only permitted
+ values for this clause are
+ <literal role="se">NDBCLUSTER</literal> and
+ <literal role="se">NDB</literal>.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ For more information about the <literal>CREATE
+ TABLESPACE</literal> and <literal>ALTER
+ TABLESPACE</literal> statements, see
+ <xref linkend="create-tablespace"/>, and
+ <xref linkend="alter-tablespace"/>.
+ </para>
+ </listitem>
+
+ </itemizedlist>
+ </listitem>
+
+ <listitem>
+ <indexterm>
+ <primary>MySQL Cluster Disk Data</primary>
+ <secondary>creating tables</secondary>
+ </indexterm>
+
+ <para>
+ Now it is possible to create a table whose nonindexed
+ columns are stored on disk in the tablespace
+ <literal>ts_1</literal>:
+ </para>
+
+<programlisting>
+CREATE TABLE dt_1 (
+ member_id INT UNSIGNED NOT NULL AUTO_INCREMENT PRIMARY KEY,
+ last_name VARCHAR(50) NOT NULL,
+ first_name VARCHAR(50) NOT NULL,
+ dob DATE NOT NULL,
+ joined DATE NOT NULL,
+ INDEX(last_name, first_name)
+ )
+ TABLESPACE ts_1 STORAGE DISK
+ ENGINE NDBCLUSTER;
+</programlisting>
+
+ <para>
+ The <literal>TABLESPACE ... STORAGE DISK</literal> option
+ tells the <literal role="se">NDBCLUSTER</literal> storage
+ engine to use tablespace <literal>ts_1</literal> for disk
+ data storage.
+ </para>
+
+ <note>
+ <para>
+ Beginning with MySQL Cluster NDB 6.2.5 and MySQL Cluster
+ NDB 6.3.2, it is also possible to specify whether an
+ individual column is stored on disk or in memory by using
+ a <literal>STORAGE</literal> clause as part of the
+ column's definition in a <literal role="stmt">CREATE
+ TABLE</literal> or <literal role="stmt">ALTER
+ TABLE</literal> statement. <literal>STORAGE DISK</literal>
+ causes the column to be stored on disk, and
+ <literal>STORAGE MEMORY</literal> causes in-memory storage
+ to be used. See <xref linkend="create-table"/>, for more
+ information.
+ </para>
+ </note>
+
+ <para>
+ Once table <literal>ts_1</literal> has been created as
+ shown, you can perform
+ <literal role="stmt">INSERT</literal>,
+ <literal role="stmt">SELECT</literal>,
+ <literal role="stmt">UPDATE</literal>, and
+ <literal role="stmt">DELETE</literal> statements on it just
+ as you would with any other MySQL table.
+ </para>
+
+ <para>
+ For table <literal>dt_1</literal> as it has been defined
+ here, only the <literal>dob</literal> and
+ <literal>joined</literal> columns are stored on disk. This
+ is because there are indexes on the <literal>id</literal>,
+ <literal>last_name</literal>, and
+ <literal>first_name</literal> columns, and so data belonging
+ to these columns is stored in RAM. In MySQL 5.1 (including
+ MySQL Cluster NDB 6.X and 7.X through 7.1), only nonindexed
+ columns can be held on disk; indexes and indexed column data
+ continue to be stored in memory. This tradeoff between the
+ use of indexes and conservation of RAM is something you must
+ keep in mind as you design Disk Data tables.
+ </para>
+ </listitem>
+
+ </orderedlist>
+
+ <formalpara>
+
+ <title>Performance note</title>
+
+ <para>
+ The performance of a cluster using Disk Data storage is
+ greatly improved if Disk Data files are kept on a separate
+ physical disk from the data node file system. This must be
+ done for each data node in the cluster to derive any
+ noticeable benefit.
+ </para>
+
+ </formalpara>
+
+ <para>
+ You may use absolute and relative file system paths with
+ <literal>ADD UNDOFILE</literal> and <literal>ADD
+ DATAFILE</literal>. Relative paths are calculated relative to
+ the data node's data directory. You may also use symbolic links;
+ see <xref linkend="mysql-cluster-disk-data-symlinks"/>, for more
+ information and examples.
+ </para>
+
+ <para>
+ A log file group, a tablespace, and any Disk Data tables using
+ these must be created in a particular order. The same is true
+ for dropping any of these objects:
+ </para>
+
+ <indexterm>
+ <primary>MySQL Cluster Disk Data</primary>
+ <secondary>dropping Disk Data objects</secondary>
+ </indexterm>
+
+ <itemizedlist>
+
+ <listitem>
+ <para>
+ A log file group cannot be dropped as long as any
+ tablespaces are using it.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ A tablespace cannot be dropped as long as it contains any
+ data files.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ You cannot drop any data files from a tablespace as long as
+ there remain any tables which are using the tablespace.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Beginning with MySQL 5.1.12, it is no longer possible to
+ drop files created in association with a different
+ tablespace than the one with which the files were created.
+ (Bug#20053)
+ </para>
+ </listitem>
+
+ </itemizedlist>
+
+ <para>
+ For example, to drop all the objects created so far in this
+ section, you would use the following statements:
+ </para>
+
+<programlisting>
+mysql> <userinput>DROP TABLE dt_1;</userinput>
+
+mysql> <userinput>ALTER TABLESPACE ts_1</userinput>
+ -> <userinput>DROP DATAFILE 'data_2.dat'</userinput>
+ -> <userinput>ENGINE NDBCLUSTER;</userinput>
+
+mysql> <userinput>ALTER TABLESPACE ts_1</userinput>
+ -> <userinput>DROP DATAFILE 'data_1.dat'</userinput>
+ -> <userinput>ENGINE NDBCLUSTER;</userinput>
+
+mysql> <userinput>DROP TABLESPACE ts_1</userinput>
+ -> <userinput>ENGINE NDBCLUSTER;</userinput>
+
+mysql> <userinput>DROP LOGFILE GROUP lg_1</userinput>
+ -> <userinput>ENGINE NDBCLUSTER;</userinput>
+</programlisting>
+
+ <para>
+ These statements must be performed in the order shown, except
+ that the two <literal>ALTER TABLESPACE ... DROP
+ DATAFILE</literal> statements may be executed in either order.
+ </para>
+
+ <para>
+ You can obtain information about data files used by Disk Data
+ tables by querying the <literal role="is">FILES</literal> table
+ in the <literal>INFORMATION_SCHEMA</literal> database. An extra
+ <quote><literal>NULL</literal> row</quote> was added to this
+ table in MySQL 5.1.14 for providing additional information about
+ undo log files. For more information and examples of use, see
+ <xref linkend="files-table"/>.
+ </para>
+
+ <para>
+ Beginning with MySQL Cluster NDB 6.3.27 and MySQL Cluster NDB
+ 7.0.8, it is also possible to view information about allocated
+ and free disk space for each Disk Data table or table partition
+ using the <command>ndb_desc</command> utility. For more
+ information, see
+ <xref linkend="mysql-cluster-programs-ndb-desc"/>.
+ </para>
+
+ </section>
+
+ <section id="mysql-cluster-disk-data-symlinks">
+
+ <title>Using Symbolic Links with Disk Data Objects</title>
+
+ <remark role="note">
+ [js] Thanks to Jeb Miller for his help with this section!
+ </remark>
+
+ <para>
+ The performance of a MySQL Cluster that uses Disk Data storage
+ can be greatly improved by separating data node file systems
+ from undo log files and tablespace data files and placing these
+ on different disks. While there is currently no direct support
+ for this in MySQL Cluster, it is possible to achieve this
+ separation using symbolic links.
+ </para>
+
+ <para>
+ Each data node in the cluster creates a file system in the
+ directory named
+ <filename>ndb_<replaceable>node_id</replaceable>_fs</filename>
+ under the data node's
+ <literal><link linkend="ndbparam-ndbd-datadir">DataDir</link></literal>
+ as defined in the <filename>config.ini</filename> file. In this
+ example, we assume that each data node host has 3 disks, aliased
+ as <filename>/data0</filename>, <filename>/data1</filename>, and
+ <filename>/data2</filename>, and that the cluster's
+ <filename>config.ini</filename> includes the following:
+
+<programlisting>
+[ndbd default]
+DataDir= /data0
+</programlisting>
+
+ Our objective is to place all Disk Data log files in
+ <filename>/data1</filename>, and all Disk Data data files in
+ <filename>/data2</filename>, on each data node host.
+ </para>
+
+ <note>
+ <para>
+ In this example, we assume that the cluster's data node
+ hosts are all using Linux operating systems. For other
+ platforms, you may need to substitute you operating
+ system's commands for those shown here.
+ </para>
+ </note>
+
+ <para>
+ To accomplish this, perform the following steps:
+
+ <itemizedlist>
+
+ <listitem>
+ <para>
+ Under the data node file system create symbolic links
+ pointing to the other drives:
+
+<programlisting>
+shell> <userinput>cd /data0/ndb_2_fs</userinput>
+shell> <userinput>ls</userinput>
+D1 D10 D11 D2 D8 D9 LCP
+shell> <userinput>ln -s /data0 dnlogs</userinput>
+shell> <userinput>ln -s /data1 dndata</userinput>
+</programlisting>
+
+ You should now have two symbolic links:
+
+<programlisting>
+shell> <userinput>ls -l --hide=D*</userinput>
+lrwxrwxrwx 1 user group 30 2007-03-19 13:58 dndata -> /data1
+lrwxrwxrwx 1 user group 30 2007-03-19 13:59 dnlogs -> /data2
+</programlisting>
+
+ We show this only for the data node with node ID 2;
+ however, you must do this for <emphasis>each</emphasis>
+ data node.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Now, in the <command>mysql</command> client, create a log
+ file group and tablespace using the symbolic links, as
+ shown here:
+ </para>
+
+<programlisting>
+mysql> <userinput>CREATE LOGFILE GROUP lg1</userinput>
+ -> <userinput>ADD UNDOFILE 'dnlogs/undo1.log'</userinput>
+ -> <userinput>INITIAL_SIZE 150M</userinput>
+ -> <userinput>UNDO_BUFFER_SIZE = 1M</userinput>
+ -> <userinput>ENGINE=NDBCLUSTER;</userinput>
+
+mysql> <userinput>CREATE TABLESPACE ts1</userinput>
+ -> <userinput>ADD DATAFILE 'dndata/data1.log'</userinput>
+ -> <userinput>USE LOGFILE GROUP lg1</userinput>
+ -> <userinput>INITIAL_SIZE 1G</userinput>
+ -> <userinput>ENGINE=NDBCLUSTER;</userinput>
+</programlisting>
+
+ <para>
+ Verify that the files were created and placed correctly as
+ shown here:
+ </para>
+
+<programlisting>
+shell> <userinput>cd /data1</userinput>
+shell> <userinput>ls -l</userinput>
+total 2099304
+-rw-rw-r&ddash; 1 user group 157286400 2007-03-19 14:02 undo1.dat
+
+shell> <userinput>cd /data2</userinput>
+shell> <userinput>ls -l</userinput>
+total 2099304
+-rw-rw-r&ddash; 1 user group 1073741824 2007-03-19 14:02 data1.dat
+</programlisting>
+ </listitem>
+
+ <listitem>
+ <para>
+ If you are running multiple data nodes on one host, you
+ must take care to avoid having them try to use the same
+ space for Disk Data files. You can make this easier by
+ creating a symbolic link in each data node filesystem.
+ Suppose you are using <filename>/data0</filename> for both
+ data node filesystems, but you wish to have the Disk Data
+ files for both nodes on <filename>/data1</filename>. In
+ this case, you can do something similar to what is shown
+ here:
+ </para>
+
+<programlisting>
+shell> <userinput>cd /data0</userinput>
+shell> <userinput>ln -s ndb_2_fs/dd /data1/dn2</userinput>
+shell> <userinput>ln -s ndb_3_fs/dd /data1/dn3</userinput>
+shell> <userinput>ls -l --hide=D* ndb_2_fs</userinput>
+lrwxrwxrwx 1 user group 30 2007-03-19 14:22 dd -> /data1/dn2
+shell> <userinput>ls -l --hide=D* ndb_3_fs</userinput>
+lrwxrwxrwx 1 user group 30 2007-03-19 14:22 dd -> /data1/dn3
+</programlisting>
+
+ <para>
+ Now you can create a logfile group and tablespace using
+ the symbolic link, like this:
+ </para>
+
+<programlisting>
+mysql> <userinput>CREATE LOGFILE GROUP lg1</userinput>
+ -> <userinput>ADD UNDOFILE 'dd/undo1.log'</userinput>
+ -> <userinput>INITIAL_SIZE 150M</userinput>
+ -> <userinput>UNDO_BUFFER_SIZE = 1M</userinput>
+ -> <userinput>ENGINE=NDBCLUSTER;</userinput>
+
+mysql> <userinput>CREATE TABLESPACE ts1</userinput>
+ -> <userinput>ADD DATAFILE 'dd/data1.log'</userinput>
+ -> <userinput>USE LOGFILE GROUP lg1</userinput>
+ -> <userinput>INITIAL_SIZE 1G</userinput>
+ -> <userinput>ENGINE=NDBCLUSTER;</userinput>
+</programlisting>
+
+ <para>
+ Verify that the files were created and placed correctly as
+ shown here:
+ </para>
+
+<programlisting>
+shell> <userinput>cd /data1</userinput>
+shell> <userinput>ls</userinput>
+dn2 dn3
+shell> <userinput>ls dn2</userinput>
+undo1.log data1.log
+shell> <userinput>ls dn3</userinput>
+undo1.log data1.log
+</programlisting>
+ </listitem>
+
+ </itemizedlist>
+ </para>
+
+ </section>
+
+ <section id="mysql-cluster-disk-data-storage-requirements">
+
+ <title>MySQL Cluster Disk Data Storage Requirements</title>
+
+ <para>
+ <indexterm>
+ <primary>MySQL Cluster Disk Data</primary>
+ <secondary>storage requirements</secondary>
+ </indexterm>
+
+ The following items apply to Disk Data storage requirements:
+
+ <itemizedlist>
+
+ <listitem>
+ <para>
+ Variable-length columns of Disk Data tables take up a
+ fixed amount of space. For each row, this is equal to the
+ space required to store the largest possible value for
+ that column.
+ </para>
+
+ <para>
+ For general information about calculating these values,
+ see <xref linkend="storage-requirements"/>.
+ </para>
+
+ <para>
+ You can obtain an estimate the amount of space available
+ in data files and undo log files by querying the
+ <literal role="is">INFORMATION_SCHEMA.FILES</literal>
+ table. For more information and examples, see
+ <xref linkend="files-table"/>.
+ </para>
+
+ <note>
+ <para>
+ The <literal role="stmt">OPTIMIZE TABLE</literal>
+ statement does not have any effect on Disk Data tables.
+ </para>
+ </note>
+ </listitem>
+
+ <listitem>
+ <para>
+ In a Disk Data table, the first 256 bytes of a
+ <literal role="type">TEXT</literal> or
+ <literal role="type">BLOB</literal> column are stored in
+ memory; only the remainder is stored on disk.
+ </para>
+ </listitem>
+
+ <listitem>
+ <para>
+ Each row in a Disk Data table uses 8 bytes in memory to
+ point to the data stored on disk. This means that, in some
+ cases, converting an in-memory column to the disk-based
+ format can actually result in greater memory usage. For
+ example, convering a <literal>CHAR(4)</literal> column
+ from memory-based to disk-based format increases the
+ amount of <literal>DataMemory</literal> used per row from
+ 4 to 8 bytes.
+ </para>
+ </listitem>
+
+ </itemizedlist>
+ </para>
+
+ <important>
+ <para>
+ Starting the cluster with the <option>--initial</option>
+ option does <emphasis>not</emphasis> remove Disk Data files.
+ You must remove these manually prior to performing an initial
+ restart of the cluster.
+ </para>
+ </important>
+
+ </section>
+
+ </section>
+
<section id="mysql-cluster-online-add-node">
<title>Adding MySQL Cluster Data Nodes Online</title>
Modified: trunk/refman-5.1/mysql-cluster.xml
===================================================================
--- trunk/refman-5.1/mysql-cluster.xml 2009-09-25 07:18:52 UTC (rev 16804)
+++ trunk/refman-5.1/mysql-cluster.xml 2009-09-25 09:34:22 UTC (rev 16805)
Changed blocks: 1, Lines Added: 0, Lines Deleted: 4; 760 bytes
@@ -420,10 +420,6 @@
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="mysql-cluster-replication.xml"/>
- <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="mysql-cluster-disk-data.xml"/>
-
- <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="mysql-cluster-interconnects.xml"/>
-
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="mysql-cluster-limitations.xml"/>
<xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="mysql-cluster-roadmap.xml"/>
Modified: trunk/refman-5.1-maria/Makefile.depends
===================================================================
--- trunk/refman-5.1-maria/Makefile.depends 2009-09-25 07:18:52 UTC (rev 16804)
+++ trunk/refman-5.1-maria/Makefile.depends 2009-09-25 09:34:22 UTC (rev 16805)
Changed blocks: 1, Lines Added: 0, Lines Deleted: 2; 753 bytes
@@ -1262,8 +1262,6 @@
../refman-5.1/metadata/introduction.idmap \
../refman-5.1/metadata/language-structure-core.idmap \
../refman-5.1/metadata/mysql-cluster-configuration-core.idmap \
- ../refman-5.1/metadata/mysql-cluster-disk-data.idmap \
- ../refman-5.1/metadata/mysql-cluster-interconnects.idmap \
../refman-5.1/metadata/mysql-cluster-limitations.idmap \
../refman-5.1/metadata/mysql-cluster-management.idmap \
../refman-5.1/metadata/mysql-cluster-multi-computer.idmap \
Modified: trunk/refman-5.4/Makefile.depends
===================================================================
--- trunk/refman-5.4/Makefile.depends 2009-09-25 07:18:52 UTC (rev 16804)
+++ trunk/refman-5.4/Makefile.depends 2009-09-25 09:34:22 UTC (rev 16805)
Changed blocks: 2, Lines Added: 0, Lines Deleted: 2; 1103 bytes
@@ -3817,7 +3817,6 @@
../refman-5.1/metadata/faqs-core.idmap \
../refman-5.1/metadata/introduction.idmap \
../refman-5.1/metadata/mysql-cluster-configuration-core.idmap \
- ../refman-5.1/metadata/mysql-cluster-disk-data.idmap \
../refman-5.1/metadata/mysql-cluster-limitations.idmap \
../refman-5.1/metadata/mysql-cluster-management.idmap \
../refman-5.1/metadata/mysql-cluster-multi-computer.idmap \
@@ -5365,7 +5364,6 @@
sql_syntax_data_definition_SOURCES = sql-syntax-data-definition.xml $(sql_syntax_data_definition_INCLUDES)
sql_syntax_data_definition_IDMAPS = \
../refman-4.1/metadata/sql-syntax-data-definition.idmap \
- ../refman-5.1/metadata/mysql-cluster-disk-data.idmap \
../refman-5.1/metadata/mysql-cluster-limitations.idmap \
../refman-5.1/metadata/mysql-cluster-management.idmap \
../refman-5.1/metadata/mysql-cluster.idmap \
Modified: trunk/topic-guides/topics-5.1/Makefile.depends
===================================================================
--- trunk/topic-guides/topics-5.1/Makefile.depends 2009-09-25 07:18:52 UTC (rev 16804)
+++ trunk/topic-guides/topics-5.1/Makefile.depends 2009-09-25 09:34:22 UTC (rev 16805)
Changed blocks: 3, Lines Added: 1, Lines Deleted: 5; 9957 bytes
@@ -101,9 +101,7 @@
../../common/phrases.ent \
../../refman-5.1/faqs-core.xml \
../../refman-5.1/mysql-cluster-configuration-core.xml \
- ../../refman-5.1/mysql-cluster-disk-data.xml \
../../refman-5.1/mysql-cluster-glossary.xml \
- ../../refman-5.1/mysql-cluster-interconnects.xml \
../../refman-5.1/mysql-cluster-limitations.xml \
../../refman-5.1/mysql-cluster-management.xml \
../../refman-5.1/mysql-cluster-multi-computer.xml \
@@ -199,9 +197,7 @@
../../refman-5.1/faqs-core.xml \
../../refman-5.1/legalnotice.en.xml \
../../refman-5.1/mysql-cluster-configuration-core.xml \
- ../../refman-5.1/mysql-cluster-disk-data.xml \
../../refman-5.1/mysql-cluster-glossary.xml \
- ../../refman-5.1/mysql-cluster-interconnects.xml \
../../refman-5.1/mysql-cluster-limitations.xml \
../../refman-5.1/mysql-cluster-management.xml \
../../refman-5.1/mysql-cluster-multi-computer.xml \
@@ -1398,7 +1394,7 @@
# This rules sets the rebuild for fragments of an arby file when a source file has changed
-mysql-cluster-excerpt-aspec.xml.mysql-cluster-programs.none.chapter.xml mysql-cluster-excerpt-aspec.xml.mysql-cluster-replication.none.chapter.xml mysql-cluster-excerpt-aspec.xml.mysql-cluster-configuration.none.chapter.xml mysql-cluster-excerpt-aspec.xml.mysql-cluster-interconnects.none.chapter.xml mysql-cluster-excerpt-aspec.xml.mysql-cluster-glossary.none.chapter.xml mysql-cluster-excerpt-aspec.xml.mysql-cluster-overview.none.chapter.xml mysql-cluster-excerpt-aspec.xml.faqs-mysql-cluster.none.chapter.xml mysql-cluster-excerpt-aspec.xml.mysql-cluster.all.preface.xml mysql-cluster-excerpt-aspec.xml.mysql-cluster-disk-data.none.chapter.xml mysql-cluster-excerpt-aspec.xml.mysql-cluster-management.none.chapter.xml mysql-cluster-excerpt-aspec.xml.mysql-cluster-upgrade-downgrade.none.chapter.xml mysql-cluster-excerpt-aspec.xml.mysql-cluster-limitations.none.chapter.xml mysql-cluster-excerpt-aspec.xml.mysql-cluster-options-variables.none.chapter.xml mysql-cluster-excerpt-aspec.x!
ml.mysql-cluster-security.none.chapter.xml mysql-cluster-excerpt-aspec.xml.mysql-cluster-multi-computer.none.chapter.xml mysql-cluster-excerpt-aspec.xml.mysql-cluster-roadmap.none.chapter.xml: ../../refman-5.1/../refman-common/images/published/replicas-groups-1-2.png ../../refman-5.1/metadata/mysql-cluster-multi-computer.idmap ../../refman-5.1/mysql-cluster-security.xml ../../refman-5.1/mysql-cluster.xml ../../refman-5.1/metadata/faqs-core.idmap ../../refman-5.1/metadata/mysql-cluster-management.idmap ../../refman-5.1/metadata/mysql-cluster-replication.idmap ../../refman-5.1/../refman-common/images/published/rolling-restarts.png mysql-cluster-excerpt-aspec.xml ../../refman-5.1/../refman-common/images/published/cluster-components-1.png ../../refman-5.1/mysql-cluster-replication.xml ../../refman-5.1/metadata/mysql-cluster-roadmap.idmap ../../refman-5.1/../refman-common/images/published/cluster-security-network-2.png ../../refman-5.1/../refman-common/images/published/replicas-!
groups-1-1.png ../../refman-5.1/metadata/mysql-cluster.idmap .!
./../ref
man-5.1/../refman-common/images/published/cluster-security-network-1.png ../../refman-5.1/mysql-cluster-configuration-core.xml ../../refman-5.1/metadata/mysql-cluster-security.idmap ../../refman-5.1/metadata/mysql-cluster-limitations.idmap ../../refman-5.1/../refman-common/images/published/cluster-security-network-3.png all-entities.ent ../../refman-5.1/mysql-cluster-overview.xml ../../refman-5.1/mysql-cluster-management.xml ../../refman-5.1/../refman-common/images/published/cluster-replication-overview.png ../../common/phrases.ent ../../refman-5.1/mysql-cluster-disk-data.xml ../../refman-5.1/mysql-cluster-glossary.xml ../../refman-5.1/mysql-cluster-interconnects.xml ../../refman-common/urls.ent ../../refman-5.1/metadata/mysql-cluster-disk-data.idmap ../../refman-5.1/../refman-common/images/published/cluster-replication-binlog-injector.png ../../refman-5.1/mysql-cluster-multi-computer.xml ../../refman-5.1/versions.ent ../../refman-5.1/../refman-common/images/published/ndb-si!
ze-pl-1.png mysql-cluster-excerpt-base.xml ../../common/fixedchars.ent ../../refman-5.1/../refman-common/images/published/mysql-cluster-upgrade-downgrade-6-x.png ../../refman-5.1/../refman-common/images/published/cluster-circular-replication-2.png ../../refman-5.1/metadata/mysql-cluster-configuration-core.idmap ../../refman-5.1/mysql-cluster-limitations.xml ../../refman-5.1/../refman-common/images/published/mysql-cluster-upgrade-downgrade-5-1.png ../../refman-5.1/../refman-common/images/published/cluster-replication-multi-master-mysqlds.png ../../refman-5.1/../refman-common/images/published/cluster-replication-ipv6.png ../../refman-5.1/metadata/mysql-cluster-overview.idmap ../../refman-5.1/metadata/mysql-cluster-interconnects.idmap ../../refman-5.1/mysql-cluster-roadmap.xml ../../refman-5.1/../refman-common/images/published/multi-comp-1.png ../../refman-5.1/../refman-common/images/published/cluster-circular-replication-1.png ../../refman-5.1/mysql-cluster-programs-core.xml !
../../refman-5.1/metadata/mysql-cluster-programs-core.idmap ..!
/../refm
an-5.1/faqs-core.xml ../../refman-5.1/metadata/mysql-cluster-glossary.idmap ../../refman-5.1/../refman-common/images/published/cluster-replication-multi-master.png ../../refman-5.1/mysql-cluster-optvar-core.xml ../../refman-5.1/metadata/mysql-cluster-optvar-core.idmap
+mysql-cluster-excerpt-aspec.xml.mysql-cluster-programs.none.chapter.xml mysql-cluster-excerpt-aspec.xml.mysql-cluster-replication.none.chapter.xml mysql-cluster-excerpt-aspec.xml.mysql-cluster-configuration.none.chapter.xml mysql-cluster-excerpt-aspec.xml.mysql-cluster-interconnects.none.chapter.xml mysql-cluster-excerpt-aspec.xml.mysql-cluster-glossary.none.chapter.xml mysql-cluster-excerpt-aspec.xml.mysql-cluster-overview.none.chapter.xml mysql-cluster-excerpt-aspec.xml.faqs-mysql-cluster.none.chapter.xml mysql-cluster-excerpt-aspec.xml.mysql-cluster.all.preface.xml mysql-cluster-excerpt-aspec.xml.mysql-cluster-disk-data.none.chapter.xml mysql-cluster-excerpt-aspec.xml.mysql-cluster-management.none.chapter.xml mysql-cluster-excerpt-aspec.xml.mysql-cluster-upgrade-downgrade.none.chapter.xml mysql-cluster-excerpt-aspec.xml.mysql-cluster-limitations.none.chapter.xml mysql-cluster-excerpt-aspec.xml.mysql-cluster-options-variables.none.chapter.xml mysql-cluster-excerpt-aspec.x!
ml.mysql-cluster-security.none.chapter.xml mysql-cluster-excerpt-aspec.xml.mysql-cluster-multi-computer.none.chapter.xml mysql-cluster-excerpt-aspec.xml.mysql-cluster-roadmap.none.chapter.xml: ../../refman-5.1/../refman-common/images/published/replicas-groups-1-2.png ../../refman-5.1/metadata/mysql-cluster-multi-computer.idmap ../../refman-5.1/mysql-cluster-security.xml ../../refman-5.1/mysql-cluster.xml ../../refman-5.1/metadata/faqs-core.idmap ../../refman-5.1/metadata/mysql-cluster-management.idmap ../../refman-5.1/metadata/mysql-cluster-replication.idmap ../../refman-5.1/../refman-common/images/published/rolling-restarts.png mysql-cluster-excerpt-aspec.xml ../../refman-5.1/../refman-common/images/published/cluster-components-1.png ../../refman-5.1/mysql-cluster-replication.xml ../../refman-5.1/metadata/mysql-cluster-roadmap.idmap ../../refman-5.1/../refman-common/images/published/cluster-security-network-2.png ../../refman-5.1/../refman-common/images/published/replicas-!
groups-1-1.png ../../refman-5.1/metadata/mysql-cluster.idmap .!
./../ref
man-5.1/../refman-common/images/published/cluster-security-network-1.png ../../refman-5.1/mysql-cluster-configuration-core.xml ../../refman-5.1/metadata/mysql-cluster-security.idmap ../../refman-5.1/metadata/mysql-cluster-limitations.idmap ../../refman-5.1/../refman-common/images/published/cluster-security-network-3.png all-entities.ent ../../refman-5.1/mysql-cluster-overview.xml ../../refman-5.1/mysql-cluster-management.xml ../../refman-5.1/../refman-common/images/published/cluster-replication-overview.png ../../common/phrases.ent ../../refman-5.1/mysql-cluster-glossary.xml ../../refman-common/urls.ent ../../refman-5.1/../refman-common/images/published/cluster-replication-binlog-injector.png ../../refman-5.1/mysql-cluster-multi-computer.xml ../../refman-5.1/versions.ent ../../refman-5.1/../refman-common/images/published/ndb-size-pl-1.png mysql-cluster-excerpt-base.xml ../../common/fixedchars.ent ../../refman-5.1/../refman-common/images/published/mysql-cluster-upgrade-downgr!
ade-6-x.png ../../refman-5.1/../refman-common/images/published/cluster-circular-replication-2.png ../../refman-5.1/metadata/mysql-cluster-configuration-core.idmap ../../refman-5.1/mysql-cluster-limitations.xml ../../refman-5.1/../refman-common/images/published/mysql-cluster-upgrade-downgrade-5-1.png ../../refman-5.1/../refman-common/images/published/cluster-replication-multi-master-mysqlds.png ../../refman-5.1/../refman-common/images/published/cluster-replication-ipv6.png ../../refman-5.1/metadata/mysql-cluster-overview.idmap ../../refman-5.1/mysql-cluster-roadmap.xml ../../refman-5.1/../refman-common/images/published/multi-comp-1.png ../../refman-5.1/../refman-common/images/published/cluster-circular-replication-1.png ../../refman-5.1/mysql-cluster-programs-core.xml ../../refman-5.1/metadata/mysql-cluster-programs-core.idmap ../../refman-5.1/faqs-core.xml ../../refman-5.1/metadata/mysql-cluster-glossary.idmap ../../refman-5.1/../refman-common/images/published/cluster-repli!
cation-multi-master.png ../../refman-5.1/mysql-cluster-optvar-!
core.xml
../../refman-5.1/metadata/mysql-cluster-optvar-core.idmap
$(MAKE) mysql-cluster-excerpt.arbitraryforce
# This rules sets the fragments of a final arby file to be dependent for the -arbitrary (true) target
| Thread |
|---|
| • svn commit - mysqldoc@docsrva: r16805 - in trunk: ndbapi refman-4.1 refman-5.0 refman-5.1 refman-5.1-maria refman-5.4 topic-guides/topics-5.1 | jon.stephens | 25 Sep |