List:Commits« Previous MessageNext Message »
From:Guilhem Bichot Date:July 3 2009 12:26pm
Subject:bzr commit into mysql-5.4 branch (guilhem:2814)
View as plain text  
#At file:///home/mysql_src/bzrrepos/mysql-azalea-guilhem/ based on revid:alik@stripped

 2814 Guilhem Bichot	2009-07-03 [merge]
      merge of Summit into Azalea (merging the result of Mikael's summit->azalea merge, into the latest azalea).

    removed:
      config/ac-macros/dtrace.m4
      sql/probes.d
    added:
      config/ac-macros/dtrace.m4
      include/atomic/solaris.h
      include/probes_mysql.d.base
      include/probes_mysql.h
      include/probes_mysql_nodtrace.h
      scripts/dheadgen.pl
      storage/innobase/win_atomics32_test.c
      storage/innobase/win_atomics64_test.c
      support-files/dtrace/
      support-files/dtrace/locktime.d
      support-files/dtrace/query-execandqc.d
      support-files/dtrace/query-filesort-time.d
      support-files/dtrace/query-network-time.d
      support-files/dtrace/query-parse-time.d
      support-files/dtrace/query-rowops.d
      support-files/dtrace/query-time.d
      support-files/dtrace/statement-time.d
      support-files/dtrace/statement-type-aggregate.d
    modified:
      .bzr-mysql/default.conf
      BUILD/build_mccge.sh
      BUILD/check-cpu
      configure.in
      include/Makefile.am
      include/atomic/nolock.h
      include/atomic/x86-gcc.h
      include/my_atomic.h
      libmysql/Makefile.shared
      libmysqld/Makefile.am
      mysql-test/include/default_mysqld.cnf
      mysql-test/r/innodb.result
      mysql-test/r/merge.result
      mysql-test/r/partition_innodb.result
      mysql-test/suite/sys_vars/r/innodb_autoextend_increment_basic.result
      mysql-test/suite/sys_vars/r/innodb_file_io_threads_basic.result
      mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_basic.result
      mysql-test/suite/sys_vars/r/innodb_thread_concurrency_basic.result
      mysql-test/suite/sys_vars/r/table_definition_cache_basic.result
      mysql-test/suite/sys_vars/r/table_open_cache_basic.result
      mysql-test/suite/sys_vars/t/innodb_file_io_threads_basic.test
      mysql-test/suite/sys_vars/t/innodb_max_dirty_pages_pct_basic.test
      mysql-test/suite/sys_vars/t/innodb_thread_concurrency_basic.test
      mysql-test/suite/sys_vars/t/table_definition_cache_basic.test
      mysql-test/suite/sys_vars/t/table_open_cache_basic.test
      mysql-test/t/merge.test
      mysql-test/t/partition_innodb.test
      mysys/Makefile.am
      mysys/mf_keycache.c
      scripts/Makefile.am
      scripts/make_binary_distribution.sh
      sql/Makefile.am
      sql/filesort.cc
      sql/ha_ndbcluster.cc
      sql/handler.cc
      sql/mysql_priv.h
      sql/mysqld.cc
      sql/net_serv.cc
      sql/sp_head.cc
      sql/sql_cache.cc
      sql/sql_class.h
      sql/sql_connect.cc
      sql/sql_cursor.cc
      sql/sql_insert.cc
      sql/sql_parse.cc
      sql/sql_prepare.cc
      sql/sql_select.cc
      sql/sql_update.cc
      storage/archive/Makefile.am
      storage/archive/ha_archive.cc
      storage/blackhole/Makefile.am
      storage/blackhole/ha_blackhole.cc
      storage/csv/Makefile.am
      storage/csv/ha_tina.cc
      storage/example/Makefile.am
      storage/example/ha_example.cc
      storage/federated/Makefile.am
      storage/federated/ha_federated.cc
      storage/federated/ha_federated.h
      storage/heap/Makefile.am
      storage/heap/ha_heap.cc
      storage/innobase/CMakeLists.txt
      storage/innobase/Makefile.am
      storage/innobase/btr/btr0cur.c
      storage/innobase/btr/btr0sea.c
      storage/innobase/buf/buf0buf.c
      storage/innobase/handler/ha_innodb.cc
      storage/innobase/include/buf0buf.ic
      storage/innobase/include/log0log.h
      storage/innobase/include/os0file.h
      storage/innobase/include/os0sync.h
      storage/innobase/include/os0sync.ic
      storage/innobase/include/srv0srv.h
      storage/innobase/include/sync0rw.h
      storage/innobase/include/sync0rw.ic
      storage/innobase/include/sync0sync.h
      storage/innobase/include/sync0sync.ic
      storage/innobase/include/univ.i
      storage/innobase/include/ut0ut.h
      storage/innobase/log/log0log.c
      storage/innobase/mem/mem0pool.c
      storage/innobase/os/os0file.c
      storage/innobase/row/row0sel.c
      storage/innobase/srv/srv0srv.c
      storage/innobase/srv/srv0start.c
      storage/innobase/sync/sync0arr.c
      storage/innobase/sync/sync0rw.c
      storage/innobase/sync/sync0sync.c
      storage/innobase/ut/ut0ut.c
      storage/myisam/Makefile.am
      storage/myisam/ha_myisam.cc
      storage/myisammrg/Makefile.am
      storage/myisammrg/ha_myisammrg.cc
      storage/ndb/include/portlib/prefetch.h
      support-files/Makefile.am
      support-files/my-innodb-heavy-4G.cnf.sh
      win/README
      win/configure.js
=== modified file '.bzr-mysql/default.conf'
--- a/.bzr-mysql/default.conf	2009-06-25 14:37:27 +0000
+++ b/.bzr-mysql/default.conf	2009-07-02 14:23:36 +0000
@@ -2,4 +2,4 @@
 tree_location = "bzr+ssh://bk-internal.mysql.com/bzrroot/server/mysql-azalea"
 post_commit_to = "commits@stripped"
 post_push_to = "commits@stripped"
-tree_name = "mysql-5.4" 
+tree_name = "mysql-5.4"

=== modified file 'BUILD/build_mccge.sh'
--- a/BUILD/build_mccge.sh	2009-05-25 22:01:59 +0000
+++ b/BUILD/build_mccge.sh	2009-07-02 14:23:36 +0000
@@ -63,10 +63,12 @@ sysadmin_usage()
 cat <<EOF
 
   This script can be used to build MySQL Cluster Carrier Grade Edition
-  based on a source code release you received from MySQL.
+  based on a source code release you received from MySQL. It can also
+  be used to build many other variants of MySQL, in particular
+  various performance-optimised versions of MySQL.
 
   It is assumed that you are building on a computer which is of the 
-  same type as that on which you intend to run MySQL Cluster.
+  same type as that on which you intend to run MySQL/MySQL Cluster.
 
   The simplest possible way to run this script is to allow it to use the 
   built-in defaults everywhere, invoking it simply as:
@@ -75,29 +77,35 @@ cat <<EOF
 
   This performs the following operations:
     1) Detects the operating system. Currently, Linux, FreeBSD, Solaris 
-      10/11, and Mac OS X are supported by this script.
+      8/9/10/11, and Mac OS X are supported by this script.
     2) Detect the type of CPU being used. Currently supported processors 
       are: x86 for all supported operating systems, Itanium for Linux 
-      with GCC, and SPARC for Solaris using the Forte compiler.
+      with GCC, and x86 + SPARC for Solaris using the Forte compiler and
+      finally x86 on Linux using the Intel compiler.
     3) Invokes the GCC compiler.
-    4) Builds a set of MySQL Cluster Carrier Grade Edition binaries; for
+    4) Builds a set of MySQL/MySQL Cluster binaries; for
       more information about these, see --extended-help.
+    5) Default compiler is always gcc.
   
   The default version assumes that you have a source code tarball from 
   which you are building, and thus autoconf and automake do not need to 
   be run. If you have downloaded a BitKeeper tree then you should read 
   --developer-help.
 
-  If you are building MySQL Cluster Carrier Grade Edition for commercial 
+  If you are building MySQL/MySQL Cluster for commercial 
   use then you need to set the --commercial flag to ensure that the 
   commercial libraries are compiled in, rather than the GPL-only 
   libraries. The default is to build a GPL version of MySQL Cluster 
   Carrier Grade Edition.
 
-  If your building on a Solaris SPARC machine you must set 
+  If you are building on a Solaris SPARC machine and you want to compile
+  using SunStudio you must set 
   --compiler=forte; if you want to build using the Intel compiler on 
   Linux, you need to set --compiler=icc.
 
+  A synonym for forte is SunStudio, so one can also use
+  --compiler=SunStudio.
+
   If you want to make sure that a 64-bit version is built then you 
   should add the flag --64. This is always set on Solaris machines and 
   when check-cpu is able to discover that a 64-bit CPU is being used. If 
@@ -133,6 +141,7 @@ Usage: $0 [options]
   --help                  Show this help message.
   --sysadmin-help         Show help for system administrators wishing 
                           to build MySQL Cluster Carrier Grade Edition
+                          or other MySQL versions.
   --developer-help        Show help for developers trying to build MySQL
   --with-help             Show extended help on --with-xxx options to 
                           configure
@@ -155,10 +164,10 @@ Usage: $0 [options]
                           MySQL use
   --commercial            Use commercial libraries
   --gpl                   Use gpl libraries
-  --compiler=[gcc|icc|forte]                  Select compiler
-  --cpu=[x86|x86_64|sparc]                    Select CPU type
-                          x86 => 32-bit binary
-                          x86_64 => 64 bit binary unless Mac OS X
+  --compiler=[gcc|icc|forte|SunStudio]        Select compiler
+  --cpu=[x86|x86_64|sparc|itanium]            Select CPU type
+                          x86 => x86 and 32-bit binary
+                          x86_64 => x86 and 64 bit binary
   --warning-mode=[extra|pedantic|normal|no]   Set warning mode level
   --warnings              Set warning mode to normal
   --32                    Build a 32-bit binary even if CPU is 64-bit
@@ -170,8 +179,9 @@ Usage: $0 [options]
   --error-inject          Enable error injection into MySQL Server and 
                           data nodes
   --valgrind              Build with valgrind
-  --fast                  Optimise for CPU architecture buildt on
+  --fast                  Optimise for CPU architecture built on
   --static-linking        Statically link system libraries into binaries
+  --use-tcmalloc          Link with tcmalloc instead of standard malloc (Linux only)
   --with-flags *          Pass extra --with-xxx options to configure
 EOF
   if test "x$1" != "x" ; then
@@ -186,13 +196,14 @@ extended_usage()
   Extended help text for this script:
   -----------------------------------
   This script is intended to make it easier for customers using MySQL
-  Cluster Carrier Grade Edition to build the product from source on 
-  these platforms/compilers: Linux/x86 (32-bit and 64-bit),
-  Solaris 10 and 11/x86/gcc, Solaris 9/Sparc/Forte, and MacOSX/x86/gcc. 
-  The script automatically detects CPU type and operating system; in 
-  most cases this also determines which compiler to use, the exception 
-  being Linux/x86 where you can choose between gcc and icc (gcc is the
-  default).
+  Cluster Carrier Grade Edition, customers using performance-optimised
+  MySQL versions and developers to build the product from source on 
+  these platforms/compilers: Linux/x86 (32-bit and 64-bit) (either using
+  gcc or icc), Linux Itanium, Solaris 8,9,10 and 11 x86 and SPARC using
+  gcc or SunStudio and MacOSX/x86/gcc.
+
+  The script automatically detects CPU type and operating system. The
+  default compiler is always gcc.
 
   To build on other platforms you can use the --print-only option on a
   supported platform and edit the output for a proper set of commands on
@@ -213,7 +224,7 @@ extended_usage()
 
   --package=cge
     storage engines:
-      ARCHIVE, BLACKHOLE, CSV, EXAMPLE, FEDERATED, MYISAM, NDB
+      ARCHIVE, BLACKHOLE, CSV, FEDERATED, MYISAM, NDB
       (All storage engines except InnoDB)
     comment: MySQL Cluster Carrier Grade Edition GPL/Commercial version 
              built from source
@@ -221,7 +232,7 @@ extended_usage()
 
   --package=extended
     storage engines:
-      ARCHIVE, BLACKHOLE, CSV, EXAMPLE, FEDERATED, MYISAM, INNODB, NDB
+      ARCHIVE, BLACKHOLE, CSV, FEDERATED, MYISAM, INNODB, NDB
       (All storage engines)
     comment: MySQL Cluster Carrier Grade Extended Edition GPL/Commercial 
              version built from source
@@ -229,7 +240,7 @@ extended_usage()
 
   --package=pro
     storage engines:
-      ARCHIVE, BLACKHOLE, CSV, EXAMPLE, FEDERATED, INNODB, MYISAM
+      ARCHIVE, BLACKHOLE, CSV, FEDERATED, INNODB, MYISAM
       (All storage engines except NDB)
     comment: MySQL Pro GPL/Commercial version built from 
              source
@@ -296,6 +307,10 @@ extended_usage()
   --with-pic: Build all binaries using position independent assembler
     to avoid problems with dynamic linkers (cannot be overridden).
 
+  --without-example-engine: Ensure that the example engine isn't built,
+    it cannot do any useful things, it's merely intended as documentation.
+    (cannot be overridden)
+
   --with-csv-storage-engine: Ensure that the CSV storage engine is
     included in all builds. Since CSV is required for log tables in
     MySQL 5.1, this option cannot be overridden.
@@ -314,10 +329,6 @@ extended_usage()
   In addition there are some configure options that are specific to
   Linux operating systems:
 
-  --with-fast-mutexes
-    Include an alternative implementation of mutexes that is faster on
-    Linux systems
-
   --enable-assembler
     Include assembler code optimisations for a number of mostly string
     methods. Used for x86 processors only.
@@ -364,18 +375,25 @@ extended_usage()
 
   --with-mysqld-libs=-lmtmalloc
     Used on Solaris to ensure that the proper malloc library is used.
+    Investigations have shown mtmalloc to be the best choice on Solaris,
+    also umem has good performance on Solaris but better debugging
+    capabilities.
 
   Compiler options:
   -----------------
 
   This section describes the compiler options for each of the different
-  platforms supported by thisscript.
+  platforms supported by this script.
 
   The --fast option adds -mtune=cpu_arg to the C/C++ flags (provides
   support for Nocona, K8, and other processors).
 
   Use of the --debug option adds -g to the C/C++ flags.
 
+  In all cases it is possible to override the definition of CC and CXX
+  by calling the script as follows:
+  CC="/usr/local/bin/gcc" CXX="/usr/local/bin/gcc" BUILD/build_mccge.sh
+
   FreeBSD/x86/gcc
   ---------------
     No flags are used. Instead, configure determines the proper flags to 
@@ -383,8 +401,7 @@ extended_usage()
 
   Linux/x86+Itanium/gcc
   -------------
-    No flags are used. Instead the configure script determines the
-    proper flags to use for both normal and debug builds. Discovery of a
+    For debug builds -O is used and otherwise -O3 is used. Discovery of a
     Nocona or Core 2 Duo CPU causes a 64-bit binary to be built;
     otherwise, the binary is 32-bit. To build a 64-bit binary, -m64 is
     added to the C/C++ flags. (To build a 32-bit binary on a 64-bit CPU,
@@ -393,11 +410,11 @@ extended_usage()
   Linux/x86+Itanium/icc
   -------------
     Flags used:
-    CC  = icc -static-libgcc -static-libcxa -i-static
-    C++ = icpc -static-libgcc -static-libcxa -i-static
+    CC  = icc -static-libgcc -static-intel
+    C++ = icpc -static-libgcc -static-intel
     C/C++ flags = -mp -restrict
 
-    On Itanium we also add -no-ftz and -no-prefetch to CC and C++ flags.
+    On Itanium we also add -no-ftz to CC and C++ flags.
 
   The non-debug versions also add the following:
     C/C++ flags += -O3 unroll2 -ip
@@ -411,20 +428,60 @@ extended_usage()
 
   Solaris/x86/gcc
   ---------------
-    All builds on Solaris are 64-bit, so -m64 is always used in the
-    C/C++ flags. LDFLAGS is set to -m64 -static-libgcc -O/-O2.
+    All builds on Solaris are by default 64-bit, so -m64 is always used in
+    the C/C++ flags. LDFLAGS is set to -m64 -O/-O2/-O3. If for
+    some reason a 32-bit Solaris is used it is necessary to add the flag
+    --32 to the script invocation. Due to bugs in compiling with -O3 on
+    Solaris only -O2 is used by default, when --fast flag is used -O3 will
+    be used instead.
+
+    Sets -m64 (default) or -m32 (if specifically set) in LDFLAGS and
+    C/C++ flags.
 
   Solaris/Sparc/Forte
   -------------------
-    Uses cc-5.0 as CC
-    Sets ASFLAGS=LDFLAGS=xarch=v9, so that we compile Sparc v9 binaries
-    C flags   = -Xa -strconst -xc99=none
-    C++ flags = -noex
-    C/C++ flags = -mt -D_FORTEC -xarch=v9
+    Uses cc as CC and CC as CXX
+    Note that SunStudio uses different binaries for C and C++ compilers.
 
-    For non-debug builds, the following flags are also used:
+    Set -m64 (default) or -m32 (if specifically set) in ASFLAGS,
+    LDFLAGS and C/C++ flags.
 
-    C/C++ flags = -xO3
+    Sets ASFLAGS=LDFLAGS=compiler flags=-xarch=sparc, so that we compile
+    Sparc v9 binaries, also -mt is set in all those since we're always
+    building a multithreaded program.
+
+    C flags   = -xstrconst
+    C++ flags = -noex
+
+    Set the following C/C++ flags:
+    -fsimple=1
+    -ftrap=%none
+    -nofstore          This flag is set only on x86
+    -xbuiltin=%all
+    -xlibmil
+    -xlibmopt
+
+    Set the C++ flag:
+    -noex
+
+    When compiling with fast we set:
+    C/C++ flags: -xtarget=native -xunroll=3 -xipo
+    LDFLAGS: -xipo
+
+    When not compiling with fast we always set -xtarget=generic
+
+    When compiling with fast on SPARC we also set:
+    C/C++ flags: -xbinopt=prepare
+    LDFLAGS: -xbinopt=prepare
+
+    When compiling with fast on x86 we also set:
+    C/C++ flags: -xregs=frameptr
+
+    The optimisation level is
+    -xO         Debug builds
+    -xO2        Production build on SPARC
+    -xO3        Production build on x86
+    -xO4        Fast builds on SPARC/x86
 
   MacOSX/x86/gcc
   --------------
@@ -433,6 +490,10 @@ extended_usage()
   Non-debug versions also add -Os -felide-constructors, where "-Os"
   means the build is space-optimised as long as the space optimisations
   do not negatively affect performance. Debug versions use -O.
+  
+  Mac OS X builds will always be 32-bit by default, when --64 is added
+  the build will be 64 bit instead. Thus the flag -m64 is added only
+  when specifically given as an option.
 EOF
 }
 with_usage()
@@ -537,11 +598,15 @@ parse_cpu_type()
   case "$cpu_type" in
     x86 )
       cpu_type="x86"
-      m32="yes"
+      if test "x$m64" != "x" ; then
+        m64="no"
+      fi
       ;;
     x86_64 )
       cpu_type="x86"
-      m64="yes"
+      if test "x$m64" != "x" ; then
+        m64="yes"
+      fi
       ;;
     itanium )
       cpu_type="itanium"
@@ -572,6 +637,9 @@ parse_compiler()
     forte )
       compiler="forte"
       ;;
+    SunStudio | sunstudio )
+      compiler="forte"
+      ;;
     *)
       echo "Unknown compiler '$compiler'"
       exit 1
@@ -601,6 +669,9 @@ parse_options()
         fast_flag="generic"
       fi
       ;;
+    --use-tcmalloc)
+      use_tcmalloc="yes"
+      ;;
     --with-debug)
       with_debug_flag="yes"
       fast_flag="no"
@@ -636,17 +707,19 @@ parse_options()
       warning_mode="normal"
       ;;
     --32)
-      if test "x$m64" != "x" ; then
+      if test "x$explicit_size_set" != "x" ; then
         echo "Cannot set both --32 and --64"
         exit 1
       fi
-      m32="yes"
+      explicit_size_set="yes"
+      m64="no"
       ;;
     --64)
-      if test "x$m32" != "x" ; then
+      if test "x$explicit_size_set" != "x" ; then
         echo "Cannot set both --32 and --64"
         exit 1
       fi
+      explicit_size_set="yes"
       m64="yes"
       ;;
     --package=*)
@@ -750,7 +823,7 @@ set_cpu_base()
   if test "x$cpu_type" = "x" ; then
     if test "x$cpu_arg" = "x" ; then
       usage "CPU type not discovered, cannot proceed"
-      return 1
+      exit 1
     fi
     case "$cpu_arg" in
       core2 | nocona | prescott | pentium* | i*86 )
@@ -775,18 +848,20 @@ set_cpu_base()
     check_cpu_cflags=""
   fi
   if test "x$os" = "xMacOSX" ; then
-    m64="no"
+    if test "x$m64" = "x" ; then
+      m64="no"
+    fi
   elif test "x$os" = "xSolaris" ; then
-    m64="yes"
-  elif test "x$m32" = "x" ; then
+    if test "x$m64" = "x" ; then
+      m64="yes"
+    fi
+  elif test "x$m64" = "x" ; then
     if test "x$cpu_arg" = "xnocona" || test "x$cpu_arg" = "xcore2" || \
        test "x$cpu_arg" = "xathlon64" || test "x$cpu_arg" = "xopteron" ; then
       m64="yes"
-    elif test "x$m64" != "xyes" ; then
+    else
       m64="no"
     fi
-  else
-    m64="no"
   fi
   echo "Discovered CPU of type $cpu_base_type ($cpu_arg) on $os"
   if test "x$m64" = "xyes" ; then
@@ -806,18 +881,15 @@ init_configure_commands()
   cxxflags="$cxx_warnings $base_cxxflags $compiler_flags"
   configure="./configure $base_configs $with_flags"
 
-  commands="$commands
-    CC=\"$CC\" CFLAGS=\"$cflags\" CXX=\"$CXX\" CXXFLAGS=\"$cxxflags\""
+  flags="CC=\"$CC\" CFLAGS=\"$cflags\" CXX=\"$CXX\" CXXFLAGS=\"$cxxflags\""
   if test "x$LDFLAGS" != "x" ; then
-    commands="$commands
-      LDFLAGS=\"$LDFLAGS\""
+    flags="$flags LDFLAGS=\"$LDFLAGS\""
   fi
   if test "x$ASFLAGS" != "x" ; then
-    commands="$commands
-      ASFLAGS=\"$ASFLAGS\""
+    flags="$flags ASFLAGS=\"$ASFLAGS\""
   fi
   commands="$commands
-    $configure"
+    $flags $configure"
 } 
 
 #
@@ -920,7 +992,7 @@ set_libtoolize_version()
 # We do not use ccache when gcov is used. Also only when
 # gcc is used.
 #
-set_up_ccache()
+set_ccache_usage()
 {
   if test "x$compiler" = "xgcc" ; then
     if ccache -V > /dev/null 2>&1 && test "$USING_GCOV" != "1"
@@ -993,7 +1065,7 @@ set_with_debug_flags()
   if test "x$with_debug_flag" = "xyes" ; then
     if test "x$developer_flag" = "xyes" ; then
       loc_debug_flags="-DUNIV_MUST_NOT_INLINE -DEXTRA_DEBUG -DFORCE_INIT_OF_VARS "
-      loc_debug_flags="$loc_debug_cflags -DSAFEMALLOC -DPEDANTIC_SAFEMALLOC"
+      loc_debug_flags="$loc_debug_flags -DSAFEMALLOC -DPEDANTIC_SAFEMALLOC"
       compiler_flags="$compiler_flags $loc_debug_flags"
     fi
   fi
@@ -1046,7 +1118,7 @@ set_base_configs()
   base_configs="$base_configs --enable-local-infile"
   base_configs="$base_configs --enable-thread-safe-client"
   base_configs="$base_configs --with-big-tables"
-  base_configs="$base_configs --with-extra-charsets=all"
+  base_configs="$base_configs --with-extra-charsets=complex"
   base_configs="$base_configs --with-ssl"
   base_configs="$base_configs --with-pic"
   base_configs="$base_configs --with-csv-storage-engine"
@@ -1059,17 +1131,27 @@ set_base_configs()
 #
 set_base_engines()
 {
-  engine_configs="$engine_configs --with-archive-storage-engine"
+  engine_configs="--with-archive-storage-engine"
   engine_configs="$engine_configs --with-blackhole-storage-engine"
-  engine_configs="$engine_configs --with-example-storage-engine"
+  engine_configs="$engine_configs --without-example-storage-engine"
   engine_configs="$engine_configs --with-federated-storage-engine"
   engine_configs="$engine_configs --with-partition"
+  base_configs="$base_configs $engine_configs"
 }
 
-set_pro_package()
+set_innodb_engine()
 {
-  base_configs="$base_configs $engine_configs"
   base_configs="$base_configs --with-innodb"
+}
+
+set_ndb_engine()
+{
+  base_configs="$base_configs --with-ndbcluster"
+  base_configs="$base_configs --without-ndb-debug"
+}
+
+set_pro_package()
+{
   base_configs="$base_configs --with-comment=\"MySQL Pro $version_text built from source\""
   if test "x$with_debug_flag" = "xyes" ; then
     base_configs="$base_configs --with-server-suffix=\"-debug\""
@@ -1081,10 +1163,6 @@ set_cge_extended_package()
   if test "x$gpl" = "xno" ; then
     echo "Cannot build Extended Carrier Grade Edition as Commercial version"
   fi
-  base_configs="$base_configs --with-ndbcluster"
-  base_configs="$base_configs --without-ndb-debug"
-  base_configs="$base_configs $engine_configs"
-  base_configs="$base_configs --with-innodb"
   base_configs="$base_configs --with-comment=\"MySQL Cluster Carrier Grade Extended Edition $version_text built from source\""
   if test "x$with_debug_flag" = "xyes" ; then
     base_configs="$base_configs --with-server-suffix=\"-cge-extended-debug\""
@@ -1095,9 +1173,6 @@ set_cge_extended_package()
 
 set_cge_package()
 {
-  base_configs="$base_configs --with-ndbcluster"
-  base_configs="$base_configs --without-ndb-debug"
-  base_configs="$base_configs $engine_configs"
   base_configs="$base_configs --with-comment=\"MySQL Cluster Carrier Grade Edition $version_text built from source\""
   if test "x$with_debug_flag" = "xyes" ; then
     base_configs="$base_configs --with-server-suffix=\"-cge-debug\""
@@ -1139,6 +1214,36 @@ set_gcc_special_options()
   fi
 }
 
+set_cc_and_cxx_for_gcc()
+{
+  if test "x$CC" = "x" ; then
+    CC="gcc -static-libgcc"
+  fi
+  if test "x$CXX" = "x" ; then
+    CXX="gcc -static-libgcc"
+  fi
+}
+
+set_cc_and_cxx_for_icc()
+{
+  if test "x$CC" = "x" ; then
+    CC="icc -static-intel -static-libgcc"
+  fi
+  if test "x$CXX" = "x" ; then
+    CXX="icpc -static-intel -static-libgcc"
+  fi
+}
+
+set_cc_and_cxx_for_forte()
+{
+  if test "x$CC" = "x" ; then
+    CC="cc"
+  fi
+  if test "x$CXX" = "x" ; then
+    CXX="CC"
+  fi
+}
+
 #
 # If we discover a Core 2 Duo architecture and we have enabled the fast
 # flag, we enable a compile especially optimised for Core 2 Duo. This
@@ -1166,8 +1271,12 @@ set_bsd_configs()
     exit 1
   fi
   base_configs="$base_configs --enable-assembler"
-  CC="gcc"
-  CXX="gcc"
+  if test "x$fast_flag" != "xno" ; then
+    compiler_flags="$compiler_flags -O3"
+  else
+    compiler_flags="$compiler_flags -O"
+  fi
+  set_cc_and_cxx_for_gcc
 }
 
 #
@@ -1177,24 +1286,31 @@ set_linux_configs()
 {
   if test "x$cpu_base_type" != "xx86" && \
      test "x$cpu_base_type" != "xitanium" ; then
-    usage "Only x86 and Itanium CPUs supported for 32-bit Linux"
+    usage "Only x86 and Itanium CPUs supported for Linux"
     exit 1
   fi
-  base_configs="$base_configs --with-fast-mutexes"
+  if test "x$use_tcmalloc" = "xyes" ; then
+    base_configs="$base_configs --with-mysqld-libs=-ltcmalloc_minimal"
+  fi
   if test "x$cpu_base_type" = "xx86" ; then
     base_configs="$base_configs --enable-assembler"
   fi
   if test "x$compiler" = "xgcc" ; then
-    CC="gcc"
-    CXX="gcc"
+    set_cc_and_cxx_for_gcc
     if test "x$m64" = "xyes" ; then
       compiler_flags="$compiler_flags -m64"
+    else
+      compiler_flags="$compiler_flags -m32"
+    fi
+    if test "x$fast_flag" != "xno" ; then
+      compiler_flags="$compiler_flags -O2"
+    else
+      compiler_flags="$compiler_flags -O"
     fi
 # configure will set proper compiler flags for gcc on Linux
   elif test "x$compiler" = "xicc" ; then
     compiler_flags="$compiler_flags -mp -restrict"
-    CC="icc -static-intel"
-    CXX="icpc -static-intel"
+    set_cc_and_cxx_for_icc
     if test "x$cpu_base_type" = "xitanium" ; then
       compiler_flags="$compiler_flags -no-ftz"
     fi
@@ -1215,53 +1331,99 @@ set_linux_configs()
 #
 set_solaris_configs()
 {
+# Use mtmalloc as malloc, see Tim Cook blog
   base_configs="$base_configs --with-mysqld-libs=-lmtmalloc"
+  base_configs="$base_configs --with-named-curses=-lcurses"
   case "`uname -a`" in
-    *5.10*|*5.11*)
+    *5.8* | *5.9* | *5.10* | *5.11*)
+
       ;;
     *)
-      die "Only versions 10 and 11 supported for Solaris"
+      usage "Only versions 8,9, 10 and 11 supported for Solaris"
+      exit 1
   esac
   if test "x$cpu_base_type" != "xx86" && \
      test "x$cpu_base_type" != "xsparc" ; then
     usage "Only x86 and Sparc CPUs supported for Solaris"
     exit 1
   fi
+  if test "x$compiler" != "xgcc" && \
+     test "x$compiler" != "xforte" ; then
+    usage "Only gcc and Forte compilers supported for Solaris"
+    exit 1
+  fi
+  if test "x$m64" = "xyes" ; then
+    compiler_flags="$compiler_flags -m64"
+    LDFLAGS="-m64"
+    ASFLAGS="$ASFLAGS -m64"
+  else
+    compiler_flags="$compiler_flags -m32"
+    LDFLAGS="-m32"
+    ASFLAGS="$ASFLAGS -m32"
+  fi
   if test "x$compiler" = "xgcc" ; then
-    CC="gcc"
-    CXX="gcc"
+    set_cc_and_cxx_for_gcc
     if test "x$cpu_base_type" != "xx86" ; then
-      usage "Only gcc supported for Solaris 10/11 on SPARC"
+      usage "gcc currently not supported for Solaris on SPARC"
+      exit 1
     fi
-    compiler_flags="$compiler_flags -m64 -DMY_ATOMIC_MODE_RWLOCKS"
-    LDFLAGS="-m64 -static-libgcc"
-    if test "x$fast_flag" != "xno" ; then
-      LDFLAGS="$LDFLAGS -O2"
-      compiler_flags="$compiler_flags -O2"
+    if test "x$fast_flag" = "xyes" ; then
+      LDFLAGS="$LDFLAGS -O3"
+      compiler_flags="$compiler_flags -O3"
     else
-      LDFLAGS="$LDFLAGS -O"
-      compiler_flags="$compiler_flags -O"
-    fi
-  elif test "x$compiler" = "xforte" ; then
-    if test "x$cpu_base_type" = "xx86" ; then
-      usage "Only gcc supported for Solaris/x86"
-    fi
-    if test "x$cpu_base_type" != "xsparc" ; then
-      usage "Forte compiler supported for Solaris 9/SPARC only"
+      if test "x$fast_flag" = "xgeneric" ; then
+        LDFLAGS="$LDFLAGS -O2"
+        compiler_flags="$compiler_flags -O2"
+      else
+        LDFLAGS="$LDFLAGS -O"
+        compiler_flags="$compiler_flags -O"
+      fi
     fi
-    CC="cc-5.0"
-    CXX=CC
-    ASFLAGS="xarch=v9"
-    LDFLAGS="xarch=v9"
-    base_cflags="$base_cflags -Xa -xstrconst -xc99=none"
+  else
+#Using Forte compiler (SunStudio)
+    set_cc_and_cxx_for_forte
+    base_cflags="$base_cflags -xstrconst"
+    compiler_flags="$compiler_flags -mt"
+    LD_FLAGS="$LD_FLAGS -mt"
+    compiler_flags="$compiler_flags -fsimple=1"
+    compiler_flags="$compiler_flags -ftrap=%none"
+    compiler_flags="$compiler_flags -xbuiltin=%all"
+    compiler_flags="$compiler_flags -xlibmil"
+    compiler_flags="$compiler_flags -xlibmopt"
     base_cxxflags="$base_cxxflags -noex"
-    compiler_flags="$compiler_flags -mt -D_FORTEC -xarch=v9"
-    if test "x$fast_flag" != "xno" ; then
-      compiler_flags="$compiler_flags -xO3"
+    if test "x$fast_flag" = "xyes" ; then
+      compiler_flags="$compiler_flags -xtarget=native"
+      compiler_flags="$compiler_flags -xipo"
+      compiler_flags="$compiler_flags -xunroll=3"
+      LD_FLAGS="$LD_FLAGS -xipo"
+    else
+      compiler_flags="$compiler_flags -xtarget=generic"
+    fi
+    if test "x$cpu_base_type" = "xx86" ; then
+      compiler_flags="$compiler_flags -nofstore"
+      if test "x$fast_flag" = "xyes" ; then
+        compiler_flags="$compiler_flags -xregs=frameptr"
+        compiler_flags="$compiler_flags -xO4"
+      elif test "x$fast_flag" = "xgeneric" ; then
+        compiler_flags="$compiler_flags -xO2"
+      else
+        compiler_flags="$compiler_flags -xO"
+      fi
+    else
+#Using SPARC cpu with SunStudio (Forte) compiler
+      ASFLAGS="$ASFLAGS -xarch=sparc"
+      LDFLAGS="$LDFLAGS -xarch=sparc"
+      compiler_flags="$compiler_flags -xarch=sparc"
+      if test "x$fast_flag" = "xyes" ; then
+        compiler_flags="$compiler_flags -xbinopt=prepare"
+        LDFLAGS="$LDFLAGS -xbinopt=prepare"
+        compiler_flags="$compiler_flags -xO4"
+      elif test "x$fast_flag" = "xgeneric" ; then
+        compiler_flags="$compiler_flags -xO2"
+      else
+        compiler_flags="$compiler_flags -xO"
+      fi
     fi
-  else
-    usage "Only gcc and Forte compilers supported for Solaris"
-    exit 1
   fi
 }
 
@@ -1270,10 +1432,7 @@ set_solaris_configs()
 #
 set_macosx_configs()
 {
-  base_cxxflags="$base_cxxflags -fno-common"
-  if test "x$cpu_base_type" = "xx86" && test "x$compiler" = "xgcc" ; then
-    compiler_flags="$compiler_flags -arch i386"
-  else
+  if test "x$cpu_base_type" != "xx86" || test "x$compiler" != "xgcc" ; then
     usage "Only gcc/x86 supported for Mac OS X"
     exit 1
   fi
@@ -1281,14 +1440,21 @@ set_macosx_configs()
 # Optimize for space as long as it doesn't affect performance, use some
 # optimisations also when not in fast mode.
 #
+  base_cxxflags="$base_cxxflags -felide-constructors"
+  base_cxxflags="$base_cxxflags -fno-common"
+  if test "x$m64" = "xyes" ; then
+    compiler_flags="$compiler_flags -m64"
+    compiler_flags="$compiler_flags -arch x86_64"
+  else
+    compiler_flags="$compiler_flags -m32"
+    compiler_flags="$compiler_flags -arch i386"
+  fi
   if test "x$fast_flag" != "xno" ; then
     compiler_flags="$compiler_flags -Os"
-    base_cxxflags="$base_cxxflags -felide-constructors"
   else
     compiler_flags="$compiler_flags -O"
   fi
-  CC="gcc"
-  CXX="gcc"
+  set_cc_and_cxx_for_gcc
 }
 
 #
@@ -1397,11 +1563,15 @@ base_cxxflags=
 base_configs=
 debug_flags=
 cxxflags=
-m32=
 m64=
+explicit_size_set=
 datadir=
 commands=
 use_autotools=
+engine_configs=
+ASFLAGS=
+LDFLAGS=
+use_tcmalloc=
 
 set_defaults_based_on_environment
 
@@ -1418,7 +1588,14 @@ set -e
 # This call sets the cpu_arg and check_cpu_args parameters
 #
 path=`dirname $0`
+if test "x$compiler" = "xgcc" ; then
+  compiler=
+fi
 . "$path/check-cpu"
+if test "x$compiler" = "x" ; then
+  compiler="gcc"
+fi
+check_os
 set_cpu_base
 if test "x$?" = "x1" ; then
   exit 1
@@ -1446,17 +1623,23 @@ set_icc_special_options
 # including all storage engines except InnoDB, and to use GPL libraries.
 #
 set_base_configs
-set_base_engines
 if test "x$gpl" = "xyes" ; then
   version_text="GPL version"
 else
   version_text="Commercial version"
 fi
 if test "x$package" = "xpro" ; then
+  set_base_engines
+  set_innodb_engine
   set_pro_package
 elif test "x$package" = "xextended" ; then
+  set_base_engines
+  set_ndb_engine
+  set_innodb_engine
   set_cge_extended_package
 elif test "x$package" = "xcge" ; then
+  set_base_engines
+  set_ndb_engine
   set_cge_package
 elif test "x$package" = "xclassic" ; then
   set_classic_package
@@ -1472,7 +1655,6 @@ set_error_inject_configs
 # operating systems, and processors.
 #
 
-check_os
 if test "x$os" = "xlinux" ; then
   set_linux_configs
 elif test "x$os" = "xSolaris" ; then
@@ -1490,7 +1672,7 @@ fi
 # proper libtoolize versions, and to determine whether to use ccache.
 #
 set_make_version
-set_up_ccache
+set_ccache_usage
 
 #
 # Set up commands variable from variables prepared for base 

=== modified file 'BUILD/check-cpu'
--- a/BUILD/check-cpu	2008-08-18 17:33:00 +0000
+++ b/BUILD/check-cpu	2009-02-03 12:09:35 +0000
@@ -16,6 +16,9 @@ check_cpu () {
     # on Linux (and others?) we can get detailed CPU information out of /proc
     cpuinfo="cat $CPUINFO"
 
+    # detect CPU architecture
+    cpu_arch=`$cpuinfo | grep 'arch' | cut -d ':' -f 2 | cut -d ' ' -f 2 | head -1`
+
     # detect CPU family
     cpu_family=`$cpuinfo | grep 'family' | cut -d ':' -f 2 | cut -d ' ' -f 2 | head -1`
     if test -z "$cpu_family" ; then
@@ -51,8 +54,8 @@ check_cpu () {
         model_name=`machine`
         ;;
       *)
-        cpu_family=`uname -m`;
-        model_name=`uname -p`;
+        cpu_family=`uname -p`;
+        model_name=`uname -m`;
         ;;
     esac
   fi
@@ -60,7 +63,7 @@ check_cpu () {
   # detect CPU shortname as used by gcc options 
   # this list is not complete, feel free to add further entries
   cpu_arg=""
-  case "$cpu_family--$model_name" in
+  case "$cpu_family--$model_name--$cpu_arch" in
     # DEC Alpha
     Alpha*EV6*)
       cpu_arg="ev6";
@@ -137,8 +140,11 @@ check_cpu () {
     *Itanium*)
       cpu_arg="itanium"
       ;;
+    *IA-64*)
+      cpu_arg="itanium"
+      ;;
     # Solaris Sparc
-    *sparc*sun4u*)
+    *sparc*sun4[uv]*)
       cpu_arg="sparc"
       ;;
     # Power PC
@@ -175,67 +181,69 @@ check_cpu () {
     cc=$CC
   fi
 
-  cc_ver=`$cc --version | sed 1q`
-  cc_verno=`echo $cc_ver | sed -e 's/^.*(GCC)//g; s/[^0-9. ]//g;	 s/^ *//g; s/ .*//g'`
-  set -- `echo $cc_verno | tr '.' ' '`
-  cc_major=$1
-  cc_minor=$2
-  cc_patch=$3
-  cc_comp=`expr $cc_major '*' 100 '+' $cc_minor`
+  if test "x$compiler" = "x" ; then
+    cc_ver=`$cc --version | sed 1q`
+    cc_verno=`echo $cc_ver | sed -e 's/^.*(GCC)//g; s/[^0-9. ]//g;	 s/^ *//g; s/ .*//g'`
+    set -- `echo $cc_verno | tr '.' ' '`
+    cc_major=$1
+    cc_minor=$2
+    cc_patch=$3
+    cc_comp=`expr $cc_major '*' 100 '+' $cc_minor`
   
-  case "$cc_ver--$cc_verno" in
-    *GCC*)
-        # different gcc backends (and versions) have different CPU flags
-        case `gcc -dumpmachine` in
-          i?86-* | x86_64-*)
-	    if test "$cc_comp" -lt 304 ; then
-              check_cpu_cflags="-mcpu=${cpu_arg}"
-            elif test "$cc_comp" -ge 402 ; then
-              check_cpu_cflags="-mtune=native"
-            else
-              check_cpu_cflags="-mtune=${cpu_arg}"
-            fi
-            ;;
-          ppc-*)
-              check_cpu_cflags="-mcpu=${cpu_arg} -mtune=${cpu_arg}"
-            ;;
-          *)
-            check_cpu_cflags=""
-            return
-            ;;
-        esac
-      ;;
-    2.95.*)
-      # GCC 2.95 doesn't expose its name in --version output
-      check_cpu_cflags="-m${cpu_arg}"
-      ;;
-    *)
-      check_cpu_cflags=""
-      return
-      ;;
-  esac
+    case "$cc_ver--$cc_verno" in
+      *GCC*)
+          # different gcc backends (and versions) have different CPU flags
+          case `gcc -dumpmachine` in
+            i?86-* | x86_64-*)
+	      if test "$cc_comp" -lt 304 ; then
+                check_cpu_cflags="-mcpu=${cpu_arg}"
+              elif test "$cc_comp" -ge 402 ; then
+                check_cpu_cflags="-mtune=native"
+              else
+                check_cpu_cflags="-mtune=${cpu_arg}"
+              fi
+              ;;
+            ppc-*)
+                check_cpu_cflags="-mcpu=${cpu_arg} -mtune=${cpu_arg}"
+              ;;
+            *)
+              check_cpu_cflags=""
+              return
+              ;;
+          esac
+        ;;
+      2.95.*)
+        # GCC 2.95 doesn't expose its name in --version output
+        check_cpu_cflags="-m${cpu_arg}"
+        ;;
+      *)
+        check_cpu_cflags=""
+        return
+        ;;
+    esac
+    # now we check whether the compiler really understands the cpu type
+    touch __test.c
 
-  # now we check whether the compiler really understands the cpu type
-  touch __test.c
+    while [ "$cpu_arg" ] ; do
+      printf "testing $cpu_arg ... " >&2
+           
+      # compile check
+      eval "$cc -c $check_cpu_cflags __test.c" 2>/dev/null
+      if test "x$?" = "x0" ; then
+        echo ok >&2
+        break;
+      fi
 
+      echo failed >&2
+      check_cpu_cflags=""
+      break;
+    done
+    rm __test.*
+  fi
   if test "x$core2" = "xyes" ; then
     cpu_arg="core2"
   fi
-  while [ "$cpu_arg" ] ; do
-    printf "testing $cpu_arg ... " >&2
-          
-    # compile check
-    eval "$cc -c $check_cpu_cflags __test.c" 2>/dev/null
-    if test "x$?" = "x0" ; then
-      echo ok >&2
-      break;
-    fi
-
-    echo failed >&2
-    check_cpu_cflags=""
-    break;
-  done
-  rm __test.*
+  return 0
 }
  
 check_cpu

=== added file 'config/ac-macros/dtrace.m4'
--- a/config/ac-macros/dtrace.m4	1970-01-01 00:00:00 +0000
+++ b/config/ac-macros/dtrace.m4	2009-03-06 12:10:58 +0000
@@ -0,0 +1,38 @@
+dnl ---------------------------------------------------------------------------
+dnl Macro: DTRACE_TEST -- handles --enable-dtrace, which is on by default;
+dnl support is dropped when no dtrace program is found in $PATH or /usr/sbin.
+AC_ARG_ENABLE(dtrace,
+        AC_HELP_STRING([--enable-dtrace],[Build with support for the DTRACE.]),
+        [
+                ENABLE_DTRACE="$enable_dtrace"
+        ],
+        [
+                ENABLE_DTRACE="yes" 
+        ]
+)
+DTRACEFLAGS=""
+HAVE_DTRACE=""
+HAVE_DTRACE_DASH_G=""
+if test "$ENABLE_DTRACE" = "yes"; then
+  AC_CHECK_PROGS(DTRACE, dtrace, [not found], [$PATH:/usr/sbin])
+  if test "$DTRACE" = "not found"; then
+    ENABLE_DTRACE="no"
+  else
+    AC_DEFINE([HAVE_DTRACE], [1], [Defined to 1 if DTrace support is enabled])
+    case "$target_os" in
+      *solaris*)
+        HAVE_DTRACE_DASH_G="yes"
+        ;;
+      *)
+        HAVE_DTRACE_DASH_G="no"
+        ;;
+    esac
+  fi
+fi
+AC_SUBST(DTRACEFLAGS)
+AC_SUBST(HAVE_DTRACE)
+AM_CONDITIONAL([HAVE_DTRACE], [ test "$ENABLE_DTRACE" = "yes" ])
+AM_CONDITIONAL([HAVE_DTRACE_DASH_G], [ test "$HAVE_DTRACE_DASH_G" = "yes" ])
+dnl ---------------------------------------------------------------------------
+dnl End Macro: DTRACE_TEST
+dnl ---------------------------------------------------------------------------

=== removed file 'config/ac-macros/dtrace.m4'
--- a/config/ac-macros/dtrace.m4	2007-09-12 04:16:59 +0000
+++ b/config/ac-macros/dtrace.m4	1970-01-01 00:00:00 +0000
@@ -1,20 +0,0 @@
-dnl ---------------------------------------------------------------------------
-dnl Macro: DTRACE_TEST
-dnl ---------------------------------------------------------------------------
-AC_ARG_ENABLE(dtrace,
-    [  --enable-dtrace      Build with support for the DTRACE.],
-    [ 
-      AC_DEFINE([HAVE_DTRACE], [1], [Enables DTRACE Support])
-      AC_CHECK_PROGS(DTRACE, dtrace)
-      ENABLE_DTRACE="yes" 
-      AC_SUBST(DTRACEFLAGS)
-      AC_SUBST(HAVE_DTRACE)
-    ],
-    [
-      ENABLE_DTRACE="no" 
-    ]
-    )
-AM_CONDITIONAL([HAVE_DTRACE], [ test "$ENABLE_DTRACE" = "yes" ])
-dnl ---------------------------------------------------------------------------
-dnl End Macro: DTRACE_TEST
-dnl ---------------------------------------------------------------------------

=== modified file 'configure.in'
--- a/configure.in	2009-06-17 07:30:19 +0000
+++ b/configure.in	2009-07-02 14:23:36 +0000
@@ -58,8 +58,8 @@ sinclude(config/ac-macros/alloca.m4)
 sinclude(config/ac-macros/check_cpu.m4)
 sinclude(config/ac-macros/character_sets.m4)
 sinclude(config/ac-macros/compiler_flag.m4)
-sinclude(config/ac-macros/dtrace.m4)
 sinclude(config/ac-macros/plugins.m4)
+sinclude(config/ac-macros/dtrace.m4)
 sinclude(config/ac-macros/ha_ndbcluster.m4)
 sinclude(config/ac-macros/large_file.m4)
 sinclude(config/ac-macros/misc.m4)
@@ -866,6 +866,8 @@ then
   AC_CHECK_DECLS(SHM_HUGETLB, 
       AC_DEFINE([HAVE_LARGE_PAGES], [1], 
                 [Define if you have large pages support])
+      AC_DEFINE([HAVE_LARGE_PAGE_OPTION], [1], 
+                [Define if you have large page option])
       AC_DEFINE([HUGETLB_USE_PROC_MEMINFO], [1],
                 [Define if /proc/meminfo shows the huge page size (Linux only)])
       , ,
@@ -873,6 +875,20 @@ then
 #include <sys/shm.h>
       ]
   )
+else
+# For large pages support on Solaris
+AC_CHECK_DECLS(MHA_MAPSIZE_VA,
+      AC_DEFINE([HAVE_SOLARIS_LARGE_PAGES], [1],
+                [Define to 1 if you have large pages support])
+      AC_DEFINE([HAVE_LARGE_PAGE_OPTION], [1], 
+                [Define if you have large page option])
+      , ,
+      [
+#include <sys/mman.h>
+      ]
+)
+
+
 fi
 
 #--------------------------------------------------------------------
@@ -1803,6 +1819,64 @@ case "$with_atomic_ops" in
    *) AC_MSG_ERROR(["$with_atomic_ops" is not a valid value for --with-atomic-ops]) ;;
 esac
 
+AC_CACHE_CHECK([whether the compiler provides atomic builtins],
+               [mysql_cv_gcc_atomic_builtins],
+  [AC_RUN_IFELSE(
+     [AC_LANG_PROGRAM(
+        [[
+        /* no header needed */
+        ]],
+     [[
+        int foo= -10; int bar= 10;
+        if (!__sync_fetch_and_add(&foo, bar) || foo)
+          return -1;
+        bar= __sync_lock_test_and_set(&foo, bar);
+        if (bar || foo != 10)
+          return -1;
+        bar= __sync_val_compare_and_swap(&bar, foo, 15);
+        if (bar)
+          return -1;
+        return 0;
+     ]]
+     )],
+   [mysql_cv_gcc_atomic_builtins=yes],
+   [mysql_cv_gcc_atomic_builtins=no],
+   [mysql_cv_gcc_atomic_builtins=no]
+)])
+if test "x$mysql_cv_gcc_atomic_builtins" = xyes; then
+  AC_DEFINE(HAVE_GCC_ATOMIC_BUILTINS, 1,
+            [Define to 1 if compiler provides atomic builtins.])
+fi
+
+AC_CACHE_CHECK([whether the OS provides atomic_* functions like Solaris],
+               [mysql_cv_solaris_atomic], 
+  [AC_RUN_IFELSE(
+     [AC_LANG_PROGRAM(
+        [[
+        #include <atomic.h>
+        ]],
+     [[
+	int foo = -10; int bar = 10;
+	if (atomic_add_int_nv((uint_t *)&foo, bar) || foo)
+		return -1;
+	bar = atomic_swap_uint((uint_t *)&foo, (uint_t)bar);
+	if (bar || foo != 10)
+		return -1;
+	bar = atomic_cas_uint((uint_t *)&bar, (uint_t)foo, 15);
+	if (bar)
+		return -1;
+	return 0;
+     ]]
+     )],
+   [mysql_cv_solaris_atomic=yes],
+   [mysql_cv_solaris_atomic=no],
+   [mysql_cv_solaris_atomic=no]
+)])
+if test "x$mysql_cv_solaris_atomic" = xyes; then
+  AC_DEFINE(HAVE_SOLARIS_ATOMIC, 1,
+            [Define to 1 if OS provides atomic_* functions like Solaris.])
+fi
+
 # Force static compilation to avoid linking problems/get more speed
 AC_ARG_WITH(mysqld-ldflags,
     [  --with-mysqld-ldflags   Extra linking arguments for mysqld],
@@ -2452,6 +2526,44 @@ fi
 fi
 #---END:
 
+#Check for x86 PAUSE instruction
+AC_MSG_CHECKING([for x86 PAUSE instruction])
+# We have to actually try running the test program, because of a bug
+# in Solaris on x86_64, where it wrongly reports that PAUSE is not
+# supported when trying to run an application.  See
+# http://bugs.opensolaris.org/bugdatabase/printableBug.do?bug_id=6478684
+AC_RUN_IFELSE(
+  [AC_LANG_PROGRAM([],
+     [[
+      __asm__ __volatile__ ("pause");
+      return 0;
+     ]]
+  )],
+    [x86_pause_exists=yes],
+    [x86_pause_exists=no],
+    [x86_pause_exists=no]  # Cross-compile, assume no PAUSE instruction
+)
+AC_RUN_IFELSE(
+  [AC_LANG_PROGRAM([],
+     [[
+      __asm__ __volatile__ ("rep; nop");
+      return 0;
+     ]]
+  )],
+    [x86_fake_pause_exists=yes],
+    [x86_fake_pause_exists=no],
+    [x86_fake_pause_exists=no]  # Cross-compile, assume no x86 NOP instruction
+)
+AC_MSG_RESULT([$x86_pause_exists])
+# Prefer the real PAUSE; otherwise fall back to "rep; nop" if that ran.
+if test "$x86_pause_exists" = "yes"
+then
+  AC_DEFINE([HAVE_PAUSE_INSTRUCTION], [1], [Does x86 PAUSE instruction exist])
+elif test "$x86_fake_pause_exists" = "yes"
+then
+  AC_DEFINE([HAVE_FAKE_PAUSE_INSTRUCTION], [1], [Does x86 NOP instruction exist])
+fi
+
 # Check if pthread_attr_setscope() exists
 AC_CACHE_CHECK("for pthread_attr_setscope", mysql_cv_pthread_attr_setscope,
   AC_LINK_IFELSE(

=== modified file 'include/Makefile.am'
--- a/include/Makefile.am	2009-06-30 08:03:05 +0000
+++ b/include/Makefile.am	2009-07-03 12:25:24 +0000
@@ -15,7 +15,7 @@
 # Software Foundation, Inc., 59 Temple Place - Suite 330, Boston,
 # MA 02111-1307, USA
 
-BUILT_SOURCES =		$(HEADERS_GEN_MAKE) link_sources
+BUILT_SOURCES =		$(HEADERS_GEN_MAKE) link_sources probes_mysql_nodtrace.h
 HEADERS_GEN_CONFIGURE =		mysql_version.h
 HEADERS_GEN_MAKE =		my_config.h
 HEADERS_ABI =		mysql.h mysql_com.h mysql_time.h \
@@ -29,7 +29,7 @@ pkginclude_HEADERS =	$(HEADERS_ABI) my_d
 			my_getopt.h sslopt-longopts.h my_dir.h \
 			sslopt-vars.h sslopt-case.h sql_common.h keycache.h \
 			m_ctype.h my_attribute.h $(HEADERS_GEN_CONFIGURE) \
-			$(HEADERS_GEN_MAKE)
+			$(HEADERS_GEN_MAKE) probes_mysql.h probes_mysql_nodtrace.h
 noinst_HEADERS =	config-win.h config-netware.h lf.h my_bit.h \
 			heap.h myisamchk.h my_bitmap.h my_uctype.h \
 			myisam.h myisampack.h myisammrg.h ft_global.h\
@@ -41,11 +41,12 @@ noinst_HEADERS =	config-win.h config-net
 			my_vle.h my_user.h my_atomic.h atomic/nolock.h \
 			atomic/rwlock.h atomic/x86-gcc.h atomic/generic-msvc.h \
                         atomic/gcc_builtins.h my_libwrap.h my_stacktrace.h \
+                        atomic/solaris.h \
                         wqueue.h waiting_threads.h
-EXTRA_DIST =        mysql.h.pp mysql/plugin.h.pp
+EXTRA_DIST =        mysql.h.pp mysql/plugin.h.pp probes_mysql.d.base 
 
 # Remove built files and the symlinked directories
-CLEANFILES =            $(BUILT_SOURCES) readline openssl
+CLEANFILES =            $(BUILT_SOURCES) readline openssl probes_mysql.d probes_mysql_nodtrace.h
 
 
 # Some include files that may be moved and patched by configure
@@ -67,3 +68,24 @@ my_config.h: config.h
 # generated by configure from the .h.in files
 dist-hook:
 	$(RM) -f $(distdir)/mysql_version.h $(distdir)/my_config.h
+
+# Fake for creating the probes file: copy the provider definition from the
+# source tree, unless a probes_mysql.d is already present in this directory.
+probes_mysql.d:
+	if ! test -f probes_mysql.d ; then \
+		$(CP) -f $(top_srcdir)/include/probes_mysql.d.base probes_mysql.d; \
+	fi
+
+DTRACEPROVIDER = probes_mysql.d
+if HAVE_DTRACE
+BUILT_SOURCES += probes_mysql_dtrace.h
+CLEANFILES += $(DTRACEPROVIDER)
+
+# Generate the header from the provider definition with dtrace -h.
+probes_mysql_dtrace.h: $(DTRACEPROVIDER)
+	$(DTRACE) $(DTRACEFLAGS) -h -s $(DTRACEPROVIDER) -o $@
+endif
+
+# Generate the header from the same provider definition with dheadgen.pl.
+probes_mysql_nodtrace.h: $(DTRACEPROVIDER)
+	@PERL@ $(top_srcdir)/scripts/dheadgen.pl -f $(DTRACEPROVIDER) > $@

=== modified file 'include/atomic/nolock.h'
--- a/include/atomic/nolock.h	2008-07-09 07:12:43 +0000
+++ b/include/atomic/nolock.h	2009-07-02 14:23:36 +0000
@@ -29,9 +29,15 @@
 #  elif defined(_MSC_VER)
 #    include "generic-msvc.h"
 #  endif
+#elif defined(HAVE_SOLARIS_ATOMIC)
+#include "solaris.h"
 #endif
 
-#ifdef make_atomic_cas_body
+#if defined(make_atomic_cas_body) || defined(MY_ATOMICS_MADE)
+/*
+ * We have atomics that require no locking
+ */
+#define	MY_ATOMIC_NOLOCK
 /*
   Type not used so minimal size (emptry struct has different size between C
   and C++, zero-length array is gcc-specific).

=== added file 'include/atomic/solaris.h'
--- a/include/atomic/solaris.h	1970-01-01 00:00:00 +0000
+++ b/include/atomic/solaris.h	2008-10-16 19:12:16 +0000
@@ -0,0 +1,210 @@
+/* Copyright (C) 2008 MySQL AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+#include <atomic.h>
+
+#define	MY_ATOMIC_MODE	"solaris-atomic"
+
+/*
+ * This is defined to indicate we fully define the my_atomic_* (inline)
+ * functions here, so there is no need to "make" them in my_atomic.h
+ * using make_atomic_* and make_atomic_*_body.
+ */
+#define	MY_ATOMICS_MADE
+
+STATIC_INLINE int
+my_atomic_cas8(int8 volatile *a, int8 *cmp, int8 set)
+{
+	/* Returns 1 on a match; otherwise 0, and *cmp gets the value seen. */
+	const int8 seen = (int8) atomic_cas_8((volatile uint8_t *)a,
+		(uint8_t)*cmp, (uint8_t)set);
+	if (seen == *cmp)
+		return 1;
+	*cmp = seen;
+	return 0;
+}
+
+STATIC_INLINE int
+my_atomic_cas16(int16 volatile *a, int16 *cmp, int16 set)
+{
+	/* Returns 1 on a match; otherwise 0, and *cmp gets the value seen. */
+	const int16 seen = (int16) atomic_cas_16((volatile uint16_t *)a,
+		(uint16_t)*cmp, (uint16_t)set);
+	if (seen == *cmp)
+		return 1;
+	*cmp = seen;
+	return 0;
+}
+
+STATIC_INLINE int
+my_atomic_cas32(int32 volatile *a, int32 *cmp, int32 set)
+{
+	/* Returns 1 on a match; otherwise 0, and *cmp gets the value seen. */
+	const int32 seen = (int32) atomic_cas_32((volatile uint32_t *)a,
+		(uint32_t)*cmp, (uint32_t)set);
+	if (seen == *cmp)
+		return 1;
+	*cmp = seen;
+	return 0;
+}
+
+STATIC_INLINE int
+my_atomic_casptr(void * volatile *a, void **cmp, void *set)
+{
+	/* Returns 1 on a match; otherwise 0, and *cmp gets the value seen. */
+	void * const seen = atomic_cas_ptr(a, *cmp, set);
+	if (seen == *cmp)
+		return 1;
+	*cmp = seen;
+	return 0;
+}
+
+/* ------------------------------------------------------------------------ */
+
+STATIC_INLINE int8
+my_atomic_add8(int8 volatile *a, int8 v)
+{
+	/* The _nv call reports the new value; subtract v for the old one. */
+	const int8 updated = atomic_add_8_nv((volatile uint8_t *)a, v);
+	return (updated - v);
+}
+
+STATIC_INLINE int16
+my_atomic_add16(int16 volatile *a, int16 v)
+{
+	/* The _nv call reports the new value; subtract v for the old one. */
+	const int16 updated = atomic_add_16_nv((volatile uint16_t *)a, v);
+	return (updated - v);
+}
+
+STATIC_INLINE int32
+my_atomic_add32(int32 volatile *a, int32 v)
+{
+	/* The _nv call reports the new value; subtract v for the old one. */
+	const int32 updated = atomic_add_32_nv((volatile uint32_t *)a, v);
+	return (updated - v);
+}
+
+/* ------------------------------------------------------------------------ */
+
+#ifdef MY_ATOMIC_MODE_DUMMY
+/* Dummy mode: loads are plain reads through the volatile pointer. */
+STATIC_INLINE int8
+my_atomic_load8(int8 volatile *a)	{ return (*a); }
+
+STATIC_INLINE int16
+my_atomic_load16(int16 volatile *a)	{ return (*a); }
+
+STATIC_INLINE int32
+my_atomic_load32(int32 volatile *a)	{ return (*a); }
+
+STATIC_INLINE void *
+my_atomic_loadptr(void * volatile *a)	{ return (*a); }
+
+/* Dummy mode: stores are plain assignments through the volatile pointer. */
+
+STATIC_INLINE void
+my_atomic_store8(int8 volatile *a, int8 v)	{ *a = v; }
+
+STATIC_INLINE void
+my_atomic_store16(int16 volatile *a, int16 v)	{ *a = v; }
+
+STATIC_INLINE void
+my_atomic_store32(int32 volatile *a, int32 v)	{ *a = v; }
+
+STATIC_INLINE void
+my_atomic_storeptr(void * volatile *a, void *v)	{ *a = v; }
+
+/* ------------------------------------------------------------------------ */
+
+#else /* MY_ATOMIC_MODE_DUMMY */
+/* Loads: OR the location with 0 and return the value the call reports. */
+STATIC_INLINE int8
+my_atomic_load8(int8 volatile *a)
+{
+	return ((int8) atomic_or_8_nv((volatile uint8_t *)a, 0));
+}
+
+STATIC_INLINE int16
+my_atomic_load16(int16 volatile *a)
+{
+	return ((int16) atomic_or_16_nv((volatile uint16_t *)a, 0));
+}
+
+STATIC_INLINE int32
+my_atomic_load32(int32 volatile *a)
+{
+	return ((int32) atomic_or_32_nv((volatile uint32_t *)a, 0));
+}
+
+STATIC_INLINE void *
+my_atomic_loadptr(void * volatile *a)
+{
+	return ((void *) atomic_or_ulong_nv((volatile ulong_t *)a, 0));
+}
+
+/* Stores: swap v into the location and ignore the value swapped out. */
+
+STATIC_INLINE void
+my_atomic_store8(int8 volatile *a, int8 v)
+{
+	(void) atomic_swap_8((volatile uint8_t *)a, (uint8_t)v);
+}
+
+STATIC_INLINE void
+my_atomic_store16(int16 volatile *a, int16 v)
+{
+	(void) atomic_swap_16((volatile uint16_t *)a, (uint16_t)v);
+}
+
+STATIC_INLINE void
+my_atomic_store32(int32 volatile *a, int32 v)
+{
+	(void) atomic_swap_32((volatile uint32_t *)a, (uint32_t)v);
+}
+
+STATIC_INLINE void
+my_atomic_storeptr(void * volatile *a, void *v)
+{
+	(void) atomic_swap_ptr(a, v);
+}
+
+#endif
+
+/* Swaps: atomically store v into the location; the old value is returned. */
+
+STATIC_INLINE int8
+my_atomic_swap8(int8 volatile *a, int8 v)
+{
+	return ((int8) atomic_swap_8((volatile uint8_t *)a, (uint8_t)v));
+}
+
+STATIC_INLINE int16
+my_atomic_swap16(int16 volatile *a, int16 v)
+{
+	return ((int16) atomic_swap_16((volatile uint16_t *)a, (uint16_t)v));
+}
+
+STATIC_INLINE int32
+my_atomic_swap32(int32 volatile *a, int32 v)
+{
+	return ((int32) atomic_swap_32((volatile uint32_t *)a, (uint32_t)v));
+}
+
+STATIC_INLINE void *
+my_atomic_swapptr(void * volatile *a, void *v)
+{
+	return (atomic_swap_ptr(a, v));
+}

=== modified file 'include/atomic/x86-gcc.h'
--- a/include/atomic/x86-gcc.h	2008-05-29 15:44:11 +0000
+++ b/include/atomic/x86-gcc.h	2009-07-02 14:23:36 +0000
@@ -43,7 +43,7 @@
   asm volatile (LOCK_prefix "; xadd %0, %1;" : "+r" (v) , "+m" (*a))
 #endif
 #define make_atomic_fas_body(S)				\
-  asm volatile ("xchg %0, %1;" : "+r" (v) , "+m" (*a))
+  asm volatile ("xchg %0, %1;" : "+q" (v) , "+m" (*a))
 #define make_atomic_cas_body(S)					\
   asm volatile (LOCK_prefix "; cmpxchg %3, %0; setz %2;"	\
                : "+m" (*a), "+a" (*cmp), "=q" (ret): "r" (set))

=== modified file 'include/my_atomic.h'
--- a/include/my_atomic.h	2008-07-22 14:16:22 +0000
+++ b/include/my_atomic.h	2009-07-02 14:23:36 +0000
@@ -54,6 +54,9 @@
 #define intptr         void *
 
 #ifndef MY_ATOMIC_MODE_RWLOCKS
+/*
+ * Attempt to do atomic ops without locks
+ */
 #include "atomic/nolock.h"
 #endif
 
@@ -194,7 +197,7 @@ extern int ## S my_atomic_load ## S(Uv_ 
 #define make_atomic_store(S)                                    \
 extern void my_atomic_store ## S(Uv_ ## S U_a, U_ ## S U_v);
 
-#endif
+#endif /* HAVE_INLINE */
 
 make_atomic_cas(32)
 make_atomic_cas(ptr)

=== added file 'include/probes_mysql.d.base'
--- a/include/probes_mysql.d.base	1970-01-01 00:00:00 +0000
+++ b/include/probes_mysql.d.base	2009-03-18 10:04:15 +0000
@@ -0,0 +1,176 @@
+/* Copyright (C) 2008 MySQL AB
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA */
+
+/*
+  The actual probe names in DTrace scripts will replace '__' by '-'. Thus
+  insert__row__start will be insert-row-start.
+
+  Recommendations for adding new probes:
+
+  - each probe should have the minimal set of arguments required to
+  unambiguously identify the context in which the probe fires. Redundant
+  arguments (i.e. the ones that can be obtained in user scripts from previous
+  probes' arguments or otherwise) may be added for convenience.
+
+  - try to avoid computationally expensive probe arguments. If impossible,
+  use *_ENABLED() macros to check if the probe is activated before
+  performing expensive calculations for a probe argument.
+
+  - all *-done probes should have a status argument wherever applicable to make
+  it possible for user scripts to figure out whether the completed operation
+  was successful or not.
+  
+  - for all status arguments, a non-zero value should be returned on error or
+  failure, 0 should be returned on success.
+*/
+
+provider mysql {
+  
+  /* The following ones fire when creating or closing a client connection */
+  probe connection__start(unsigned long conn_id, char *user, char *host);
+  probe connection__done(int status, unsigned long conn_id);
+
+  /*
+    Fire at the start/end of any client command processing (including SQL
+    queries).
+  */
+  probe command__start(unsigned long conn_id, int command,
+                       char *user, char *host);
+  probe command__done(int status);
+  
+  /*
+    The following probes fire at the start/end of any SQL query processing,
+    respectively.
+
+    query__start carries many arguments that scripts can save and reuse in
+    later probes.  For simplicity, the query string is also passed to most
+    other DTrace probes.  The host argument is either the hostname or the
+    IP address of the MySQL client.
+  */
+  probe query__start(char *query,
+                     unsigned long conn_id,
+                     char *db_name,
+                     char *user,
+                     char *host);
+  probe query__done(int status); 
+
+  /* Fire at the start/end of SQL query parsing */
+  probe query__parse__start(char *query);
+  probe query__parse__done(int status);
+
+  /* Track whether the query hits the query cache or not */
+  probe query__cache__hit(char *query, unsigned long rows);
+  probe query__cache__miss(char *query);
+
+  /*
+    This probe fires when the actual query execution starts, i.e. after
+    parsing and checking the query cache, but before privilege checks,
+    optimizing, etc.
+
+    Query means also all independent queries of a stored procedure and prepared
+    statements. Also the stored procedure itself is a query.
+
+    exec_type is:
+    0:           Executed query from sql_parse, top-level query (sql_parse.cc)
+    1:           Executed prepared statement (sql_prepare.cc)
+    2:           Executed cursor statement (sql_cursor.cc)
+    3:           Executed query in stored procedure (sp_head.cc)
+  */
+  probe query__exec__start(char *query,
+                           unsigned long conn_id,
+                           char *db_name,
+                           char *user,
+                           char *host,
+                           int exec_type);
+  probe query__exec__done(int status);
+
+  /* These probes fire when performing row operations towards any handler */
+  probe insert__row__start(char *db, char *table);
+  probe insert__row__done(int status);
+  probe update__row__start(char *db, char *table);
+  probe update__row__done(int status);
+  probe delete__row__start(char *db, char *table);
+  probe delete__row__done(int status);
+  probe read__row__start(char *db, char *table, int scan_flag);
+  probe read__row__done(int status);
+  probe index__read__row__start(char *db, char *table);
+  probe index__read__row__done(int status);
+  
+  /*
+    These probes fire when calling external_lock for any handler
+    depending on the lock type being acquired or released.
+  */
+  probe handler__rdlock__start(char *db, char *table);
+  probe handler__wrlock__start(char *db, char *table);
+  probe handler__unlock__start(char *db, char *table);
+  probe handler__rdlock__done(int status);
+  probe handler__wrlock__done(int status);
+  probe handler__unlock__done(int status);
+  
+  /*
+    These probes fire when a filesort activity happens in a query.
+  */
+  probe filesort__start(char *db, char *table);
+  probe filesort__done(int status, unsigned long rows);
+  /*
+    The query types SELECT, INSERT, INSERT AS SELECT, UPDATE, UPDATE with
+    multiple tables, DELETE, DELETE with multiple tables are all probed.
+    The start probe always contains the query text.
+  */
+  probe select__start(char *query);
+  probe select__done(int status, unsigned long rows);
+  probe insert__start(char *query);
+  probe insert__done(int status, unsigned long rows);
+  probe insert__select__start(char *query);
+  probe insert__select__done(int status, unsigned long rows);
+  probe update__start(char *query);
+  probe update__done(int status,
+                     unsigned long rowsmatches, unsigned long rowschanged);
+  probe multi__update__start(char *query);
+  probe multi__update__done(int status,
+                            unsigned long rowsmatches, 
+                            unsigned long rowschanged);
+  probe delete__start(char *query);
+  probe delete__done(int status, unsigned long rows);
+  probe multi__delete__start(char *query);
+  probe multi__delete__done(int status, unsigned long rows);
+
+  /*
+    These probes can be used to measure the time waiting for network traffic
+    or identify network-related problems.
+  */
+  probe net__read__start();
+  probe net__read__done(int status, unsigned long bytes);
+  probe net__write__start(unsigned long bytes);
+  probe net__write__done(int status);
+
+  /* MyISAM Key cache probes */
+  probe keycache__read__start(char *filepath, unsigned long  bytes,
+                              unsigned long mem_used, unsigned long mem_free);
+  probe keycache__read__block(unsigned long bytes);
+  probe keycache__read__hit();
+  probe keycache__read__miss();
+  probe keycache__read__done(unsigned long mem_used, unsigned long mem_free);
+  probe keycache__write__start(char *filepath, unsigned long bytes,
+                               unsigned long mem_used, unsigned long mem_free);
+  probe keycache__write__block(unsigned long bytes);
+  probe keycache__write__done(unsigned long mem_used, unsigned long mem_free);
+};
+
+#pragma D attributes Evolving/Evolving/Common provider mysql provider
+#pragma D attributes Evolving/Evolving/Common provider mysql module
+#pragma D attributes Evolving/Evolving/Common provider mysql function
+#pragma D attributes Evolving/Evolving/Common provider mysql name
+#pragma D attributes Evolving/Evolving/Common provider mysql args

=== added file 'include/probes_mysql.h'
--- a/include/probes_mysql.h	1970-01-01 00:00:00 +0000
+++ b/include/probes_mysql.h	2008-12-20 10:01:41 +0000
@@ -0,0 +1,13 @@
+#if !defined(PROBES_MYSQL_H)
+/* Single entry point for the MYSQL_* probe macros. */
+#define PROBES_MYSQL_H
+/* Pulled in before HAVE_DTRACE / DISABLE_DTRACE are tested below. */
+#include <my_global.h>
+/* Stubs unless DTrace is present and not vetoed via -DDISABLE_DTRACE. */
+#if !defined(HAVE_DTRACE) || defined(DISABLE_DTRACE)
+#include "probes_mysql_nodtrace.h"
+#else
+#include "probes_mysql_dtrace.h"
+#endif
+
+#endif /* PROBES_MYSQL_H */

=== added file 'include/probes_mysql_nodtrace.h'
--- a/include/probes_mysql_nodtrace.h	1970-01-01 00:00:00 +0000
+++ b/include/probes_mysql_nodtrace.h	2008-12-23 09:05:30 +0000
@@ -0,0 +1,129 @@
+/*
+ * Generated by dheadgen(1).
+ */
+/* Stub probe macros; probes_mysql.h includes this when HAVE_DTRACE is unset or DISABLE_DTRACE is set. */
+#ifndef	_PROBES_MYSQL_D
+#define	_PROBES_MYSQL_D
+
+#ifdef	__cplusplus
+extern "C" {
+#endif
+/* Each probe macro expands to nothing (arguments are not evaluated); each *_ENABLED() macro is (0). */
+#define	MYSQL_CONNECTION_START(arg0, arg1, arg2)
+#define	MYSQL_CONNECTION_START_ENABLED() (0)
+#define	MYSQL_CONNECTION_DONE(arg0, arg1)
+#define	MYSQL_CONNECTION_DONE_ENABLED() (0)
+#define	MYSQL_COMMAND_START(arg0, arg1, arg2, arg3)
+#define	MYSQL_COMMAND_START_ENABLED() (0)
+#define	MYSQL_COMMAND_DONE(arg0)
+#define	MYSQL_COMMAND_DONE_ENABLED() (0)
+#define	MYSQL_QUERY_START(arg0, arg1, arg2, arg3, arg4)
+#define	MYSQL_QUERY_START_ENABLED() (0)
+#define	MYSQL_QUERY_DONE(arg0)
+#define	MYSQL_QUERY_DONE_ENABLED() (0)
+#define	MYSQL_QUERY_PARSE_START(arg0)
+#define	MYSQL_QUERY_PARSE_START_ENABLED() (0)
+#define	MYSQL_QUERY_PARSE_DONE(arg0)
+#define	MYSQL_QUERY_PARSE_DONE_ENABLED() (0)
+#define	MYSQL_QUERY_CACHE_HIT(arg0, arg1)
+#define	MYSQL_QUERY_CACHE_HIT_ENABLED() (0)
+#define	MYSQL_QUERY_CACHE_MISS(arg0)
+#define	MYSQL_QUERY_CACHE_MISS_ENABLED() (0)
+#define	MYSQL_QUERY_EXEC_START(arg0, arg1, arg2, arg3, arg4, arg5)
+#define	MYSQL_QUERY_EXEC_START_ENABLED() (0)
+#define	MYSQL_QUERY_EXEC_DONE(arg0)
+#define	MYSQL_QUERY_EXEC_DONE_ENABLED() (0)
+#define	MYSQL_INSERT_ROW_START(arg0, arg1)
+#define	MYSQL_INSERT_ROW_START_ENABLED() (0)
+#define	MYSQL_INSERT_ROW_DONE(arg0)
+#define	MYSQL_INSERT_ROW_DONE_ENABLED() (0)
+#define	MYSQL_UPDATE_ROW_START(arg0, arg1)
+#define	MYSQL_UPDATE_ROW_START_ENABLED() (0)
+#define	MYSQL_UPDATE_ROW_DONE(arg0)
+#define	MYSQL_UPDATE_ROW_DONE_ENABLED() (0)
+#define	MYSQL_DELETE_ROW_START(arg0, arg1)
+#define	MYSQL_DELETE_ROW_START_ENABLED() (0)
+#define	MYSQL_DELETE_ROW_DONE(arg0)
+#define	MYSQL_DELETE_ROW_DONE_ENABLED() (0)
+#define	MYSQL_READ_ROW_START(arg0, arg1, arg2)
+#define	MYSQL_READ_ROW_START_ENABLED() (0)
+#define	MYSQL_READ_ROW_DONE(arg0)
+#define	MYSQL_READ_ROW_DONE_ENABLED() (0)
+#define	MYSQL_INDEX_READ_ROW_START(arg0, arg1)
+#define	MYSQL_INDEX_READ_ROW_START_ENABLED() (0)
+#define	MYSQL_INDEX_READ_ROW_DONE(arg0)
+#define	MYSQL_INDEX_READ_ROW_DONE_ENABLED() (0)
+#define	MYSQL_HANDLER_RDLOCK_START(arg0, arg1)
+#define	MYSQL_HANDLER_RDLOCK_START_ENABLED() (0)
+#define	MYSQL_HANDLER_WRLOCK_START(arg0, arg1)
+#define	MYSQL_HANDLER_WRLOCK_START_ENABLED() (0)
+#define	MYSQL_HANDLER_UNLOCK_START(arg0, arg1)
+#define	MYSQL_HANDLER_UNLOCK_START_ENABLED() (0)
+#define	MYSQL_HANDLER_RDLOCK_DONE(arg0)
+#define	MYSQL_HANDLER_RDLOCK_DONE_ENABLED() (0)
+#define	MYSQL_HANDLER_WRLOCK_DONE(arg0)
+#define	MYSQL_HANDLER_WRLOCK_DONE_ENABLED() (0)
+#define	MYSQL_HANDLER_UNLOCK_DONE(arg0)
+#define	MYSQL_HANDLER_UNLOCK_DONE_ENABLED() (0)
+#define	MYSQL_FILESORT_START(arg0, arg1)
+#define	MYSQL_FILESORT_START_ENABLED() (0)
+#define	MYSQL_FILESORT_DONE(arg0, arg1)
+#define	MYSQL_FILESORT_DONE_ENABLED() (0)
+#define	MYSQL_SELECT_START(arg0)
+#define	MYSQL_SELECT_START_ENABLED() (0)
+#define	MYSQL_SELECT_DONE(arg0, arg1)
+#define	MYSQL_SELECT_DONE_ENABLED() (0)
+#define	MYSQL_INSERT_START(arg0)
+#define	MYSQL_INSERT_START_ENABLED() (0)
+#define	MYSQL_INSERT_DONE(arg0, arg1)
+#define	MYSQL_INSERT_DONE_ENABLED() (0)
+#define	MYSQL_INSERT_SELECT_START(arg0)
+#define	MYSQL_INSERT_SELECT_START_ENABLED() (0)
+#define	MYSQL_INSERT_SELECT_DONE(arg0, arg1)
+#define	MYSQL_INSERT_SELECT_DONE_ENABLED() (0)
+#define	MYSQL_UPDATE_START(arg0)
+#define	MYSQL_UPDATE_START_ENABLED() (0)
+#define	MYSQL_UPDATE_DONE(arg0, arg1, arg2)
+#define	MYSQL_UPDATE_DONE_ENABLED() (0)
+#define	MYSQL_MULTI_UPDATE_START(arg0)
+#define	MYSQL_MULTI_UPDATE_START_ENABLED() (0)
+#define	MYSQL_MULTI_UPDATE_DONE(arg0, arg1, arg2)
+#define	MYSQL_MULTI_UPDATE_DONE_ENABLED() (0)
+#define	MYSQL_DELETE_START(arg0)
+#define	MYSQL_DELETE_START_ENABLED() (0)
+#define	MYSQL_DELETE_DONE(arg0, arg1)
+#define	MYSQL_DELETE_DONE_ENABLED() (0)
+#define	MYSQL_MULTI_DELETE_START(arg0)
+#define	MYSQL_MULTI_DELETE_START_ENABLED() (0)
+#define	MYSQL_MULTI_DELETE_DONE(arg0, arg1)
+#define	MYSQL_MULTI_DELETE_DONE_ENABLED() (0)
+#define	MYSQL_NET_READ_START()
+#define	MYSQL_NET_READ_START_ENABLED() (0)
+#define	MYSQL_NET_READ_DONE(arg0, arg1)
+#define	MYSQL_NET_READ_DONE_ENABLED() (0)
+#define	MYSQL_NET_WRITE_START(arg0)
+#define	MYSQL_NET_WRITE_START_ENABLED() (0)
+#define	MYSQL_NET_WRITE_DONE(arg0)
+#define	MYSQL_NET_WRITE_DONE_ENABLED() (0)
+#define	MYSQL_KEYCACHE_READ_START(arg0, arg1, arg2, arg3)
+#define	MYSQL_KEYCACHE_READ_START_ENABLED() (0)
+#define	MYSQL_KEYCACHE_READ_BLOCK(arg0)
+#define	MYSQL_KEYCACHE_READ_BLOCK_ENABLED() (0)
+#define	MYSQL_KEYCACHE_READ_HIT()
+#define	MYSQL_KEYCACHE_READ_HIT_ENABLED() (0)
+#define	MYSQL_KEYCACHE_READ_MISS()
+#define	MYSQL_KEYCACHE_READ_MISS_ENABLED() (0)
+#define	MYSQL_KEYCACHE_READ_DONE(arg0, arg1)
+#define	MYSQL_KEYCACHE_READ_DONE_ENABLED() (0)
+#define	MYSQL_KEYCACHE_WRITE_START(arg0, arg1, arg2, arg3)
+#define	MYSQL_KEYCACHE_WRITE_START_ENABLED() (0)
+#define	MYSQL_KEYCACHE_WRITE_BLOCK(arg0)
+#define	MYSQL_KEYCACHE_WRITE_BLOCK_ENABLED() (0)
+#define	MYSQL_KEYCACHE_WRITE_DONE(arg0, arg1)
+#define	MYSQL_KEYCACHE_WRITE_DONE_ENABLED() (0)
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif  /* _PROBES_MYSQL_D */

=== modified file 'libmysql/Makefile.shared'
--- a/libmysql/Makefile.shared	2009-03-16 17:00:38 +0000
+++ b/libmysql/Makefile.shared	2009-07-02 14:23:36 +0000
@@ -73,7 +73,8 @@ DEFS =			-DDEFAULT_CHARSET_HOME="\"$(MYS
 			-DDEFAULT_HOME_ENV=MYSQL_HOME \
 			-DDEFAULT_GROUP_SUFFIX_ENV=MYSQL_GROUP_SUFFIX \
 			-DDEFAULT_SYSCONFDIR="\"$(sysconfdir)\"" \
-			-DSHAREDIR="\"$(MYSQLSHAREdir)\"" $(target_defs)
+			-DSHAREDIR="\"$(MYSQLSHAREdir)\"" -DDISABLE_DTRACE \
+			$(target_defs)
 
 if HAVE_YASSL
 yassl_las = $(top_builddir)/extra/yassl/src/libyassl.la \

=== modified file 'libmysqld/Makefile.am'
--- a/libmysqld/Makefile.am	2009-05-04 15:51:55 +0000
+++ b/libmysqld/Makefile.am	2009-07-02 14:23:36 +0000
@@ -30,7 +30,8 @@ DEFS =			-DEMBEDDED_LIBRARY -DMYSQL_SERV
 			-DSHAREDIR="\"$(MYSQLSHAREdir)\"" \
 			@DEFS@ \
 			-DLIBDIR="\"$(MYSQLLIBdir)\"" \
-			-DPLUGINDIR="\"$(pkgplugindir)\""
+			-DPLUGINDIR="\"$(pkgplugindir)\"" \
+			-DDISABLE_DTRACE
 AM_CPPFLAGS =		-I$(top_srcdir)/include \
 			-I$(top_builddir)/sql -I$(top_srcdir)/sql \
 			-I$(top_srcdir)/sql/examples \

=== modified file 'mysql-test/include/default_mysqld.cnf'
--- a/mysql-test/include/default_mysqld.cnf	2009-02-02 15:58:48 +0000
+++ b/mysql-test/include/default_mysqld.cnf	2009-07-02 14:23:36 +0000
@@ -14,6 +14,11 @@ sort_buffer=                256K
 max_heap_table_size=        1M
 
 loose-innodb_data_file_path=      ibdata1:10M:autoextend
+loose-innodb_buffer_pool_size=    64M
+loose-innodb_write_io_threads=    2
+loose-innodb_read_io_threads=     2
+loose-innodb_log_buffer_size=     4M
+loose-innodb_log_file_size=       32M
 
 slave-net-timeout=120
 

=== modified file 'mysql-test/r/innodb.result'
--- a/mysql-test/r/innodb.result	2009-06-19 09:12:06 +0000
+++ b/mysql-test/r/innodb.result	2009-07-02 14:23:36 +0000
@@ -1725,7 +1725,7 @@ count(*)
 drop table t1;
 show status like "Innodb_buffer_pool_pages_total";
 Variable_name	Value
-Innodb_buffer_pool_pages_total	512
+Innodb_buffer_pool_pages_total	4096
 show status like "Innodb_page_size";
 Variable_name	Value
 Innodb_page_size	16384
@@ -1771,7 +1771,7 @@ innodb_sync_spin_loops	20
 SET @old_innodb_thread_concurrency= @@global.innodb_thread_concurrency;
 show variables like "innodb_thread_concurrency";
 Variable_name	Value
-innodb_thread_concurrency	8
+innodb_thread_concurrency	0
 set global innodb_thread_concurrency=1001;
 Warnings:
 Warning	1292	Truncated incorrect thread_concurrency value: '1001'

=== modified file 'mysql-test/r/merge.result'
--- a/mysql-test/r/merge.result	2009-04-22 10:02:28 +0000
+++ b/mysql-test/r/merge.result	2009-07-02 16:13:21 +0000
@@ -2039,10 +2039,10 @@ set @save_table_definition_cache=@@globa
 #
 # Set @@global.table_definition_cache to minimum
 #
-set @@global.table_definition_cache=256;
+set @@global.table_definition_cache=400;
 set @a=null;
 #
-# Create 256 merge children
+# Create 400 merge children
 #
 set @a=concat("create table t_parent (a int) union(", @a,
 ") insert_method=first engine=mrg_myisam");

=== modified file 'mysql-test/r/partition_innodb.result'
--- a/mysql-test/r/partition_innodb.result	2008-12-13 11:02:16 +0000
+++ b/mysql-test/r/partition_innodb.result	2009-07-02 14:23:36 +0000
@@ -11,13 +11,7 @@ SET @old_tx_isolation := @@session.tx_is
 SET SESSION TRANSACTION ISOLATION LEVEL READ COMMITTED;
 SET autocommit = 0;
 UPDATE t1 SET DATA = data*2 WHERE id = 3;
-SHOW ENGINE InnoDB STATUS;
-Type	Name	Status
-InnoDB		2 lock struct(s) 1 row lock(s)
 UPDATE t1 SET data = data*2 WHERE data = 2;
-SHOW ENGINE InnoDB STATUS;
-Type	Name	Status
-InnoDB		6 lock struct(s) 2 row lock(s)
 SET @@session.tx_isolation = @old_tx_isolation;
 DROP TABLE t1;
 # Bug#37721, test of ORDER BY on PK and WHERE on INDEX

=== modified file 'mysql-test/suite/sys_vars/r/innodb_autoextend_increment_basic.result'
--- a/mysql-test/suite/sys_vars/r/innodb_autoextend_increment_basic.result	2009-02-03 09:16:53 +0000
+++ b/mysql-test/suite/sys_vars/r/innodb_autoextend_increment_basic.result	2009-07-02 14:23:36 +0000
@@ -6,13 +6,13 @@ Warning	1292	Truncated incorrect autoext
 SET @@global.innodb_autoextend_increment  = DEFAULT;
 SELECT @@global.innodb_autoextend_increment ;
 @@global.innodb_autoextend_increment
-8
+64
 '#---------------------FN_DYNVARS_046_02-------------------------#'
 SET innodb_autoextend_increment  = 1;
 ERROR HY000: Variable 'innodb_autoextend_increment' is a GLOBAL variable and should be set with SET GLOBAL
 SELECT @@innodb_autoextend_increment ;
 @@innodb_autoextend_increment
-8
+64
 SELECT local.innodb_autoextend_increment ;
 ERROR 42S02: Unknown table 'local' in field list
 SET global innodb_autoextend_increment  = 0;

=== modified file 'mysql-test/suite/sys_vars/r/innodb_file_io_threads_basic.result'
--- a/mysql-test/suite/sys_vars/r/innodb_file_io_threads_basic.result	2008-12-19 15:12:15 +0000
+++ b/mysql-test/suite/sys_vars/r/innodb_file_io_threads_basic.result	2009-02-17 12:24:09 +0000
@@ -1,53 +1,101 @@
 '#---------------------BS_STVARS_027_01----------------------#'
-SELECT COUNT(@@GLOBAL.innodb_file_io_threads);
-COUNT(@@GLOBAL.innodb_file_io_threads)
+SELECT COUNT(@@GLOBAL.innodb_read_io_threads);
+COUNT(@@GLOBAL.innodb_read_io_threads)
+1
+1 Expected
+SELECT COUNT(@@GLOBAL.innodb_write_io_threads);
+COUNT(@@GLOBAL.innodb_write_io_threads)
 1
 1 Expected
 '#---------------------BS_STVARS_027_02----------------------#'
-SET @@GLOBAL.innodb_file_io_threads=1;
-ERROR HY000: Variable 'innodb_file_io_threads' is a read only variable
+SET @@GLOBAL.innodb_read_io_threads=1;
+ERROR HY000: Variable 'innodb_read_io_threads' is a read only variable
+Expected error 'Read only variable'
+SELECT COUNT(@@GLOBAL.innodb_read_io_threads);
+COUNT(@@GLOBAL.innodb_read_io_threads)
+1
+1 Expected
+SET @@GLOBAL.innodb_write_io_threads=1;
+ERROR HY000: Variable 'innodb_write_io_threads' is a read only variable
 Expected error 'Read only variable'
-SELECT COUNT(@@GLOBAL.innodb_file_io_threads);
-COUNT(@@GLOBAL.innodb_file_io_threads)
+SELECT COUNT(@@GLOBAL.innodb_write_io_threads);
+COUNT(@@GLOBAL.innodb_write_io_threads)
 1
 1 Expected
 '#---------------------BS_STVARS_027_03----------------------#'
-SELECT @@GLOBAL.innodb_file_io_threads = VARIABLE_VALUE
+SELECT @@GLOBAL.innodb_read_io_threads = VARIABLE_VALUE
+FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
+WHERE VARIABLE_NAME='innodb_read_io_threads';
+@@GLOBAL.innodb_read_io_threads = VARIABLE_VALUE
+1
+1 Expected
+SELECT COUNT(@@GLOBAL.innodb_read_io_threads);
+COUNT(@@GLOBAL.innodb_read_io_threads)
+1
+1 Expected
+SELECT COUNT(VARIABLE_VALUE)
+FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES 
+WHERE VARIABLE_NAME='innodb_read_io_threads';
+COUNT(VARIABLE_VALUE)
+1
+1 Expected
+SELECT @@GLOBAL.innodb_write_io_threads = VARIABLE_VALUE
 FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
-WHERE VARIABLE_NAME='innodb_file_io_threads';
-@@GLOBAL.innodb_file_io_threads = VARIABLE_VALUE
+WHERE VARIABLE_NAME='innodb_write_io_threads';
+@@GLOBAL.innodb_write_io_threads = VARIABLE_VALUE
 1
 1 Expected
-SELECT COUNT(@@GLOBAL.innodb_file_io_threads);
-COUNT(@@GLOBAL.innodb_file_io_threads)
+SELECT COUNT(@@GLOBAL.innodb_write_io_threads);
+COUNT(@@GLOBAL.innodb_write_io_threads)
 1
 1 Expected
 SELECT COUNT(VARIABLE_VALUE)
 FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES 
-WHERE VARIABLE_NAME='innodb_file_io_threads';
+WHERE VARIABLE_NAME='innodb_write_io_threads';
 COUNT(VARIABLE_VALUE)
 1
 1 Expected
 '#---------------------BS_STVARS_027_04----------------------#'
-SELECT @@innodb_file_io_threads = @@GLOBAL.innodb_file_io_threads;
-@@innodb_file_io_threads = @@GLOBAL.innodb_file_io_threads
+SELECT @@innodb_read_io_threads = @@GLOBAL.innodb_read_io_threads;
+@@innodb_read_io_threads = @@GLOBAL.innodb_read_io_threads
+1
+1 Expected
+SELECT @@innodb_write_io_threads = @@GLOBAL.innodb_write_io_threads;
+@@innodb_write_io_threads = @@GLOBAL.innodb_write_io_threads
 1
 1 Expected
 '#---------------------BS_STVARS_027_05----------------------#'
-SELECT COUNT(@@innodb_file_io_threads);
-COUNT(@@innodb_file_io_threads)
+SELECT COUNT(@@innodb_read_io_threads);
+COUNT(@@innodb_read_io_threads)
+1
+1 Expected
+SELECT COUNT(@@local.innodb_read_io_threads);
+ERROR HY000: Variable 'innodb_read_io_threads' is a GLOBAL variable
+Expected error 'Variable is a GLOBAL variable'
+SELECT COUNT(@@SESSION.innodb_read_io_threads);
+ERROR HY000: Variable 'innodb_read_io_threads' is a GLOBAL variable
+Expected error 'Variable is a GLOBAL variable'
+SELECT COUNT(@@GLOBAL.innodb_read_io_threads);
+COUNT(@@GLOBAL.innodb_read_io_threads)
+1
+1 Expected
+SELECT innodb_read_io_threads = @@SESSION.innodb_read_io_threads;
+ERROR 42S22: Unknown column 'innodb_read_io_threads' in 'field list'
+Expected error 'Readonly variable'
+SELECT COUNT(@@innodb_write_io_threads);
+COUNT(@@innodb_write_io_threads)
 1
 1 Expected
-SELECT COUNT(@@local.innodb_file_io_threads);
-ERROR HY000: Variable 'innodb_file_io_threads' is a GLOBAL variable
+SELECT COUNT(@@local.innodb_write_io_threads);
+ERROR HY000: Variable 'innodb_write_io_threads' is a GLOBAL variable
 Expected error 'Variable is a GLOBAL variable'
-SELECT COUNT(@@SESSION.innodb_file_io_threads);
-ERROR HY000: Variable 'innodb_file_io_threads' is a GLOBAL variable
+SELECT COUNT(@@SESSION.innodb_write_io_threads);
+ERROR HY000: Variable 'innodb_write_io_threads' is a GLOBAL variable
 Expected error 'Variable is a GLOBAL variable'
-SELECT COUNT(@@GLOBAL.innodb_file_io_threads);
-COUNT(@@GLOBAL.innodb_file_io_threads)
+SELECT COUNT(@@GLOBAL.innodb_write_io_threads);
+COUNT(@@GLOBAL.innodb_write_io_threads)
 1
 1 Expected
-SELECT innodb_file_io_threads = @@SESSION.innodb_file_io_threads;
-ERROR 42S22: Unknown column 'innodb_file_io_threads' in 'field list'
+SELECT innodb_write_io_threads = @@SESSION.innodb_write_io_threads;
+ERROR 42S22: Unknown column 'innodb_write_io_threads' in 'field list'
 Expected error 'Readonly variable'

=== modified file 'mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_basic.result'
--- a/mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_basic.result	2008-12-19 15:12:15 +0000
+++ b/mysql-test/suite/sys_vars/r/innodb_max_dirty_pages_pct_basic.result	2009-02-17 12:24:09 +0000
@@ -1,19 +1,19 @@
 SET @global_start_value = @@global.innodb_max_dirty_pages_pct;
 SELECT @global_start_value;
 @global_start_value
-90
+75
 '#--------------------FN_DYNVARS_046_01------------------------#'
 SET @@global.innodb_max_dirty_pages_pct = 0;
 SET @@global.innodb_max_dirty_pages_pct = DEFAULT;
 SELECT @@global.innodb_max_dirty_pages_pct;
 @@global.innodb_max_dirty_pages_pct
-90
+75
 '#---------------------FN_DYNVARS_046_02-------------------------#'
 SET innodb_max_dirty_pages_pct = 1;
 ERROR HY000: Variable 'innodb_max_dirty_pages_pct' is a GLOBAL variable and should be set with SET GLOBAL
 SELECT @@innodb_max_dirty_pages_pct;
 @@innodb_max_dirty_pages_pct
-90
+75
 SELECT local.innodb_max_dirty_pages_pct;
 ERROR 42S02: Unknown table 'local' in field list
 SET global innodb_max_dirty_pages_pct = 0;
@@ -29,33 +29,33 @@ SET @@global.innodb_max_dirty_pages_pct 
 SELECT @@global.innodb_max_dirty_pages_pct;
 @@global.innodb_max_dirty_pages_pct
 1
-SET @@global.innodb_max_dirty_pages_pct = 100;
+SET @@global.innodb_max_dirty_pages_pct = 99;
 SELECT @@global.innodb_max_dirty_pages_pct;
 @@global.innodb_max_dirty_pages_pct
-100
+99
 '#--------------------FN_DYNVARS_046_04-------------------------#'
 SET @@global.innodb_max_dirty_pages_pct = -1;
 Warnings:
 Warning	1292	Truncated incorrect max_dirty_pages_pct value: '18446744073709551615'
 SELECT @@global.innodb_max_dirty_pages_pct;
 @@global.innodb_max_dirty_pages_pct
-100
+99
 SET @@global.innodb_max_dirty_pages_pct = "T";
 ERROR 42000: Incorrect argument type to variable 'innodb_max_dirty_pages_pct'
 SELECT @@global.innodb_max_dirty_pages_pct;
 @@global.innodb_max_dirty_pages_pct
-100
+99
 SET @@global.innodb_max_dirty_pages_pct = "Y";
 ERROR 42000: Incorrect argument type to variable 'innodb_max_dirty_pages_pct'
 SELECT @@global.innodb_max_dirty_pages_pct;
 @@global.innodb_max_dirty_pages_pct
-100
+99
 SET @@global.innodb_max_dirty_pages_pct = 1001;
 Warnings:
 Warning	1292	Truncated incorrect max_dirty_pages_pct value: '1001'
 SELECT @@global.innodb_max_dirty_pages_pct;
 @@global.innodb_max_dirty_pages_pct
-100
+99
 '#----------------------FN_DYNVARS_046_05------------------------#'
 SELECT @@global.innodb_max_dirty_pages_pct =
 VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
@@ -65,22 +65,22 @@ VARIABLE_VALUE
 1
 SELECT @@global.innodb_max_dirty_pages_pct;
 @@global.innodb_max_dirty_pages_pct
-100
+99
 SELECT VARIABLE_VALUE FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
 WHERE VARIABLE_NAME='innodb_max_dirty_pages_pct';
 VARIABLE_VALUE
-100
+99
 '#---------------------FN_DYNVARS_046_06-------------------------#'
 SET @@global.innodb_max_dirty_pages_pct = OFF;
 ERROR 42000: Incorrect argument type to variable 'innodb_max_dirty_pages_pct'
 SELECT @@global.innodb_max_dirty_pages_pct;
 @@global.innodb_max_dirty_pages_pct
-100
+99
 SET @@global.innodb_max_dirty_pages_pct = ON;
 ERROR 42000: Incorrect argument type to variable 'innodb_max_dirty_pages_pct'
 SELECT @@global.innodb_max_dirty_pages_pct;
 @@global.innodb_max_dirty_pages_pct
-100
+99
 '#---------------------FN_DYNVARS_046_07----------------------#'
 SET @@global.innodb_max_dirty_pages_pct = TRUE;
 SELECT @@global.innodb_max_dirty_pages_pct;
@@ -93,4 +93,4 @@ SELECT @@global.innodb_max_dirty_pages_p
 SET @@global.innodb_max_dirty_pages_pct = @global_start_value;
 SELECT @@global.innodb_max_dirty_pages_pct;
 @@global.innodb_max_dirty_pages_pct
-90
+75

=== modified file 'mysql-test/suite/sys_vars/r/innodb_thread_concurrency_basic.result'
--- a/mysql-test/suite/sys_vars/r/innodb_thread_concurrency_basic.result	2008-12-19 15:12:15 +0000
+++ b/mysql-test/suite/sys_vars/r/innodb_thread_concurrency_basic.result	2009-02-17 12:24:09 +0000
@@ -1,19 +1,19 @@
 SET @global_start_value = @@global.innodb_thread_concurrency;
 SELECT @global_start_value;
 @global_start_value
-8
+0
 '#--------------------FN_DYNVARS_046_01------------------------#'
 SET @@global.innodb_thread_concurrency = 0;
 SET @@global.innodb_thread_concurrency = DEFAULT;
 SELECT @@global.innodb_thread_concurrency;
 @@global.innodb_thread_concurrency
-8
+0
 '#---------------------FN_DYNVARS_046_02-------------------------#'
 SET innodb_thread_concurrency = 1;
 ERROR HY000: Variable 'innodb_thread_concurrency' is a GLOBAL variable and should be set with SET GLOBAL
 SELECT @@innodb_thread_concurrency;
 @@innodb_thread_concurrency
-8
+0
 SELECT local.innodb_thread_concurrency;
 ERROR 42S02: Unknown table 'local' in field list
 SET global innodb_thread_concurrency = 0;
@@ -93,4 +93,4 @@ SELECT @@global.innodb_thread_concurrenc
 SET @@global.innodb_thread_concurrency = @global_start_value;
 SELECT @@global.innodb_thread_concurrency;
 @@global.innodb_thread_concurrency
-8
+0

=== modified file 'mysql-test/suite/sys_vars/r/table_definition_cache_basic.result'
--- a/mysql-test/suite/sys_vars/r/table_definition_cache_basic.result	2009-02-27 20:43:43 +0000
+++ b/mysql-test/suite/sys_vars/r/table_definition_cache_basic.result	2009-06-11 10:07:59 +0000
@@ -1,7 +1,7 @@
 SET @start_value = @@global.table_definition_cache;
 SELECT @start_value;
 @start_value
-256
+400
 '#--------------------FN_DYNVARS_019_01------------------------#'
 SET @@global.table_definition_cache = 100;
 Warnings:
@@ -9,12 +9,12 @@ Warning	1292	Truncated incorrect table_d
 SET @@global.table_definition_cache = DEFAULT;
 SELECT @@global.table_definition_cache;
 @@global.table_definition_cache
-256
+400
 '#---------------------FN_DYNVARS_019_02-------------------------#'
 SET @@global.table_definition_cache = DEFAULT;
-SELECT @@global.table_definition_cache = 128;
-@@global.table_definition_cache = 128
-0
+SELECT @@global.table_definition_cache = 400;
+@@global.table_definition_cache = 400
+1
 '#--------------------FN_DYNVARS_019_03------------------------#'
 SET @@global.table_definition_cache = 1;
 Warnings:
@@ -109,4 +109,4 @@ ERROR 42S22: Unknown column 'table_defin
 SET @@global.table_definition_cache = @start_value;
 SELECT @@global.table_definition_cache;
 @@global.table_definition_cache
-256
+400

=== modified file 'mysql-test/suite/sys_vars/r/table_open_cache_basic.result'
--- a/mysql-test/suite/sys_vars/r/table_open_cache_basic.result	2009-02-27 20:43:43 +0000
+++ b/mysql-test/suite/sys_vars/r/table_open_cache_basic.result	2009-06-11 10:07:59 +0000
@@ -1,17 +1,17 @@
 SET @start_value = @@global.table_open_cache ;
 SELECT @start_value;
 @start_value
-64
+400
 '#--------------------FN_DYNVARS_001_01------------------------#'
 SET @@global.table_open_cache  = 99;
 SET @@global.table_open_cache  = DeFAULT;
 SELECT @@global.table_open_cache;
 @@global.table_open_cache
-64
+400
 '#---------------------FN_DYNVARS_001_02-------------------------#'
 SET @@global.table_open_cache = Default;
-SELECT @@global.table_open_cache  = 64;
-@@global.table_open_cache  = 64
+SELECT @@global.table_open_cache  = 400;
+@@global.table_open_cache  = 400
 1
 '#--------------------FN_DYNVARS_001_03------------------------#'
 SET @@global.table_open_cache  = 8;
@@ -105,4 +105,4 @@ ERROR 42S22: Unknown column 'table_open_
 SET @@global.table_open_cache = @start_value;
 SELECT @@global.table_open_cache ;
 @@global.table_open_cache
-64
+400

=== modified file 'mysql-test/suite/sys_vars/t/innodb_file_io_threads_basic.test'
--- a/mysql-test/suite/sys_vars/t/innodb_file_io_threads_basic.test	2008-12-19 15:12:15 +0000
+++ b/mysql-test/suite/sys_vars/t/innodb_file_io_threads_basic.test	2009-02-17 12:24:09 +0000
@@ -28,7 +28,9 @@
 ####################################################################
 #   Displaying default value                                       #
 ####################################################################
-SELECT COUNT(@@GLOBAL.innodb_file_io_threads);
+SELECT COUNT(@@GLOBAL.innodb_read_io_threads);
+--echo 1 Expected
+SELECT COUNT(@@GLOBAL.innodb_write_io_threads);
 --echo 1 Expected
 
 
@@ -38,10 +40,17 @@ SELECT COUNT(@@GLOBAL.innodb_file_io_thr
 ####################################################################
 
 --error ER_INCORRECT_GLOBAL_LOCAL_VAR
-SET @@GLOBAL.innodb_file_io_threads=1;
+SET @@GLOBAL.innodb_read_io_threads=1;
 --echo Expected error 'Read only variable'
 
-SELECT COUNT(@@GLOBAL.innodb_file_io_threads);
+SELECT COUNT(@@GLOBAL.innodb_read_io_threads);
+--echo 1 Expected
+
+--error ER_INCORRECT_GLOBAL_LOCAL_VAR
+SET @@GLOBAL.innodb_write_io_threads=1;
+--echo Expected error 'Read only variable'
+
+SELECT COUNT(@@GLOBAL.innodb_write_io_threads);
 --echo 1 Expected
 
 
@@ -52,51 +61,84 @@ SELECT COUNT(@@GLOBAL.innodb_file_io_thr
 # Check if the value in GLOBAL Table matches value in variable  #
 #################################################################
 
-SELECT @@GLOBAL.innodb_file_io_threads = VARIABLE_VALUE
+SELECT @@GLOBAL.innodb_read_io_threads = VARIABLE_VALUE
 FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
-WHERE VARIABLE_NAME='innodb_file_io_threads';
+WHERE VARIABLE_NAME='innodb_read_io_threads';
 --echo 1 Expected
 
-SELECT COUNT(@@GLOBAL.innodb_file_io_threads);
+SELECT COUNT(@@GLOBAL.innodb_read_io_threads);
 --echo 1 Expected
 
 SELECT COUNT(VARIABLE_VALUE)
 FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES 
-WHERE VARIABLE_NAME='innodb_file_io_threads';
+WHERE VARIABLE_NAME='innodb_read_io_threads';
+--echo 1 Expected
+
+SELECT @@GLOBAL.innodb_write_io_threads = VARIABLE_VALUE
+FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES
+WHERE VARIABLE_NAME='innodb_write_io_threads';
+--echo 1 Expected
+
+SELECT COUNT(@@GLOBAL.innodb_write_io_threads);
 --echo 1 Expected
 
+SELECT COUNT(VARIABLE_VALUE)
+FROM INFORMATION_SCHEMA.GLOBAL_VARIABLES 
+WHERE VARIABLE_NAME='innodb_write_io_threads';
+--echo 1 Expected
 
 
 --echo '#---------------------BS_STVARS_027_04----------------------#'
 ################################################################################
 #  Check if accessing variable with and without GLOBAL point to same variable  #
 ################################################################################
-SELECT @@innodb_file_io_threads = @@GLOBAL.innodb_file_io_threads;
+SELECT @@innodb_read_io_threads = @@GLOBAL.innodb_read_io_threads;
 --echo 1 Expected
 
+SELECT @@innodb_write_io_threads = @@GLOBAL.innodb_write_io_threads;
+--echo 1 Expected
 
 
 --echo '#---------------------BS_STVARS_027_05----------------------#'
 ################################################################################
-#   Check if innodb_file_io_threads can be accessed with and without @@ sign   #
+#   Check if innodb_read_io_threads can be accessed with and without @@ sign   #
+#   Check if innodb_write_io_threads can be accessed with and without @@ sign  #
 ################################################################################
 
-SELECT COUNT(@@innodb_file_io_threads);
+SELECT COUNT(@@innodb_read_io_threads);
+--echo 1 Expected
+
+--Error ER_INCORRECT_GLOBAL_LOCAL_VAR
+SELECT COUNT(@@local.innodb_read_io_threads);
+--echo Expected error 'Variable is a GLOBAL variable'
+
+--Error ER_INCORRECT_GLOBAL_LOCAL_VAR
+SELECT COUNT(@@SESSION.innodb_read_io_threads);
+--echo Expected error 'Variable is a GLOBAL variable'
+
+SELECT COUNT(@@GLOBAL.innodb_read_io_threads);
+--echo 1 Expected
+
+--Error ER_BAD_FIELD_ERROR
+SELECT innodb_read_io_threads = @@SESSION.innodb_read_io_threads;
+--echo Expected error 'Readonly variable'
+
+SELECT COUNT(@@innodb_write_io_threads);
 --echo 1 Expected
 
 --Error ER_INCORRECT_GLOBAL_LOCAL_VAR
-SELECT COUNT(@@local.innodb_file_io_threads);
+SELECT COUNT(@@local.innodb_write_io_threads);
 --echo Expected error 'Variable is a GLOBAL variable'
 
 --Error ER_INCORRECT_GLOBAL_LOCAL_VAR
-SELECT COUNT(@@SESSION.innodb_file_io_threads);
+SELECT COUNT(@@SESSION.innodb_write_io_threads);
 --echo Expected error 'Variable is a GLOBAL variable'
 
-SELECT COUNT(@@GLOBAL.innodb_file_io_threads);
+SELECT COUNT(@@GLOBAL.innodb_write_io_threads);
 --echo 1 Expected
 
 --Error ER_BAD_FIELD_ERROR
-SELECT innodb_file_io_threads = @@SESSION.innodb_file_io_threads;
+SELECT innodb_write_io_threads = @@SESSION.innodb_write_io_threads;
 --echo Expected error 'Readonly variable'
 
 

=== modified file 'mysql-test/suite/sys_vars/t/innodb_max_dirty_pages_pct_basic.test'
--- a/mysql-test/suite/sys_vars/t/innodb_max_dirty_pages_pct_basic.test	2008-12-19 15:12:15 +0000
+++ b/mysql-test/suite/sys_vars/t/innodb_max_dirty_pages_pct_basic.test	2009-02-17 12:24:09 +0000
@@ -72,7 +72,7 @@ SELECT @@global.innodb_max_dirty_pages_p
 
 SET @@global.innodb_max_dirty_pages_pct = 1;
 SELECT @@global.innodb_max_dirty_pages_pct;
-SET @@global.innodb_max_dirty_pages_pct = 100;
+SET @@global.innodb_max_dirty_pages_pct = 99;
 SELECT @@global.innodb_max_dirty_pages_pct;
 
 --echo '#--------------------FN_DYNVARS_046_04-------------------------#'

=== modified file 'mysql-test/suite/sys_vars/t/innodb_thread_concurrency_basic.test'
--- a/mysql-test/suite/sys_vars/t/innodb_thread_concurrency_basic.test	2008-12-19 15:12:15 +0000
+++ b/mysql-test/suite/sys_vars/t/innodb_thread_concurrency_basic.test	2009-02-17 12:24:09 +0000
@@ -4,7 +4,7 @@
 # Scope: GLOBAL                                                               #
 # Access Type: Dynamic                                                        #
 # Data Type: Numeric                                                          #
-# Default Value: 8                                                            #
+# Default Value: 0                                                            #
 # Range: 0-1000                                                               #
 #                                                                             #
 #                                                                             #

=== modified file 'mysql-test/suite/sys_vars/t/table_definition_cache_basic.test'
--- a/mysql-test/suite/sys_vars/t/table_definition_cache_basic.test	2008-12-19 15:12:15 +0000
+++ b/mysql-test/suite/sys_vars/t/table_definition_cache_basic.test	2009-02-17 12:24:09 +0000
@@ -4,7 +4,7 @@
 # Scope: GLOBAL                                                                #
 # Access Type: Dynamic                                                         #
 # Data Type: Numeric                                                           #
-# Default Value: 128                                                           #
+# Default Value: 400                                                           #
 # Range: 1 - 524288                                                            #
 #                                                                              #
 #                                                                              #
@@ -52,7 +52,7 @@ SELECT @@global.table_definition_cache;
 ###############################################
 
 SET @@global.table_definition_cache = DEFAULT;
-SELECT @@global.table_definition_cache = 128;
+SELECT @@global.table_definition_cache = 400;
 
 
 --echo '#--------------------FN_DYNVARS_019_03------------------------#'

=== modified file 'mysql-test/suite/sys_vars/t/table_open_cache_basic.test'
--- a/mysql-test/suite/sys_vars/t/table_open_cache_basic.test	2008-12-19 15:12:15 +0000
+++ b/mysql-test/suite/sys_vars/t/table_open_cache_basic.test	2009-02-17 12:24:09 +0000
@@ -4,8 +4,8 @@
 # Scope: GLOBAL                                                               #
 # Access Type: Dynamic                                                        #
 # Data Type: numeric                                                          #
-# Default Value: 64                                                           #
-# Range: 1-524288                                                             #
+# Default Value: 400                                                          #
+# Range: 64-524288                                                            #
 #                                                                             #
 #                                                                             #
 # Creation Date: 2008-02-13                                                   #
@@ -54,7 +54,7 @@ SELECT @@global.table_open_cache;
 ############################################### 
 
 SET @@global.table_open_cache = Default;
-SELECT @@global.table_open_cache  = 64;
+SELECT @@global.table_open_cache  = 400;
 
 --echo '#--------------------FN_DYNVARS_001_03------------------------#'
 ######################################################################## 

=== modified file 'mysql-test/t/merge.test'
--- a/mysql-test/t/merge.test	2009-04-22 10:02:28 +0000
+++ b/mysql-test/t/merge.test	2009-07-02 16:13:21 +0000
@@ -1412,11 +1412,11 @@ set @save_table_definition_cache=@@globa
 --echo #
 --echo # Set @@global.table_definition_cache to minimum
 --echo #
-set @@global.table_definition_cache=256;
+set @@global.table_definition_cache=400;
 set @a=null;
-let $1 = 256;
+let $1 = 400;
 --echo #
---echo # Create 256 merge children
+--echo # Create 400 merge children
 --echo #
 --disable_query_log
 while ($1)
@@ -1448,7 +1448,7 @@ deallocate prepare stmt;
 --echo #
 --echo # Cleanup
 --echo #
-let $1 = 256;
+let $1 = 400;
 --disable_query_log
 while ($1)
 {

=== modified file 'mysql-test/t/partition_innodb.test'
--- a/mysql-test/t/partition_innodb.test	2008-12-13 11:02:16 +0000
+++ b/mysql-test/t/partition_innodb.test	2009-07-02 14:23:36 +0000
@@ -25,17 +25,21 @@ SET autocommit = 0;
 
 UPDATE t1 SET DATA = data*2 WHERE id = 3;
 
+# SHOW ENGINE InnoDB STATUS does not show transaction info in
+# PERFORMANCE-VERSION
 # grouping/referencing in replace_regex is very slow on long strings,
 # removing all before/after the interesting row before grouping/referencing
---replace_regex /.*---TRANSACTION [0-9]+ [0-9]+, .*, OS thread id [0-9]+// /MySQL thread id [0-9]+, query id [0-9]+ .*// /.*([0-9]+ lock struct\(s\)), heap size [0-9]+, ([0-9]+ row lock\(s\)).*/\1 \2/
-SHOW ENGINE InnoDB STATUS;
+#--replace_regex /.*---TRANSACTION [0-9]+ [0-9]+, .*, OS thread id [0-9]+// /MySQL thread id [0-9]+, query id [0-9]+ .*// /.*([0-9]+) lock struct\(s\), heap size [0-9]+, ([0-9]+) row lock\(s\).*/\1 lock struct(s) \2 row lock(s)/
+#SHOW ENGINE InnoDB STATUS;
 
 UPDATE t1 SET data = data*2 WHERE data = 2;
 
+# SHOW ENGINE InnoDB STATUS does not show transaction info in
+# PERFORMANCE-VERSION
 # grouping/referencing in replace_regex is very slow on long strings,
 # removing all before/after the interesting row before grouping/referencing
---replace_regex /.*---TRANSACTION [0-9]+ [0-9]+, .*, OS thread id [0-9]+// /MySQL thread id [0-9]+, query id [0-9]+ .*// /.*([0-9]+ lock struct\(s\)), heap size [0-9]+, ([0-9]+ row lock\(s\)).*/\1 \2/
-SHOW ENGINE InnoDB STATUS;
+#--replace_regex /.*---TRANSACTION [0-9]+ [0-9]+, .*, OS thread id [0-9]+// /MySQL thread id [0-9]+, query id [0-9]+ .*// /.*([0-9]+ lock struct\(s\)), heap size [0-9]+, ([0-9]+ row lock\(s\)).*/\1 \2/
+#SHOW ENGINE InnoDB STATUS;
 
 SET @@session.tx_isolation = @old_tx_isolation;
 

=== modified file 'mysys/Makefile.am'
--- a/mysys/Makefile.am	2009-02-07 15:47:14 +0000
+++ b/mysys/Makefile.am	2009-07-02 14:23:36 +0000
@@ -25,6 +25,7 @@ LDADD =			$(top_builddir)/strings/libmys
 pkglib_LTLIBRARIES =	libmysys.la
 libmysys_la_LDFLAGS = 	-static
 libmysys_la_SOURCES =   
+libmysys_la_DEPENDENCIES =   
 # This can't be listed here as $(top_builddir)/mysys/libmysyslt.la
 # or it breaks make's dependency track for -j builds
 libmysys_la_LIBADD =	libmysyslt.la libmysyswrap.la
@@ -87,3 +88,24 @@ DEFS =			-DDEFAULT_BASEDIR=\"$(prefix)\"
 			-DDEFAULT_GROUP_SUFFIX_ENV=MYSQL_GROUP_SUFFIX \
 			-DDEFAULT_SYSCONFDIR="\"$(sysconfdir)\"" \
                         @DEFS@
+
+if HAVE_DTRACE_DASH_G
+libmysys_la_LIBADD += probes_mysql.o
+libmysys_la_DEPENDENCIES += probes_mysql.o dtrace_files dtrace_providers
+CLEANFILES = probes_mysql.o dtrace_files dtrace_providers
+DTRACEFILES = mf_keycache.o
+DTRACEPROVIDER = probes_mysql.d
+CLEANFILES += $(DTRACEPROVIDER) dtrace_sources
+
+dtrace_files:
+	echo $(DTRACEFILES) > $@
+dtrace_providers: probes_mysql.d
+	echo $(DTRACEPROVIDER) > $@
+probes_mysql.d:
+	-$(RM) -f probes_mysql.d
+	$(CP) $(top_srcdir)/include/probes_mysql.d.base probes_mysql.d
+	echo timestamp > dtrace_sources
+
+probes_mysql.o: $(DTRACEPROVIDER) $(DTRACEFILES)
+	$(DTRACE) $(DTRACEFLAGS) -G -s $(DTRACEPROVIDER) $(DTRACEFILES) -o $@
+endif

=== modified file 'mysys/mf_keycache.c'
--- a/mysys/mf_keycache.c	2009-04-30 14:35:36 +0000
+++ b/mysys/mf_keycache.c	2009-07-02 14:23:36 +0000
@@ -112,6 +112,7 @@
 #include <my_bit.h>
 #include <errno.h>
 #include <stdarg.h>
+#include "probes_mysql.h"
 
 /*
   Some compilation flags have been added specifically for this module
@@ -2581,6 +2582,15 @@ uchar *key_cache_read(KEY_CACHE *keycach
     uint offset;
     int page_st;
 
+    if (MYSQL_KEYCACHE_READ_START_ENABLED())
+    {
+      MYSQL_KEYCACHE_READ_START(my_filename(file), length,
+                                (ulong) (keycache->blocks_used *
+                                         keycache->key_cache_block_size),
+                                (ulong) (keycache->blocks_unused *
+                                         keycache->key_cache_block_size));
+    }
+  
     /*
       When the key cache is once initialized, we use the cache_lock to
       reliably distinguish the cases of normal operation, resizing, and
@@ -2633,6 +2643,9 @@ uchar *key_cache_read(KEY_CACHE *keycach
 
       /* Request the cache block that matches file/pos. */
       keycache->global_cache_r_requests++;
+
+      MYSQL_KEYCACHE_READ_BLOCK(keycache->key_cache_block_size);
+
       block=find_key_block(keycache, file, filepos, level, 0, &page_st);
       if (!block)
       {
@@ -2652,6 +2665,7 @@ uchar *key_cache_read(KEY_CACHE *keycach
       {
         if (page_st != PAGE_READ)
         {
+          MYSQL_KEYCACHE_READ_MISS();
           /* The requested page is to be read into the block buffer */
           read_block(keycache, block,
                      keycache->key_cache_block_size, read_length+offset,
@@ -2676,6 +2690,10 @@ uchar *key_cache_read(KEY_CACHE *keycach
           my_errno= -1;
           block->status|= BLOCK_ERROR;
         }
+        else
+        {
+          MYSQL_KEYCACHE_READ_HIT();
+        }
       }
 
       /* block status may have added BLOCK_ERROR in the above 'if'. */
@@ -2728,7 +2746,16 @@ uchar *key_cache_read(KEY_CACHE *keycach
 #ifndef THREAD
       /* This is only true if we where able to read everything in one block */
       if (return_buffer)
+      {
+        if (MYSQL_KEYCACHE_READ_DONE_ENABLED())
+        {
+          MYSQL_KEYCACHE_READ_DONE((ulong) (keycache->blocks_used *
+                                            keycache->key_cache_block_size),
+                                   (ulong) (keycache->blocks_unused *
+                                            keycache->key_cache_block_size));
+        }
 	DBUG_RETURN(block->buffer);
+      }
 #endif
   next_block:
       buff+= read_length;
@@ -2736,6 +2763,13 @@ uchar *key_cache_read(KEY_CACHE *keycach
       offset= 0;
 
     } while ((length-= read_length));
+    if (MYSQL_KEYCACHE_READ_DONE_ENABLED())
+    {
+      MYSQL_KEYCACHE_READ_DONE((ulong) (keycache->blocks_used *
+                                        keycache->key_cache_block_size),
+                               (ulong) (keycache->blocks_unused *
+                                        keycache->key_cache_block_size));
+    }
     goto end;
   }
   KEYCACHE_DBUG_PRINT("key_cache_read", ("keycache not initialized"));
@@ -3080,6 +3114,15 @@ int key_cache_write(KEY_CACHE *keycache,
     uint offset;
     int page_st;
 
+    if (MYSQL_KEYCACHE_WRITE_START_ENABLED())
+    {
+      MYSQL_KEYCACHE_WRITE_START(my_filename(file), length,
+                                 (ulong) (keycache->blocks_used *
+                                          keycache->key_cache_block_size),
+                                 (ulong) (keycache->blocks_unused *
+                                          keycache->key_cache_block_size));
+    }
+
     /*
       When the key cache is once initialized, we use the cache_lock to
       reliably distinguish the cases of normal operation, resizing, and
@@ -3115,6 +3158,8 @@ int key_cache_write(KEY_CACHE *keycache,
       /* Cache could be disabled in a later iteration. */
       if (!keycache->can_be_used)
 	goto no_key_cache;
+
+      MYSQL_KEYCACHE_WRITE_BLOCK(keycache->key_cache_block_size);
       /* Start writing at the beginning of the cache block. */
       filepos-= offset;
       /* Do not write beyond the end of the cache block. */
@@ -3332,6 +3377,15 @@ end:
     dec_counter_for_resize_op(keycache);
     keycache_pthread_mutex_unlock(&keycache->cache_lock);
   }
+  
+  if (MYSQL_KEYCACHE_WRITE_DONE_ENABLED())
+  {
+    MYSQL_KEYCACHE_WRITE_DONE((ulong) (keycache->blocks_used *
+                                       keycache->key_cache_block_size),
+                              (ulong) (keycache->blocks_unused *
+                                       keycache->key_cache_block_size));
+  }
+  
 #if !defined(DBUG_OFF) && defined(EXTRA_DEBUG)
   DBUG_EXECUTE("exec",
                test_key_cache(keycache, "end of key_cache_write", 1););

=== modified file 'scripts/Makefile.am'
--- a/scripts/Makefile.am	2009-03-20 16:14:49 +0000
+++ b/scripts/Makefile.am	2009-07-02 14:23:36 +0000
@@ -36,7 +36,8 @@ bin_SCRIPTS =		@server_scripts@ \
 			mysqld_multi
 
 noinst_SCRIPTS =	make_binary_distribution \
-			make_sharedlib_distribution
+			make_sharedlib_distribution \
+			dheadgen.pl
 
 EXTRA_SCRIPTS =		make_binary_distribution.sh \
 			make_sharedlib_distribution.sh \
@@ -57,7 +58,8 @@ EXTRA_SCRIPTS =		make_binary_distributio
 			mysqlhotcopy.sh \
 			mysqldumpslow.sh \
 			mysqld_multi.sh \
-			mysqld_safe.sh
+			mysqld_safe.sh \
+			dheadgen.pl
 
 EXTRA_DIST =		$(EXTRA_SCRIPTS) \
 			mysqlaccess.conf \
@@ -121,7 +123,7 @@ mysql_fix_privilege_tables_sql.c: comp_s
 	sleep 2
 	$(top_builddir)/scripts/comp_sql$(EXEEXT) \
 	  mysql_fix_privilege_tables \
-	    $(top_srcdir)/scripts/mysql_fix_privilege_tables.sql $@
+	    $(top_builddir)/scripts/mysql_fix_privilege_tables.sql $@
 
 
 SUFFIXES = .sh

=== added file 'scripts/dheadgen.pl'
--- a/scripts/dheadgen.pl	1970-01-01 00:00:00 +0000
+++ b/scripts/dheadgen.pl	2009-06-04 16:47:38 +0000
@@ -0,0 +1,338 @@
+#!/usr/bin/perl -w
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are met:
+# 
+#    * Redistributions of source code must retain the above copyright
+#      notice, this list of conditions and the following disclaimer. 
+#    * Redistributions in binary form must reproduce the above copyright
+#      notice, this list of conditions and the following disclaimer in  
+#      the documentation and/or other materials provided with the       
+#      distribution.
+#    * Neither the name of the above-listed copyright holders nor the names
+#      of its contributors may be used to endorse or promote products derived
+#      from this software without specific prior written permission.  
+#       
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+# IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED  
+# TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+# PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+# OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,     
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR      
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF  
+# LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING    
+# NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS      
+# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# ident	"@(#)dheadgen.pl	1.4	07/06/24 SMI"
+
+#
+# DTrace Header Generator
+# -----------------------
+#
+# This script is meant to mimic the output of dtrace(1M) with the -h
+# (headergen) flag on systems that lack native support for DTrace. This script
+# is intended to be integrated into projects that use DTrace's static tracing
+# facilities (USDT), and invoked as part of the build process to have a
+# common build process on all target systems. To facilitate this, this script
+# is licensed under a BSD license. On systems with native DTrace support, the
+# dtrace(1M) command will be invoked to create the full header file; on other
+# systems, this script will generate a stub header file.
+#
+# Normally, generated macros take the form PROVIDER_PROBENAME().  It may be
+# desirable to customize the output of this script and of dtrace(1M) to
+# tailor the precise macro name. To do this, edit the emit_dtrace() subroutine
+# to pattern match for the lines you want to customize.
+#
+
+use strict;
+
+my @lines;
+my @tokens = ();
+my $lineno = 0;
+my $newline = 1;
+my $eof = 0;
+my $infile;
+my $outfile;
+my $force = 0;
+
+sub emit_dtrace {
+	my ($line) = @_;
+
+	#
+	# Insert customization here. For example, if you want to change the
+	# name of the macros you may do something like this:
+	#
+	# $line =~ s/(\s)[A-Z]+_/${1}TRACE_MOZILLA_/;
+	#
+
+	print $line;
+}
+
+#
+# The remaining code deals with parsing D provider definitions and emitting
+# the stub header file. There should be no need to edit this absent a bug.
+#
+
+#
+# Emit the two relevant macros for each probe in the given provider:
+#    PROVIDER_PROBENAME(<args>)
+#    PROVIDER_PROBENAME_ENABLED() (0)
+#
+sub emit_provider {
+	my ($provname, @probes) = @_;
+
+	$provname = uc($provname);
+
+	foreach my $probe (@probes) {
+		my $probename = uc($$probe{'name'});
+		my $argc = $$probe{'argc'};
+		my $line;
+
+		$probename =~ s/__/_/g;
+
+		$line = "#define\t${provname}_${probename}(";
+		for (my $i = 0; $i < $argc; $i++) {
+			$line .= ($i == 0 ? '' : ', ');
+			$line .= "arg$i";
+		}
+		$line .= ")\n";
+		emit_dtrace($line);
+		
+		$line = "#define\t${provname}_${probename}_ENABLED() (0)\n";
+		emit_dtrace($line);
+	}
+
+	emit_dtrace("\n");
+}
+
+sub emit_prologue {
+	my ($filename) = @_;
+
+	$filename =~ s/.*\///g;
+	$filename = uc($filename);
+	$filename =~ s/\./_/g;
+
+	emit_dtrace <<"EOF";
+/*
+ * Generated by dheadgen(1).
+ */
+
+#ifndef\t_${filename}
+#define\t_${filename}
+
+#ifdef\t__cplusplus
+extern "C" {
+#endif
+
+EOF
+}
+
+sub emit_epilogue {
+	my ($filename) = @_;
+
+	$filename =~ s/.*\///g;
+	$filename = uc($filename);
+	$filename =~ s/\./_/g;
+
+	emit_dtrace <<"EOF";
+#ifdef  __cplusplus
+}
+#endif
+
+#endif  /* _$filename */
+EOF
+}
+
+#
+# Get the next token from the file keeping track of the line number.
+#
+sub get_token {
+	my ($eof_ok) = @_;
+	my $tok;
+
+	while (1) {
+		while (scalar(@tokens) == 0) {
+			if (scalar(@lines) == 0) {
+				$eof = 1;
+				return if ($eof_ok);
+				die "expected more data at line $lineno";
+			}
+
+			$lineno++;
+			push(@tokens, split(/(\s+|\n|[(){},#;]|\/\*|\*\/)/,
+			    shift(@lines)));
+		}
+
+		$tok = shift(@tokens);
+		next if ($tok eq '');
+		next if ($tok =~ /^[ \t]+$/);
+
+		return ($tok);
+	}
+}
+
+#
+# Ignore newlines, comments, typedefs and preprocessor (#) lines
+#
+sub next_token {
+	my ($eof_ok) = @_;
+	my $tok;
+
+	while (1) {
+		$tok = get_token($eof_ok);
+		return if ($eof_ok && $eof);
+		if ($tok eq "typedef" or $tok =~ /^#/) {
+		  while (1) {
+		    $tok = get_token(0);
+		    last if ($tok eq "\n");
+		  }
+		  next;
+		} elsif ($tok eq '/*') {
+			while (get_token(0) ne '*/') {
+				next;
+			}
+			next;
+		} elsif ($tok eq "\n") {
+			next;
+		}
+
+		last;
+	}
+
+	return ($tok);
+}
+
+sub expect_token {
+	my ($t) = @_;
+	my $tok;
+
+	while (($tok = next_token(0)) eq "\n") {
+		next;
+	}
+
+	die "expected '$t' at line $lineno rather than '$tok'" if ($t ne $tok);
+}
+
+sub get_args {
+	expect_token('(');
+
+	my $tok = next_token(0);
+	my @args = ();
+
+	return (@args) if ($tok eq ')');
+
+	if ($tok eq 'void') {
+		expect_token(')');
+		return (@args);
+	}
+
+	my $arg = $tok;
+
+	while (1) {
+		$tok = next_token(0);
+		if ($tok eq ',' || $tok eq ')') {
+			push(@args, $arg);
+			$arg = '';
+			last if ($tok eq ')');
+		} else {
+			$arg = "$arg $tok";
+		}
+	}
+
+	return (@args);
+}
+
+sub usage {
+	die "usage: $0 [-f] <filename.d>\n";
+}
+
+usage() if (scalar(@ARGV) < 1);
+if ($ARGV[0] eq '-f') {
+	usage() if (scalar(@ARGV < 2));
+	$force = 1;
+	shift;
+}
+$infile = $ARGV[0];
+usage() if ($infile !~ /(.+)\.d$/);
+
+#
+# If the system has native support for DTrace, we'll use that binary instead.
+#
+if (-x '/usr/sbin/dtrace' && !$force) {
+	open(DTRACE, "-| /usr/sbin/dtrace -C -h -s $infile -o /dev/stdout")
+	    or die "can't invoke dtrace(1M)";
+
+	while (<DTRACE>) {
+		emit_dtrace($_);
+	}
+
+	close(DTRACE);
+
+	exit(0);
+}
+
+emit_prologue($infile);
+
+open(D, "< $infile") or die "couldn't open $infile";
+@lines = <D>;
+close(D);
+
+while (1) {
+	my $nl = 0;
+	my $tok = next_token(1);
+	last if $eof;
+
+	if ($newline && $tok eq '#') {
+		while (1) {
+			$tok = get_token(0);
+
+			last if ($tok eq "\n");
+		}
+		$nl = 1;
+	} elsif ($tok eq "\n") {
+		$nl = 1;
+	} elsif ($tok eq 'provider') {
+		my $provname = next_token(0);
+		my @probes = ();
+		expect_token('{');
+
+		while (1) {
+			$tok = next_token(0);
+			if ($tok eq 'probe') {
+				my $probename = next_token(0);
+				my @args = get_args();
+
+				next while (next_token(0) ne ';');
+
+				push(@probes, {
+				    'name' => $probename,
+				    'argc' => scalar(@args)
+				});
+
+			} elsif ($tok eq '}') {
+				expect_token(';');
+
+				emit_provider($provname, @probes);
+
+				last;
+			}
+		}
+
+	} else {
+		die "syntax error at line $lineno near '$tok'\n";
+	}
+
+	$newline = $nl;
+}
+
+emit_epilogue($infile);
+
+exit(0);

=== modified file 'scripts/make_binary_distribution.sh'
--- a/scripts/make_binary_distribution.sh	2009-04-14 21:07:28 +0000
+++ b/scripts/make_binary_distribution.sh	2009-07-02 14:23:36 +0000
@@ -342,7 +342,6 @@ BIN_FILES="extra/comp_err$BS extra/repla
   storage/myisam/myisamlog$BS storage/myisam/myisam_ftdump$BS \
   sql/mysqld$BS sql/mysqld-debug$BS \
   sql/mysql_tzinfo_to_sql$BS \
-  server-tools/instance-manager/mysqlmanager$BS \
   client/mysql$BS client/mysqlshow$BS client/mysqladmin$BS \
   client/mysqlslap$BS \
   client/mysqldump$BS client/mysqlimport$BS \

=== modified file 'sql/Makefile.am'
--- a/sql/Makefile.am	2009-06-08 14:58:33 +0000
+++ b/sql/Makefile.am	2009-07-02 15:02:45 +0000
@@ -28,23 +28,40 @@ SUBDIRS =		share backup
 libexec_PROGRAMS =	mysqld
 EXTRA_PROGRAMS =	gen_lex_hash
 bin_PROGRAMS =		mysql_tzinfo_to_sql
-DTRACE =                @DTRACE@
-DTRACEFLAGS =           @DTRACEFLAGS@
 DTRACEFILES =           filesort.o \
-			handler.o \
-			mysqld.o \
-			net_serv.o \
-			scheduler.o \
-			sp_head.o \
-			sql_cache.o \
-			sql_connect.o \
-			sql_cursor.o \
-			sql_delete.o \
-			sql_insert.o \
-			sql_parse.o \
-			sql_prepare.o \
-			sql_select.o \
-			sql_update.o
+			.libs/libndb_la-ha_ndbcluster.o \
+                        handler.o \
+                        mysqld.o \
+                        net_serv.o \
+                        scheduler.o \
+                        sp_head.o \
+                        sql_cache.o \
+                        sql_connect.o \
+                        sql_cursor.o \
+                        sql_delete.o \
+                        sql_insert.o \
+                        sql_parse.o \
+                        sql_prepare.o \
+                        sql_select.o \
+                        sql_update.o
+
+DTRACEFILES_DEPEND =    filesort.o \
+			libndb_la-ha_ndbcluster.lo \
+                        handler.o \
+                        mysqld.o \
+                        net_serv.o \
+                        scheduler.o \
+                        sp_head.o \
+                        sql_cache.o \
+                        sql_connect.o \
+                        sql_cursor.o \
+                        sql_delete.o \
+                        sql_insert.o \
+                        sql_parse.o \
+                        sql_prepare.o \
+                        sql_select.o \
+                        sql_update.o
+
 
 noinst_LTLIBRARIES=	libndb.la \
 			udf_example.la
@@ -100,7 +117,7 @@ noinst_HEADERS =	item.h item_func.h item
 			sql_plugin.h authors.h event_parse_data.h \
 			event_data_objects.h event_scheduler.h \
 			sql_partition.h partition_info.h partition_element.h \
-			probes.h sql_audit.h transaction.h \
+			sql_audit.h transaction.h \
 			contributors.h sql_servers.h bml.h \
 			si_objects.h si_logs.h sql_plist.h mdl.h records.h \
 			sql_signal.h \
@@ -157,10 +174,6 @@ mysqld_SOURCES =	sql_lex.cc sql_handler.
 			sql_signal.cc \
 			rpl_handler.cc
 
-if HAVE_DTRACE
-  mysqld_SOURCES += probes.d
-endif
-
 nodist_mysqld_SOURCES =	mini_client_errors.c pack.c client.c my_time.c my_user.c 
 
 libndb_la_CPPFLAGS=	@ndbcluster_includes@ $(AM_CPPFLAGS)
@@ -223,12 +236,6 @@ lex_hash.h:	gen_lex_hash.cc lex.h
 udf_example_la_SOURCES= udf_example.c
 udf_example_la_LDFLAGS= -module -rpath $(pkglibdir)
 
-probes.h: probes.d
-	$(DTRACE) $(DTRACEFLAGS) -h -s probes.d
-	mv probes.h probes.h.bak
-	sed "s/#include <unistd.h>//g" probes.h.bak > probes.h
-	rm probes.h.bak
-
 # We might have some stuff not built in this build, but that we want to install
 install-exec-hook:
 	$(mkinstalldirs) $(DESTDIR)$(libexecdir) $(DESTDIR)$(pkglibdir)
@@ -236,9 +243,38 @@ install-exec-hook:
 	test ! -f mysqld-debug.sym.gz   || $(INSTALL_DATA)    mysqld-debug.sym.gz   $(DESTDIR)$(pkglibdir)
 	test ! -f mysqld.sym.gz         || $(INSTALL_DATA)    mysqld.sym.gz         $(DESTDIR)$(pkglibdir)
 
-SUFFIXES : .d
-
-.d.o :
-	$(DTRACE) $(DTRACEFLAGS) -G -s $< $(DTRACEFILES)
+if HAVE_DTRACE_DASH_G
+libndb_la_LIBADD = probes_libndb.o
+libndb_la_DEPENDENCIES = dtrace_files dtrace_providers probes_libndb.o
+mysqld_LDADD += probes_all.o
+mysqld_DEPENDENCIES += dtrace_files dtrace_providers probes_all.o
+CLEANFILES += dtrace_files dtrace_providers probes_all.o
+DTRACEPROVIDER = probes_mysql.d
+CLEANFILES += $(DTRACEPROVIDER)
+
+dtrace_files:
+	echo $(DTRACEFILES) > $@
+dtrace_providers: probes_mysql.d
+	echo $(DTRACEPROVIDER) > $@
+probes_mysql.d:
+	-$(RM) -f probes_mysql.d
+	$(CP) $(top_srcdir)/include/probes_mysql.d.base probes_mysql.d
+
+DTRACEDIRS = . ../mysys $(patsubst %,$(top_builddir)/storage/%,@mysql_se_dirs@)
+
+probes_all.o: probes_mysql.d $(DTRACEFILES_DEPEND)
+	providers=`(for i in $(DTRACEDIRS); do cat $$i/dtrace_providers 2>/dev/null; done) | tr " " "\n" | sort | uniq | sed -e '/^$$/d' -e 's/^/-s /'`; \
+	objects=`for i in $(DTRACEDIRS); do f=\`cat $$i/dtrace_files 2>/dev/null\`; for j in $$f; do test -f $$i/$$j && echo "$$i/$$j "; done; done`; \
+	$(DTRACE) $(DTRACEFLAGS) -G $$providers $$objects -o $@
+
+# Can't depend directly on .libs/*.o, because there is no generated rule for
+# that in the Makefile; it is a byproduct of *.lo
+probes_libndb.o: probes_mysql.d libndb_la-ha_ndbcluster.lo
+	if test -f .libs/libndb_la-ha_ndbcluster.o ; then \
+		$(DTRACE) $(DTRACEFLAGS) -G -s probes_mysql.d .libs/libndb_la-ha_ndbcluster.o -o $@; \
+	fi; \
+	if test -f libndb_la-ha_ndbcluster.o ; then \
+		$(DTRACE) $(DTRACEFLAGS) -G -s probes_mysql.d libndb_la-ha_ndbcluster.o -o $@; \
+	fi
 
-probes.o : $(DTRACEFILES)
+endif

=== modified file 'sql/filesort.cc'
--- a/sql/filesort.cc	2009-02-13 16:30:54 +0000
+++ b/sql/filesort.cc	2009-07-02 14:23:36 +0000
@@ -27,6 +27,7 @@
 #endif
 #include <m_ctype.h>
 #include "sql_sort.h"
+#include "probes_mysql.h"
 
 #ifndef THREAD
 #define SKIP_DBUG_IN_FILESORT

=== modified file 'sql/ha_ndbcluster.cc'
--- a/sql/ha_ndbcluster.cc	2009-05-15 13:45:06 +0000
+++ b/sql/ha_ndbcluster.cc	2009-07-02 14:23:36 +0000
@@ -29,6 +29,14 @@
 #include "mysql_priv.h"
 #include "rpl_mi.h"
 
+/*
+  There is an incompatibility between GNU ar and the Solaris linker
+  which makes the Solaris linker return an elf error when compiling
+  without NDB support (which makes libndb.a an empty library).
+  To avoid this we add a dummy definition of a global (non-static)
+  variable, which keeps the library from being empty.
+*/
+int ha_ndb_dummy;
 #include <my_dir.h>
 #ifdef WITH_NDBCLUSTER_STORAGE_ENGINE
 #include "ha_ndbcluster.h"
@@ -42,6 +50,7 @@
 #include "ha_ndbcluster_connection.h"
 
 #include <mysql/plugin.h>
+#include "probes_mysql.h"
 
 #ifdef ndb_dynamite
 #undef assert
@@ -141,6 +150,13 @@ static uint ndbcluster_alter_partition_f
   DBUG_RETURN(ndb_to_mysql_error(&tmp)); \
 }
 
+#define ERR_RETURN_PREPARE(rc, err)                  \
+{                                        \
+  const NdbError& tmp= err;              \
+  set_ndb_err(current_thd, tmp);         \
+  rc= ndb_to_mysql_error(&tmp); \
+}
+
 #define ERR_BREAK(err, code)             \
 {                                        \
   const NdbError& tmp= err;              \
@@ -3707,9 +3723,11 @@ int ha_ndbcluster::index_read(uchar *buf
 {
   key_range start_key;
   bool descending= FALSE;
+  int rc;
   DBUG_ENTER("ha_ndbcluster::index_read");
   DBUG_PRINT("enter", ("active_index: %u, key_len: %u, find_flag: %d", 
                        active_index, key_len, find_flag));
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
 
   start_key.key= key;
   start_key.length= key_len;
@@ -3725,43 +3743,61 @@ int ha_ndbcluster::index_read(uchar *buf
   default:
     break;
   }
-  DBUG_RETURN(read_range_first_to_buf(&start_key, 0, descending,
-                                      m_sorted, buf));
+  rc= read_range_first_to_buf(&start_key, 0, descending,
+                              m_sorted, buf);
+  MYSQL_INDEX_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
 }
 
 
 int ha_ndbcluster::index_next(uchar *buf)
 {
+  int rc;
   DBUG_ENTER("ha_ndbcluster::index_next");
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); 
   ha_statistic_increment(&SSV::ha_read_next_count);
-  DBUG_RETURN(next_result(buf));
+  rc= next_result(buf);
+  MYSQL_INDEX_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
 }
 
 
 int ha_ndbcluster::index_prev(uchar *buf)
 {
+  int rc;
   DBUG_ENTER("ha_ndbcluster::index_prev");
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); 
   ha_statistic_increment(&SSV::ha_read_prev_count);
-  DBUG_RETURN(next_result(buf));
+  rc= next_result(buf);
+  MYSQL_INDEX_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
 }
 
 
 int ha_ndbcluster::index_first(uchar *buf)
 {
+  int rc;
   DBUG_ENTER("ha_ndbcluster::index_first");
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str); 
   ha_statistic_increment(&SSV::ha_read_first_count);
   // Start the ordered index scan and fetch the first row
 
   // Only HA_READ_ORDER indexes get called by index_first
-  DBUG_RETURN(ordered_index_scan(0, 0, TRUE, FALSE, buf, NULL));
+  rc= ordered_index_scan(0, 0, TRUE, FALSE, buf, NULL);
+  MYSQL_INDEX_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
 }
 
 
 int ha_ndbcluster::index_last(uchar *buf)
 {
+  int rc;
   DBUG_ENTER("ha_ndbcluster::index_last");
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
   ha_statistic_increment(&SSV::ha_read_last_count);
-  DBUG_RETURN(ordered_index_scan(0, 0, TRUE, TRUE, buf, NULL));
+  rc= ordered_index_scan(0, 0, TRUE, TRUE, buf, NULL);
+  MYSQL_INDEX_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
 }
 
 int ha_ndbcluster::index_read_last(uchar * buf, const uchar * key, uint key_len)
@@ -3852,16 +3888,24 @@ int ha_ndbcluster::read_range_first(cons
                                     const key_range *end_key,
                                     bool eq_r, bool sorted)
 {
+  int rc;
   uchar* buf= table->record[0];
   DBUG_ENTER("ha_ndbcluster::read_range_first");
-  DBUG_RETURN(read_range_first_to_buf(start_key, end_key, FALSE,
-                                      sorted, buf));
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+  rc= read_range_first_to_buf(start_key, end_key, FALSE,
+                              sorted, buf);
+  MYSQL_INDEX_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
 }
 
 int ha_ndbcluster::read_range_next()
 {
+  int rc;
   DBUG_ENTER("ha_ndbcluster::read_range_next");
-  DBUG_RETURN(next_result(table->record[0]));
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+  rc= next_result(table->record[0]);
+  MYSQL_INDEX_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
 }
 
 
@@ -3932,12 +3976,18 @@ int ha_ndbcluster::rnd_end()
 
 int ha_ndbcluster::rnd_next(uchar *buf)
 {
+  int rc;
   DBUG_ENTER("rnd_next");
+  MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str,
+                       TRUE);
   ha_statistic_increment(&SSV::ha_read_rnd_next_count);
 
   if (!m_active_cursor)
-    DBUG_RETURN(full_table_scan(NULL, NULL, 0, buf));
-  DBUG_RETURN(next_result(buf));
+    rc= full_table_scan(NULL, NULL, 0, buf);
+  else
+    rc= next_result(buf);
+  MYSQL_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
 }
 
 
@@ -3949,7 +3999,10 @@ int ha_ndbcluster::rnd_next(uchar *buf)
 
 int ha_ndbcluster::rnd_pos(uchar *buf, uchar *pos)
 {
+  int rc;
   DBUG_ENTER("rnd_pos");
+  MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str,
+                       FALSE);
   ha_statistic_increment(&SSV::ha_read_rnd_count);
   // The primary key for the record is stored in pos
   // Perform a pk_read using primary key "index"
@@ -3982,7 +4035,9 @@ int ha_ndbcluster::rnd_pos(uchar *buf, u
       DBUG_PRINT("info", ("partition id %u", part_spec.start_part));
     }
     DBUG_DUMP("key", pos, key_length);
-    DBUG_RETURN(pk_read(pos, key_length, buf, part_spec.start_part));
+    rc= pk_read(pos, key_length, buf, part_spec.start_part);
+    MYSQL_READ_ROW_DONE(rc);
+    DBUG_RETURN(rc);
   }
 }
 
@@ -9516,7 +9571,7 @@ int ha_ndbcluster::multi_range_read_init
     DBUG_RETURN(handler::multi_range_read_init(seq_funcs, seq_init_param,
                                                n_ranges, mode, buffer));
   }
-
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
   m_disable_multi_read= FALSE;
 
   mrr_is_output_sorted= test(mode & HA_MRR_SORTED);
@@ -9563,6 +9618,7 @@ int ha_ndbcluster::multi_range_start_ret
   NDB_INDEX_TYPE cur_index_type= get_index_type(active_index);
   const NdbOperation *oplist[MRR_MAX_RANGES];
   uint num_keyops= 0;
+  int res;
   DBUG_ENTER("multi_range_start_retrievals");
 
   /*
@@ -9695,7 +9751,11 @@ int ha_ndbcluster::multi_range_start_ret
           get_hidden_fields_scan(&options, gets);
 
         if (m_cond && m_cond->generate_scan_filter(&code, &options))
-          ERR_RETURN(code.getNdbError());
+        {
+          ERR_RETURN_PREPARE(res, code.getNdbError());
+          MYSQL_READ_ROW_DONE(res);
+          DBUG_RETURN(res);
+        }
 
         /* Define scan */
         NdbIndexScanOperation *scanOp= m_thd_ndb->trans->scanIndex
@@ -9708,8 +9768,11 @@ int ha_ndbcluster::multi_range_start_ret
            sizeof(NdbScanOperation::ScanOptions));
 
         if (!scanOp)
-          ERR_RETURN(m_thd_ndb->trans->getNdbError());
-
+        {
+          ERR_RETURN_PREPARE(res, m_thd_ndb->trans->getNdbError());
+          MYSQL_READ_ROW_DONE(res);
+          DBUG_RETURN(res);
+        }
         m_multi_cursor= scanOp;
 
         /*
@@ -9731,7 +9794,9 @@ int ha_ndbcluster::multi_range_start_ret
       if (m_multi_cursor->setBound(m_index[active_index].ndb_record_key,
                                    bound))
       {
-        ERR_RETURN(m_thd_ndb->trans->getNdbError());
+        ERR_RETURN_PREPARE(res, m_thd_ndb->trans->getNdbError());
+        MYSQL_READ_ROW_DONE(res);
+        DBUG_RETURN(res);
       }
 
       multi_range_entry_type(row_buf)= enum_ordered_range;
@@ -9756,14 +9821,22 @@ int ha_ndbcluster::multi_range_start_ret
                                          mrr_cur_range.start_key.key,
                                          multi_range_row(row_buf), lm,
                                          ppartitionId)))
-        ERR_RETURN(m_thd_ndb->trans->getNdbError());
+      {
+        ERR_RETURN_PREPARE(res, m_thd_ndb->trans->getNdbError());
+        MYSQL_READ_ROW_DONE(res);
+        DBUG_RETURN(res);
+      }
       oplist[num_keyops++]= op;
       row_buf= multi_range_next_entry(row_buf, reclength);
     }
   }
 
   if (execute_no_commit_ie(this, m_thd_ndb->trans))
-    ERR_RETURN(m_thd_ndb->trans->getNdbError());
+  {
+    ERR_RETURN_PREPARE(res, m_thd_ndb->trans->getNdbError());
+    MYSQL_READ_ROW_DONE(res);
+    DBUG_RETURN(res);
+  }
 
   if (!m_range_res)
   {
@@ -9823,25 +9896,27 @@ int ha_ndbcluster::multi_range_start_ret
           (But we can still safely return an error code in non-debug builds).
         */
         DBUG_ASSERT(FALSE);
+        MYSQL_READ_ROW_DONE(error.code);
         ERR_RETURN(error);      /* purecov: deadcode */
       }
     }
     op_idx++;
   }
-
+  MYSQL_READ_ROW_DONE(0);
   DBUG_RETURN(0);
 }
 
 int ha_ndbcluster::multi_range_read_next(char **range_info)
 {
   int res;
-  DBUG_ENTER("ha_ndbcluster::multi_range_read_next");
+  DBUG_ENTER("ha_ndbcluster::read_multi_range_next");
 
   if (m_disable_multi_read)
   {
     DBUG_RETURN(handler::multi_range_read_next(range_info));
   }
 
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
   for(;;)
   {
 
@@ -9880,12 +9955,12 @@ int ha_ndbcluster::multi_range_read_next
                                  expected_range_no, range_info);
           memcpy(table->record[0], multi_range_row(row_buf),
                  table_share->reclength);
+          MYSQL_INDEX_READ_ROW_DONE(0);
           DBUG_RETURN(0);
 
         case enum_ordered_range:
           /* An index scan range. */
           {
-            int res;
             if ((res= read_multi_range_fetch_next()) != 0)
             {
               multi_range_get_custom(multi_range_buffer,
@@ -9894,6 +9969,7 @@ int ha_ndbcluster::multi_range_read_next
               m_multi_range_result_ptr=
                 multi_range_next_entry(m_multi_range_result_ptr,
                                        table_share->reclength);
+              MYSQL_INDEX_READ_ROW_DONE(res);
               DBUG_RETURN(res);
             }
           }
@@ -9935,7 +10011,7 @@ int ha_ndbcluster::multi_range_read_next
                 pk operation.
               */
               m_active_cursor= m_multi_cursor;
-
+              MYSQL_INDEX_READ_ROW_DONE(0);
               DBUG_RETURN(0);
             }
             else if (current_range_no > expected_range_no)
@@ -9963,15 +10039,20 @@ int ha_ndbcluster::multi_range_read_next
         multi_range_next_entry(m_multi_range_result_ptr, table_share->reclength);
     }
 
-  if (first_running_range == ranges_in_seq)
-    DBUG_RETURN(HA_ERR_END_OF_FILE);
-
-  /*
-    Read remaining ranges
-  */
-  if ((res= multi_range_start_retrievals(first_running_range)))
-    DBUG_RETURN(res);
+    if (first_running_range == ranges_in_seq)
+    {
+      MYSQL_INDEX_READ_ROW_DONE(HA_ERR_END_OF_FILE);
+      DBUG_RETURN(HA_ERR_END_OF_FILE);
+    }
 
+    /*
+      Read remaining ranges
+    */
+    if ((res= multi_range_start_retrievals(first_running_range)))
+    {
+      MYSQL_INDEX_READ_ROW_DONE(res);
+      DBUG_RETURN(res);
+    }
   }
 }
 

=== modified file 'sql/handler.cc'
--- a/sql/handler.cc	2009-06-09 16:53:34 +0000
+++ b/sql/handler.cc	2009-07-02 14:23:36 +0000
@@ -29,6 +29,7 @@
 #include <myisampack.h>
 #include "myisam.h"
 #include "transaction.h"
+#include "probes_mysql.h"
 
 #ifdef WITH_PARTITION_STORAGE_ENGINE
 #include "ha_partition.h"
@@ -5465,6 +5466,9 @@ int handler::ha_external_lock(THD *thd, 
   */
   int error= external_lock(thd, lock_type);
 
+  if (error == 0)
+    cached_table_flags= table_flags();
+
   if (MYSQL_HANDLER_RDLOCK_DONE_ENABLED() ||
       MYSQL_HANDLER_WRLOCK_DONE_ENABLED() ||
       MYSQL_HANDLER_UNLOCK_DONE_ENABLED())
@@ -5482,9 +5486,6 @@ int handler::ha_external_lock(THD *thd, 
       MYSQL_HANDLER_UNLOCK_DONE(error);
     }
   }
-  
-  if (error == 0)
-    cached_table_flags= table_flags();
   DBUG_RETURN(error);
 }
 
@@ -5524,6 +5525,7 @@ int handler::ha_write_row(uchar *buf)
   
   if (unlikely(error != 0))
     DBUG_RETURN(error);
+
   if (unlikely(error= binlog_log_row(table, 0, buf, log_func)))
     DBUG_RETURN(error); /* purecov: inspected */
   DBUG_RETURN(0);

=== modified file 'sql/mysql_priv.h'
--- a/sql/mysql_priv.h	2009-06-24 08:14:35 +0000
+++ b/sql/mysql_priv.h	2009-07-03 12:25:24 +0000
@@ -330,9 +330,9 @@ enum open_table_mode
 #define MAX_ACCEPT_RETRY	10	// Test accept this many times
 #define MAX_FIELDS_BEFORE_HASH	32
 #define USER_VARS_HASH_SIZE     16
-#define TABLE_OPEN_CACHE_MIN    64
-#define TABLE_OPEN_CACHE_DEFAULT 64
-#define TABLE_DEF_CACHE_DEFAULT 256
+#define TABLE_OPEN_CACHE_MIN    400
+#define TABLE_OPEN_CACHE_DEFAULT 400
+#define TABLE_DEF_CACHE_DEFAULT 400
 /**
   We must have room for at least 256 table definitions in the table
   cache, since otherwise there is no chance prepared
@@ -346,7 +346,7 @@ enum open_table_mode
   for now the only solution is to ensure that the table definition
   cache can contain at least all tables of a given statement.
 */
-#define TABLE_DEF_CACHE_MIN     256
+#define TABLE_DEF_CACHE_MIN     400
 
 /*
   Stack reservation.
@@ -1202,12 +1202,14 @@ bool mysql_prepare_update(THD *thd, TABL
 int mysql_update(THD *thd,TABLE_LIST *tables,List<Item> &fields,
 		 List<Item> &values,COND *conds,
 		 uint order_num, ORDER *order, ha_rows limit,
-		 enum enum_duplicates handle_duplicates, bool ignore);
+		 enum enum_duplicates handle_duplicates, bool ignore,
+                 ha_rows *found_return, ha_rows *updated_return);
 bool mysql_multi_update(THD *thd, TABLE_LIST *table_list,
                         List<Item> *fields, List<Item> *values,
                         COND *conds, ulonglong options,
                         enum enum_duplicates handle_duplicates, bool ignore,
-                        SELECT_LEX_UNIT *unit, SELECT_LEX *select_lex);
+                        SELECT_LEX_UNIT *unit, SELECT_LEX *select_lex,
+                        multi_update **result);
 bool mysql_prepare_insert(THD *thd, TABLE_LIST *table_list, TABLE *table,
                           List<Item> &fields, List_item *values,
                           List<Item> &update_fields,

=== modified file 'sql/mysqld.cc'
--- a/sql/mysqld.cc	2009-07-02 08:12:35 +0000
+++ b/sql/mysqld.cc	2009-07-03 12:25:24 +0000
@@ -30,6 +30,7 @@
 #include "sql_audit.h"
 #include "debug_sync.h"
 #include <waiting_threads.h>
+#include "probes_mysql.h"
 
 #include "../storage/myisam/ha_myisam.h"
 
@@ -130,6 +131,16 @@ extern "C" {					// Because of SCO 3.2V4
 #define SIGNAL_FMT "signal %d"
 #endif
 
+#ifdef HAVE_SOLARIS_LARGE_PAGES
+#include <sys/mman.h>
+#if defined(__sun__) && defined(__GNUC__) && defined(__cplusplus) \
+    && defined(_XOPEN_SOURCE)
+extern int getpagesizes(size_t *, int);
+extern int getpagesizes2(size_t *, int);
+extern int memcntl(caddr_t, size_t, int, caddr_t, int, int);
+#endif /* __sun__ ... */
+#endif /* HAVE_SOLARIS_LARGE_PAGES */
+
 #ifdef __NETWARE__
 #define zVOLSTATE_ACTIVE 6
 #define zVOLSTATE_DEACTIVE 2
@@ -524,6 +535,7 @@ my_bool opt_log_slow_admin_statements= 0
 my_bool opt_log_slow_slave_statements= 0;
 my_bool lower_case_file_system= 0;
 my_bool opt_large_pages= 0;
+my_bool opt_super_large_pages= 0;
 my_bool opt_myisam_use_mmap= 0;
 uint    opt_large_page_size= 0;
 #if defined(ENABLED_DEBUG_SYNC)
@@ -1990,6 +2002,10 @@ void close_connection(THD *thd, uint err
   if (lock)
     (void) pthread_mutex_unlock(&LOCK_thread_count);
   MYSQL_CONNECTION_DONE((int) errcode, thd->thread_id);
+  if (MYSQL_CONNECTION_DONE_ENABLED())
+  {
+    sleep(0); /* Workaround to avoid tailcall optimisation */
+  }
   DBUG_VOID_RETURN;
 }
 #endif /* EMBEDDED_LIBRARY */
@@ -3429,10 +3445,71 @@ static int init_common_variables(const c
   /* Initialize large page size */
   if (opt_large_pages && (opt_large_page_size= my_get_large_page_size()))
   {
+      DBUG_PRINT("info", ("Large page set, large_page_size = %d",
+                 opt_large_page_size));
       my_use_large_pages= 1;
       my_large_page_size= opt_large_page_size;
   }
+  else
+  {
+    opt_large_pages= 0;
+    /* 
+       Either not configured to use large pages, or Linux hasn't
+       been compiled with large page support.
+    */
+  }
 #endif /* HAVE_LARGE_PAGES */
+#ifdef HAVE_SOLARIS_LARGE_PAGES
+#define LARGE_PAGESIZE (4*1024*1024)  /* 4MB */
+#define SUPER_LARGE_PAGESIZE (256*1024*1024)  /* 256MB */
+  if (opt_large_pages)
+  {
+  /*
+    Tell the kernel that we want to use 4/256MB pages for heap storage
+    and also for the stack. We use 4 MByte as default and if the
+    super-large-page is set we increase it to 256 MByte. 256 MByte
+    is for server installations with GBytes of RAM memory where
+    the MySQL Server will have page caches and other memory regions
+    measured in a number of GBytes.
+    We use the biggest supported page size that does not exceed the
+    desired page size above; memcntl() return values are not checked.
+  */
+   int nelem;
+   /* Upper bound for the page size requested from the kernel. */
+   size_t max_desired_page_size;
+   if (opt_super_large_pages)
+     max_desired_page_size= SUPER_LARGE_PAGESIZE;
+   else
+     max_desired_page_size= LARGE_PAGESIZE;
+   nelem = getpagesizes(NULL, 0);
+   if (nelem > 0)
+   {
+     size_t *pagesize = (size_t *) malloc(sizeof(size_t) * nelem);
+     size_t max_page_size= 0;
+     if (pagesize != NULL && getpagesizes(pagesize, nelem) > 0)
+     {
+       for (int i= 0; i < nelem; i++)
+       {
+         if (pagesize[i] > max_page_size &&
+             pagesize[i] <= max_desired_page_size)
+            max_page_size= pagesize[i];
+       }
+     }
+     free(pagesize); /* also on getpagesizes() failure; free(NULL) is a no-op */
+     if (max_page_size > 0)
+     {
+       struct memcntl_mha mpss;
+
+       mpss.mha_cmd= MHA_MAPSIZE_BSSBRK;
+       mpss.mha_pagesize= max_page_size;
+       mpss.mha_flags= 0;
+       memcntl(NULL, 0, MC_HAT_ADVISE, (caddr_t)&mpss, 0, 0);
+       mpss.mha_cmd= MHA_MAPSIZE_STACK;
+       memcntl(NULL, 0, MC_HAT_ADVISE, (caddr_t)&mpss, 0, 0);
+     }
+   }
+  }
+#endif /* HAVE_SOLARIS_LARGE_PAGES */
 
   /* connections and databases needs lots of files */
   {
@@ -3700,7 +3777,6 @@ You should consider changing lower_case_
   table_alias_charset= (lower_case_table_names ?
 			files_charset_info :
 			&my_charset_bin);
-
   return 0;
 }
 
@@ -5865,6 +5941,7 @@ enum options_mysqld
   OPT_MAX_SP_RECURSION_DEPTH,
   OPT_AUTO_INCREMENT, OPT_AUTO_INCREMENT_OFFSET,
   OPT_ENABLE_LARGE_PAGES,
+  OPT_ENABLE_SUPER_LARGE_PAGES,
   OPT_TIMED_MUTEXES,
   OPT_OLD_STYLE_USER_LIMITS,
   OPT_LOG_SLOW_ADMIN_STATEMENTS,
@@ -6135,11 +6212,16 @@ struct my_option my_long_options[] =
   {"general_log", OPT_GENERAL_LOG,
    "Enable|disable general log", (uchar**) &opt_log,
    (uchar**) &opt_log, 0, GET_BOOL, OPT_ARG, 0, 0, 0, 0, 0, 0},
-#ifdef HAVE_LARGE_PAGES
+#ifdef HAVE_LARGE_PAGE_OPTION
   {"large-pages", OPT_ENABLE_LARGE_PAGES, "Enable support for large pages. \
 Disable with --skip-large-pages.",
-   (uchar**) &opt_large_pages, (uchar**) &opt_large_pages, 0, GET_BOOL, NO_ARG, 0, 0, 0,
-   0, 0, 0},
+   (uchar**) &opt_large_pages, (uchar**) &opt_large_pages, 0, GET_BOOL,
+   NO_ARG, 0, 0, 1, 0, 1, 0},
+  {"super-large-pages", OPT_ENABLE_SUPER_LARGE_PAGES,
+   "Enable support for super large pages. \
+Disable with --skip-super-large-pages.",
+   (uchar**) &opt_super_large_pages, (uchar**) &opt_super_large_pages, 0,
+   GET_BOOL, NO_ARG, 0, 0, 1, 0, 1, 0},
 #endif
   {"ignore-builtin-innodb", OPT_IGNORE_BUILTIN_INNODB ,
    "Disable initialization of builtin InnoDB plugin",
@@ -7129,13 +7211,11 @@ The minimum value for this variable is 4
    "minimal size of unit in wich space for results is allocated (last unit will be trimed after writing all result data.",
    (uchar**) &query_cache_min_res_unit, (uchar**) &query_cache_min_res_unit,
    0, GET_ULONG, REQUIRED_ARG, QUERY_CACHE_MIN_RESULT_DATA_SIZE,
-   0, (longlong) ULONG_MAX, 0, 1, 0},
-#endif /*HAVE_QUERY_CACHE*/
+   0, ULONG_MAX, 0, 1, 0},
   {"query_cache_size", OPT_QUERY_CACHE_SIZE,
    "The memory allocated to store results from old queries.",
    (uchar**) &query_cache_size, (uchar**) &query_cache_size, 0, GET_ULONG,
    REQUIRED_ARG, 0, 0, (longlong) ULONG_MAX, 0, 1024, 0},
-#ifdef HAVE_QUERY_CACHE
   {"query_cache_type", OPT_QUERY_CACHE_TYPE,
    "0 = OFF = Don't cache or retrieve results. 1 = ON = Cache all results except SELECT SQL_NO_CACHE ... queries. 2 = DEMAND = Cache only SELECT SQL_CACHE ... queries.",
    (uchar**) &global_system_variables.query_cache_type,
@@ -7994,6 +8074,7 @@ static int mysql_init_variables(void)
 #if defined(ENABLED_DEBUG_SYNC)
   opt_debug_sync_timeout= 0;
 #endif /* defined(ENABLED_DEBUG_SYNC) */
+  opt_super_large_pages= 0;
   key_map_full.set_all();
 
   /* Character sets */

=== modified file 'sql/net_serv.cc'
--- a/sql/net_serv.cc	2009-03-17 20:07:27 +0000
+++ b/sql/net_serv.cc	2009-07-02 15:02:45 +0000
@@ -48,6 +48,7 @@
 #include <violite.h>
 #include <signal.h>
 #include <errno.h>
+#include "probes_mysql.h"
 #ifdef __NETWARE__
 #include <sys/select.h>
 #endif
@@ -371,6 +372,7 @@ my_net_write(NET *net,const uchar *packe
 {
   uchar buff[NET_HEADER_SIZE];
   my_bool rc;
+
   if (unlikely(!net->vio)) /* nowhere to write */
     return 0;
 
@@ -1012,7 +1014,7 @@ my_net_read(NET *net)
   size_t len, complen;
 
   MYSQL_NET_READ_START();
-  
+
 #ifdef HAVE_COMPRESS
   if (!net->compress)
   {

=== removed file 'sql/probes.d'
--- a/sql/probes.d	2008-10-02 12:08:09 +0000
+++ b/sql/probes.d	1970-01-01 00:00:00 +0000
@@ -1,154 +0,0 @@
-/* Copyright (C) 2008 MySQL AB
-
-   This program is free software; you can redistribute it and/or modify
-   it under the terms of the GNU General Public License as published by
-   the Free Software Foundation; version 2 of the License.
-
-   This program is distributed in the hope that it will be useful,
-   but WITHOUT ANY WARRANTY; without even the implied warranty of
-   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-   GNU General Public License for more details.
-
-   You should have received a copy of the GNU General Public License
-   along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307 USA */
-
-/*
-  The actual probe names in DTrace scripts will replace '__' by '-'. Thus
-  insert__row__start will be insert-row-start.
-
-  Recommendations for adding new probes:
-
-  - each probe should have the minimal set of arguments required to
-  unambiguously identify the context in which the probe fires. Redundant
-  probes (i.e. the ones that can be obtained in user scripts from previous
-  probes' arguments or otherwise) may be added for convenience.
-
-  - try to avoid computationally expensive probe arguments. If impossible,
-  use *_ENABLED() macros to check if the probe is activated before
-  performing expensive calculations for a probe argument.
-
-  - all *-done probes should have a status argument wherever applicable to make
-  it possible for user scripts to figure out whether the completed operation
-  was successful or not.
-  
-  - for all status arguments, a non-zero value should be returned on error or
-  failure, 0 should be returned on success.
-*/
-
-provider mysql {
-  
-  /* The following ones fire when creating or closing a client connection */
-  probe connection__start(unsigned long conn_id, char *user, char *host);
-  probe connection__done(int status, unsigned long conn_id);
-
-  /*
-    Fire at the start/end of any client command processing (including SQL
-    queries).
-  */
-  probe command__start(unsigned long conn_id, int command,
-                       char *user, char *host);
-  probe command__done(int status);
-  
-  /*
-    The following probes fire at the start/end of any SQL query processing,
-    respectively.
-
-    query_start() has a lot of parameters that can be used to pick up
-    parameters for a lot of other probes here.  For simplicity reasons we also
-    add the query string to most other DTrace probes as well. Hostname is
-    either the hostname or the IP address of the MySQL Client.
-  */
-  probe query__start(char *query,
-                     unsigned long conn_id,
-                     char *db_name,
-                     char *user,
-                     char *host);
-  probe query__done(int status); 
-
-  /* Fire at the start/end of SQL query parsing */
-  probe query__parse__start(char *query);
-  probe query__parse__done(int status);
-
-  /* Track whether the query hits the query cache or not */
-  probe query__cache__hit(char *query, unsigned long rows);
-  probe query__cache__miss(char *query);
-
-  /*
-    This probe fires when the actual query execution starts, i.e. after
-    parsing and checking the query cache, but before privilege checks,
-    optimizing, etc.
-
-    Query means also all independent queries of a stored procedure and prepared
-    statements. Also the stored procedure itself is a query.
-
-    exec_type is:
-    0:           Executed query from sql_parse, top-level query (sql_parse.cc)
-    1:           Executed prepared statement (sql_prepare.cc)
-    2:           Executed cursor statement (sql_cursor.cc)
-    3:           Executed query in stored procedure (sp_head.cc)
-  */
-  probe query__exec__start(char *query,
-                           unsigned long connid,
-                           char *db_name,
-                           char *user,
-                           char *host,
-                           int exec_type);
-  probe query__exec__done(int status);
-
-  /* These probes fire when performing write operations towards any handler */
-  probe insert__row__start(char *db, char *table);
-  probe insert__row__done(int status);
-  probe update__row__start(char *db, char *table);
-  probe update__row__done(int status);
-  probe delete__row__start(char *db, char *table);
-  probe delete__row__done(int status);
-
-  /*
-    These probes fire when calling external_lock for any handler
-    depending on the lock type being acquired or released.
-  */
-  probe handler__rdlock__start(char *db, char *table);
-  probe handler__wrlock__start(char *db, char *table);
-  probe handler__unlock__start(char *db, char *table);
-  probe handler__rdlock__done(int status);
-  probe handler__wrlock__done(int status);
-  probe handler__unlock__done(int status);
-  
-  /*
-    These probes fire when a filesort activity happens in a query.
-  */
-  probe filesort__start(char *db, char *table);
-  probe filesort__done(int status, unsigned long rows);
-  /*
-    The query types SELECT, INSERT, INSERT AS SELECT, UPDATE, UPDATE with
-    multiple tables, DELETE, DELETE with multiple tables are all probed.
-    The start probe always contains the query text.
-  */
-  probe select__start(char *query);
-  probe select__done(int status, unsigned long rows);
-  probe insert__start(char *query);
-  probe insert__done(int status, unsigned long rows);
-  probe insert__select__start(char *query);
-  probe insert__select__done(int status, unsigned long rows);
-  probe update__start(char *query);
-  probe update__done(int status,
-                     unsigned long rowsmatches, unsigned long rowschanged);
-  probe multi__update__start(char *query);
-  probe multi__update__done(int status,
-                            unsigned long rowsmatches, unsigned long rowschanged);
-  probe delete__start(char *query);
-  probe delete__done(int status, unsigned long rows);
-  probe multi__delete__start(char *query);
-  probe multi__delete__done(int status, unsigned long rows);
-
-  /*
-    These probes can be used to measure the time waiting for network traffic
-    or identify network-related problems.
-  */
-  probe net__read__start();
-  probe net__read__done(int status, unsigned long bytes);
-  probe net__write__start(unsigned long bytes);
-  probe net__write__done(int status);
-
-};

=== modified file 'sql/sp_head.cc'
--- a/sql/sp_head.cc	2009-06-12 02:01:08 +0000
+++ b/sql/sp_head.cc	2009-07-02 15:02:45 +0000
@@ -15,6 +15,7 @@
 
 #include "mysql_priv.h"
 #include "sql_prepare.h"
+#include "probes_mysql.h"
 #ifdef USE_PRAGMA_IMPLEMENTATION
 #pragma implementation
 #endif
@@ -2924,7 +2925,7 @@ sp_instr_stmt::exec_core(THD *thd, uint 
 {
   MYSQL_QUERY_EXEC_START(thd->query,
                          thd->thread_id,
-                         (char *) (thd->db ? thd->db: ""),
+                         (char *) (thd->db ? thd->db : ""),
                          thd->security_ctx->priv_user,
                          (char *) thd->security_ctx->host_or_ip,
                          3);

=== modified file 'sql/sql_cache.cc'
--- a/sql/sql_cache.cc	2009-05-15 13:45:06 +0000
+++ b/sql/sql_cache.cc	2009-07-02 14:23:36 +0000
@@ -334,6 +334,7 @@ TODO list:
 #include <hash.h>
 #include "../storage/myisammrg/ha_myisammrg.h"
 #include "../storage/myisammrg/myrg_def.h"
+#include "probes_mysql.h"
 
 #ifdef EMBEDDED_LIBRARY
 #include "emb_qcache.h"
@@ -1224,11 +1225,16 @@ end:
   Check if the query is in the cache. If it was cached, send it
   to the user.
 
-  RESULTS
-        1	The query was cached and user was sent the result.
-	0	Query was not cached.
-	-1	The query was cached but we didn't have rights to use it.
-		No error is sent to the client yet.
+  @param thd Pointer to the thread handler
+  @param sql A pointer to the SQL statement
+  @param query_length Length of the statement in characters
+ 
+  @return status code
+  @retval 0  Query was not cached.
+  @retval 1  The query was cached and user was sent the result.
+  @retval -1 The query was cached but we didn't have rights to use it. 
+  
+  In case of -1, no error is sent to the client.
 
   NOTE
   This method requires that sql points to allocated memory of size:
@@ -1544,8 +1550,8 @@ def_week_frmt: %lu, in_trans: %d, autoco
   thd->status_var.last_query_cost= 0.0;
   thd->stmt_da->disable_status();
 
-  MYSQL_QUERY_CACHE_HIT(thd->query, (ulong) thd->limit_found_rows);
   BLOCK_UNLOCK_RD(query_block);
+  MYSQL_QUERY_CACHE_HIT(thd->query, (ulong) thd->limit_found_rows);
   DBUG_RETURN(1);				// Result sent to client
 
 err_unlock:

=== modified file 'sql/sql_class.h'
--- a/sql/sql_class.h	2009-06-30 08:03:05 +0000
+++ b/sql/sql_class.h	2009-07-03 12:25:24 +0000
@@ -3108,6 +3108,10 @@ public:
   void send_error(uint errcode,const char *err);
   int  do_deletes();
   bool send_eof();
+  inline ha_rows num_deleted()
+  {
+    return deleted;
+  }
   virtual void abort();
 };
 
@@ -3151,6 +3155,14 @@ public:
   void send_error(uint errcode,const char *err);
   int  do_updates();
   bool send_eof();
+  inline ha_rows num_found()
+  {
+    return found;
+  }
+  inline ha_rows num_updated()
+  {
+    return updated;
+  }
   virtual void abort();
 };
 

=== modified file 'sql/sql_connect.cc'
--- a/sql/sql_connect.cc	2009-06-09 14:36:52 +0000
+++ b/sql/sql_connect.cc	2009-07-02 14:23:36 +0000
@@ -20,6 +20,7 @@
 
 #include "mysql_priv.h"
 #include "sql_audit.h"
+#include "probes_mysql.h"
 
 #if defined(HAVE_OPENSSL) && !defined(EMBEDDED_LIBRARY)
 /*
@@ -1148,7 +1149,7 @@ pthread_handler_t handle_one_connection(
 
     MYSQL_CONNECTION_START(thd->thread_id, thd->security_ctx->priv_user,
                            (char *) thd->security_ctx->host_or_ip);
-  
+
     prepare_new_connection_state(thd);
 
     while (!net->error && net->vio != 0 &&

=== modified file 'sql/sql_cursor.cc'
--- a/sql/sql_cursor.cc	2009-01-26 16:03:39 +0000
+++ b/sql/sql_cursor.cc	2009-07-02 14:23:36 +0000
@@ -19,6 +19,7 @@
 #include "mysql_priv.h"
 #include "sql_cursor.h"
 #include "sql_select.h"
+#include "probes_mysql.h"
 
 /****************************************************************************
   Declarations.

=== modified file 'sql/sql_insert.cc'
--- a/sql/sql_insert.cc	2009-06-17 07:30:19 +0000
+++ b/sql/sql_insert.cc	2009-07-02 14:23:36 +0000
@@ -3184,6 +3184,9 @@ bool select_insert::send_data(List<Item>
       DBUG_RETURN(1);
     }
   }
+
+  // Release latches in case bulk insert takes a long time
+  ha_release_temporary_latches(thd);
   
   // Release latches in case bulk insert takes a long time
   ha_release_temporary_latches(thd);

=== modified file 'sql/sql_parse.cc'
--- a/sql/sql_parse.cc	2009-06-30 08:03:05 +0000
+++ b/sql/sql_parse.cc	2009-07-03 12:25:24 +0000
@@ -37,6 +37,7 @@
 #ifdef BACKUP_TEST
 #include "backup/backup_test.h"
 #endif
+#include "probes_mysql.h"
 
 /**
   @defgroup Runtime_Environment Runtime Environment
@@ -1085,7 +1086,6 @@ bool dispatch_command(enum enum_server_c
       {
         MYSQL_QUERY_DONE(thd->is_error());
       }
-      
 #if defined(ENABLED_PROFILING)
       thd->profiling.finish_current_query();
       thd->profiling.start_new_query("continuing");
@@ -1488,7 +1488,6 @@ bool dispatch_command(enum enum_server_c
     }
     MYSQL_COMMAND_DONE(res);
   }
-  
   DBUG_RETURN(error);
 }
 
@@ -3129,6 +3128,8 @@ end_with_restore_list:
     break;
   }
   case SQLCOM_UPDATE:
+  {
+    ha_rows found= 0, updated= 0;
     DBUG_ASSERT(first_table == all_tables && first_table != 0);
     if (update_precheck(thd, all_tables))
       break;
@@ -3145,7 +3146,9 @@ end_with_restore_list:
                                   select_lex->order_list.elements,
                                   (ORDER *) select_lex->order_list.first,
                                   unit->select_limit_cnt,
-                                  lex->duplicates, lex->ignore));
+                                  lex->duplicates, lex->ignore,
+                                  &found, &updated));
+    MYSQL_UPDATE_DONE(res, found, updated);
     /* mysql_update return 2 if we need to switch to multi-update */
     if (up_result != 2)
     {
@@ -3153,6 +3156,7 @@ end_with_restore_list:
       break;
     }
     /* Fall through */
+  }
   case SQLCOM_UPDATE_MULTI:
   {
     DBUG_ASSERT(first_table == all_tables && first_table != 0);
@@ -3209,14 +3213,32 @@ end_with_restore_list:
 #ifdef HAVE_REPLICATION
     }  /* unlikely */
 #endif
-    MYSQL_MULTI_UPDATE_START(thd->query);
-    res= mysql_multi_update(thd, all_tables,
-                            &select_lex->item_list,
-                            &lex->value_list,
-                            select_lex->where,
-                            select_lex->options,
-                            lex->duplicates, lex->ignore, unit, select_lex);
-    DEBUG_SYNC(thd, "after_update");
+    {
+      multi_update *result_obj;
+      MYSQL_MULTI_UPDATE_START(thd->query);
+      res= mysql_multi_update(thd, all_tables,
+                              &select_lex->item_list,
+                              &lex->value_list,
+                              select_lex->where,
+                              select_lex->options,
+                              lex->duplicates,
+                              lex->ignore,
+                              unit,
+                              select_lex,
+                              &result_obj);
+      DEBUG_SYNC(thd, "after_update");
+      if (result_obj)
+      {
+        MYSQL_MULTI_UPDATE_DONE(res, result_obj->num_found(),
+                                result_obj->num_updated());
+        res= FALSE; /* Ignore errors here */
+        delete result_obj;
+      }
+      else
+      {
+        MYSQL_MULTI_UPDATE_DONE(1, 0, 0);
+      }
+    }
     break;
   }
   case SQLCOM_REPLACE:
@@ -3267,7 +3289,7 @@ end_with_restore_list:
     res= mysql_insert(thd, all_tables, lex->field_list, lex->many_values,
 		      lex->update_list, lex->value_list,
                       lex->duplicates, lex->ignore);
-
+    MYSQL_INSERT_DONE(res, (ulong) thd->row_count_func);
     /*
       If we have inserted into a VIEW, and the base table has
       AUTO_INCREMENT column, but this column is not accessible through
@@ -3304,13 +3326,12 @@ end_with_restore_list:
       res= 1;
       break;
     }
-
     if (!(res= open_and_lock_tables(thd, all_tables)))
     {
-      MYSQL_INSERT_SELECT_START(thd->query);
-      
       /* Skip first table, which is the table we are inserting in */
       TABLE_LIST *second_table= first_table->next_local;
+      
+      MYSQL_INSERT_SELECT_START(thd->query);
       select_lex->table_list.first= (uchar*) second_table;
       select_lex->context.table_list= 
         select_lex->context.first_name_resolution_table= second_table;
@@ -3342,9 +3363,9 @@ end_with_restore_list:
         delete sel_result;
       }
       /* revert changes for SP */
+      MYSQL_INSERT_SELECT_DONE(res, (ulong) thd->row_count_func);
       select_lex->table_list.first= (uchar*) first_table;
     }
-
     /*
       If we have inserted into a VIEW, and the base table has
       AUTO_INCREMENT column, but this column is not accessible through
@@ -3396,6 +3417,7 @@ end_with_restore_list:
                        &select_lex->order_list,
                        unit->select_limit_cnt, select_lex->options,
                        FALSE);
+    MYSQL_DELETE_DONE(res, (ulong) thd->row_count_func);
     break;
   }
   case SQLCOM_DELETE_MULTI:
@@ -3426,8 +3448,12 @@ end_with_restore_list:
     if ((res= open_and_lock_tables(thd, all_tables)))
       break;
 
+    MYSQL_MULTI_DELETE_START(thd->query);
     if ((res= mysql_multi_delete_prepare(thd)))
+    {
+      MYSQL_MULTI_DELETE_DONE(1, 0);
       goto error;
+    }
 
     if (!thd->is_fatal_error &&
         (del_result= new multi_delete(aux_tables, lex->table_count)))
@@ -3444,12 +3470,16 @@ end_with_restore_list:
                         OPTION_SETUP_TABLES_DONE,
 			del_result, unit, select_lex);
       res|= thd->is_error();
+      MYSQL_MULTI_DELETE_DONE(res, del_result->num_deleted());
       if (res)
         del_result->abort();
       delete del_result;
     }
     else
+    {
       res= TRUE;                                // Error
+      MYSQL_MULTI_DELETE_DONE(1, 0);
+    }
     break;
   }
   case SQLCOM_DROP_TABLE:
@@ -5981,7 +6011,7 @@ void mysql_parse(THD *thd, const char *i
             thd->server_status|= SERVER_MORE_RESULTS_EXISTS;
           }
           lex->set_trg_event_type_for_tables();
-	  MYSQL_QUERY_EXEC_START(thd->query,
+          MYSQL_QUERY_EXEC_START(thd->query,
                                  thd->thread_id,
                                  (char *) (thd->db ? thd->db : ""),
                                  thd->security_ctx->priv_user,
@@ -5989,7 +6019,7 @@ void mysql_parse(THD *thd, const char *i
                                  0);
 
           error= mysql_execute_command(thd);
-	  MYSQL_QUERY_EXEC_DONE(error);
+          MYSQL_QUERY_EXEC_DONE(error);
 	}
       }
     }
@@ -7958,10 +7988,10 @@ bool parse_sql(THD *thd,
                Object_creation_ctx *creation_ctx)
 {
   bool mysql_parse_status;
+  bool ret_value;
   DBUG_ASSERT(thd->m_parser_state == NULL);
 
   MYSQL_QUERY_PARSE_START(thd->query);
-  
   /* Backup creation context. */
 
   Object_creation_ctx *backup_ctx= NULL;
@@ -7993,9 +8023,9 @@ bool parse_sql(THD *thd,
 
   /* That's it. */
 
-  MYSQL_QUERY_PARSE_DONE(mysql_parse_status || thd->is_fatal_error);
-  
-  return mysql_parse_status || thd->is_fatal_error;
+  ret_value= mysql_parse_status || thd->is_fatal_error;
+  MYSQL_QUERY_PARSE_DONE(ret_value);
+  return ret_value;
 }
 
 /**

=== modified file 'sql/sql_prepare.cc'
--- a/sql/sql_prepare.cc	2009-06-06 00:02:04 +0000
+++ b/sql/sql_prepare.cc	2009-07-02 14:23:36 +0000
@@ -90,6 +90,7 @@ When one supplies long data for a placeh
 #include "sp_head.h"
 #include "sp.h"
 #include "sp_cache.h"
+#include "probes_mysql.h"
 #ifdef EMBEDDED_LIBRARY
 /* include MYSQL_BIND headers */
 #include <mysql.h>

=== modified file 'sql/sql_select.cc'
--- a/sql/sql_select.cc	2009-06-30 08:03:05 +0000
+++ b/sql/sql_select.cc	2009-07-03 12:25:24 +0000
@@ -31,6 +31,7 @@
 #include "mysql_priv.h"
 #include "sql_select.h"
 #include "sql_cursor.h"
+#include "probes_mysql.h"
 
 #include <m_ctype.h>
 #include <my_bit.h>
@@ -311,7 +312,6 @@ bool handle_select(THD *thd, LEX *lex, s
     result->abort();
 
   MYSQL_SELECT_DONE((int) res, (ulong) thd->limit_found_rows);
-
   DBUG_RETURN(res);
 }
 

=== modified file 'sql/sql_update.cc'
--- a/sql/sql_update.cc	2009-05-31 12:05:01 +0000
+++ b/sql/sql_update.cc	2009-07-02 15:02:45 +0000
@@ -180,7 +180,8 @@ int mysql_update(THD *thd,
                  COND *conds,
                  uint order_num, ORDER *order,
 		 ha_rows limit,
-		 enum enum_duplicates handle_duplicates, bool ignore)
+		 enum enum_duplicates handle_duplicates, bool ignore,
+                 ha_rows *found_return, ha_rows *updated_return)
 {
   bool		using_limit= limit != HA_POS_ERROR;
   bool		safe_update= test(thd->options & OPTION_SAFE_UPDATES);
@@ -210,7 +211,6 @@ int mysql_update(THD *thd,
   {
     if (open_tables(thd, &table_list, &table_count, 0))
     {
-      MYSQL_UPDATE_DONE(1, 0, 0);
       DBUG_RETURN(1);
     }
 
@@ -221,14 +221,12 @@ int mysql_update(THD *thd,
       /* pass counter value */
       thd->lex->table_count= table_count;
       /* convert to multiupdate */
-      MYSQL_UPDATE_DONE(2, 0, 0);
       DBUG_RETURN(2);
     }
     if (!lock_tables(thd, table_list, table_count, 0, &need_reopen))
       break;
     if (!need_reopen)
     {
-      MYSQL_UPDATE_DONE(1, 0, 0);
       DBUG_RETURN(1);
     }
     close_tables_for_reopen(thd, &table_list, FALSE);
@@ -238,7 +236,6 @@ int mysql_update(THD *thd,
       (thd->fill_derived_tables() &&
        mysql_handle_derived(thd->lex, &mysql_derived_filling)))
   {
-    MYSQL_UPDATE_DONE(1, 0, 0);
     DBUG_RETURN(1);
   }
 
@@ -301,7 +298,6 @@ int mysql_update(THD *thd,
   if (select_lex->inner_refs_list.elements &&
     fix_inner_refs(thd, all_fields, select_lex, select_lex->ref_pointer_array))
   {
-    MYSQL_UPDATE_DONE(1, 0, 0);
     DBUG_RETURN(-1);
   }
 
@@ -331,7 +327,6 @@ int mysql_update(THD *thd,
   {
     free_underlaid_joins(thd, select_lex);
     my_ok(thd);				// No matching records
-    MYSQL_UPDATE_DONE(0, 0, 0);
     DBUG_RETURN(0);
   }
 #endif
@@ -348,7 +343,6 @@ int mysql_update(THD *thd,
     if (error)
       goto abort;				// Error in where
     my_ok(thd);				// No matching records
-    MYSQL_UPDATE_DONE(0, 0, 0);
     DBUG_RETURN(0);
   }
   if (!select && limit != HA_POS_ERROR)
@@ -843,10 +837,9 @@ int mysql_update(THD *thd,
   }
   thd->count_cuted_fields= CHECK_FIELD_IGNORE;		/* calc cuted fields */
   thd->abort_on_warning= 0;
-
-  res= (error >= 0 || thd->is_error()) ? 1 : 0;
-  MYSQL_UPDATE_DONE(res, (ulong) found, (ulong) updated);
-  DBUG_RETURN(res);
+  *found_return= found;
+  *updated_return= updated;
+  DBUG_RETURN((error >= 0 || thd->is_error()) ? 1 : 0);
 
 err:
   delete select;
@@ -859,7 +852,6 @@ err:
   thd->abort_on_warning= 0;
 
 abort:
-  MYSQL_UPDATE_DONE(1, 0, 0);
   DBUG_RETURN(1);
 }
 
@@ -1211,18 +1203,22 @@ bool mysql_multi_update(THD *thd,
                         List<Item> *values,
                         COND *conds,
                         ulonglong options,
-                        enum enum_duplicates handle_duplicates, bool ignore,
-                        SELECT_LEX_UNIT *unit, SELECT_LEX *select_lex)
+                        enum enum_duplicates handle_duplicates,
+                        bool ignore,
+                        SELECT_LEX_UNIT *unit,
+                        SELECT_LEX *select_lex,
+                        multi_update **result)
 {
-  multi_update *result;
   bool res;
   DBUG_ENTER("mysql_multi_update");
 
-  if (!(result= new multi_update(table_list,
+  if (!(*result= new multi_update(table_list,
 				 thd->lex->select_lex.leaf_tables,
 				 fields, values,
 				 handle_duplicates, ignore)))
+  {
     DBUG_RETURN(TRUE);
+  }
 
   thd->abort_on_warning= test(thd->variables.sql_mode &
                               (MODE_STRICT_TRANS_TABLES |
@@ -1236,19 +1232,18 @@ bool mysql_multi_update(THD *thd,
                       (ORDER *)NULL,
                       options | SELECT_NO_JOIN_CACHE | SELECT_NO_UNLOCK |
                       OPTION_SETUP_TABLES_DONE,
-                      result, unit, select_lex);
+                      *result, unit, select_lex);
   DBUG_PRINT("info",("res: %d  report_error: %d", res,
                      (int) thd->is_error()));
   res|= thd->is_error();
   if (unlikely(res))
   {
     /* If we had a another error reported earlier then this will be ignored */
-    result->send_error(ER_UNKNOWN_ERROR, ER(ER_UNKNOWN_ERROR));
-    result->abort();
+    (*result)->send_error(ER_UNKNOWN_ERROR, ER(ER_UNKNOWN_ERROR));
+    (*result)->abort();
   }
-  delete result;
   thd->abort_on_warning= 0;
-  DBUG_RETURN(FALSE);
+  DBUG_RETURN(res);
 }
 
 
@@ -1805,7 +1800,6 @@ bool multi_update::send_data(List<Item> 
       }
     }
   }
-  MYSQL_UPDATE_DONE(0, (ulong) found, (ulong) updated);
   DBUG_RETURN(0);
 }
 

=== modified file 'storage/archive/Makefile.am'
--- a/storage/archive/Makefile.am	2009-01-12 15:00:34 +0000
+++ b/storage/archive/Makefile.am	2009-07-02 14:23:36 +0000
@@ -86,3 +86,24 @@ valgrind-test: archive_test archive_perf
 	libtool --mode=execute valgrind --leak-check=yes --show-reachable=yes  ./archive_performance
 
 EXTRA_DIST =		CMakeLists.txt plug.in
+
+if HAVE_DTRACE_DASH_G
+libarchive_a_LIBADD = probes_mysql.o
+libarchive_a_DEPENDENCIES = probes_mysql.o dtrace_files dtrace_providers
+CLEANFILES = probes_mysql.o dtrace_files dtrace_providers
+DTRACEFILES = libarchive_a-ha_archive.o
+DTRACEPROVIDER = probes_mysql.d
+CLEANFILES += $(DTRACEPROVIDER) dtrace_sources
+
+dtrace_files:
+	echo $(DTRACEFILES) > $@
+dtrace_providers: probes_mysql.d
+	echo $(DTRACEPROVIDER) > $@
+probes_mysql.d:
+	-$(RM) -f probes_mysql.d
+	$(CP) $(top_srcdir)/include/probes_mysql.d.base probes_mysql.d
+	echo timestamp > dtrace_sources
+
+probes_mysql.o: $(DTRACEPROVIDER) $(DTRACEFILES)
+	$(DTRACE) $(DTRACEFLAGS) -G -s $(DTRACEPROVIDER) $(DTRACEFILES) -o $@
+endif

=== modified file 'storage/archive/ha_archive.cc'
--- a/storage/archive/ha_archive.cc	2009-05-15 13:45:06 +0000
+++ b/storage/archive/ha_archive.cc	2009-07-02 14:23:36 +0000
@@ -18,6 +18,7 @@
 #endif
 
 #include "mysql_priv.h"
+#include "probes_mysql.h"
 #include <myisam.h>
 
 #include "ha_archive.h"
@@ -917,7 +918,9 @@ int ha_archive::index_read(uchar *buf, c
 {
   int rc;
   DBUG_ENTER("ha_archive::index_read");
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
   rc= index_read_idx(buf, active_index, key, key_len, find_flag);
+  MYSQL_INDEX_READ_ROW_DONE(rc);
   DBUG_RETURN(rc);
 }
 
@@ -964,8 +967,10 @@ error:
 int ha_archive::index_next(uchar * buf) 
 { 
   bool found= 0;
+  int rc;
 
   DBUG_ENTER("ha_archive::index_next");
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
 
   while (!(get_row(&archive, buf)))
   {
@@ -976,7 +981,9 @@ int ha_archive::index_next(uchar * buf) 
     }
   }
 
-  DBUG_RETURN(found ? 0 : HA_ERR_END_OF_FILE); 
+  rc= found ? 0 : HA_ERR_END_OF_FILE;
+  MYSQL_INDEX_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
 }
 
 /*
@@ -1102,12 +1109,20 @@ int ha_archive::rnd_next(uchar *buf)
 {
   int rc;
   DBUG_ENTER("ha_archive::rnd_next");
+  MYSQL_READ_ROW_START(table_share->db.str,
+                       table_share->table_name.str, TRUE);
 
   if (share->crashed)
-      DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
+  {
+    rc= HA_ERR_CRASHED_ON_USAGE;
+    goto end;
+  }
 
   if (!scan_rows)
-    DBUG_RETURN(HA_ERR_END_OF_FILE);
+  {
+    rc= HA_ERR_END_OF_FILE;
+    goto end;
+  }
   scan_rows--;
 
   ha_statistic_increment(&SSV::ha_read_rnd_next_count);
@@ -1116,6 +1131,8 @@ int ha_archive::rnd_next(uchar *buf)
 
   table->status=rc ? STATUS_NOT_FOUND: 0;
 
+end:
+  MYSQL_READ_ROW_DONE(rc);
   DBUG_RETURN(rc);
 }
 
@@ -1143,12 +1160,21 @@ void ha_archive::position(const uchar *r
 
 int ha_archive::rnd_pos(uchar * buf, uchar *pos)
 {
+  int rc;
   DBUG_ENTER("ha_archive::rnd_pos");
+  MYSQL_READ_ROW_START(table_share->db.str,
+                       table_share->table_name.str, FALSE);
   ha_statistic_increment(&SSV::ha_read_rnd_next_count);
   current_position= (my_off_t)my_get_ptr(pos, ref_length);
   if (azseek(&archive, (size_t)current_position, SEEK_SET) == (size_t)(-1L))
-    DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
-  DBUG_RETURN(get_row(&archive, buf));
+  {
+    rc= HA_ERR_CRASHED_ON_USAGE;
+    goto end;
+  }
+  rc= get_row(&archive, buf);
+end:
+  MYSQL_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
 }
 
 /*

=== modified file 'storage/blackhole/Makefile.am'
--- a/storage/blackhole/Makefile.am	2009-01-07 10:58:33 +0000
+++ b/storage/blackhole/Makefile.am	2009-07-02 14:23:36 +0000
@@ -44,3 +44,24 @@ libblackhole_a_SOURCES=	ha_blackhole.cc
 
 
 EXTRA_DIST =		CMakeLists.txt plug.in
+
+if HAVE_DTRACE_DASH_G
+libblackhole_a_LIBADD = probes_mysql.o
+libblackhole_a_DEPENDENCIES = probes_mysql.o dtrace_files dtrace_providers
+CLEANFILES = probes_mysql.o dtrace_files dtrace_providers
+DTRACEFILES = libblackhole_a-ha_blackhole.o
+DTRACEPROVIDER = probes_mysql.d
+CLEANFILES += $(DTRACEPROVIDER) dtrace_sources
+
+dtrace_files:
+	echo $(DTRACEFILES) > $@
+dtrace_providers: probes_mysql.d
+	echo $(DTRACEPROVIDER) > $@
+probes_mysql.d:
+	-$(RM) -f probes_mysql.d
+	$(CP) $(top_srcdir)/include/probes_mysql.d.base probes_mysql.d
+	echo timestamp > dtrace_sources
+
+probes_mysql.o: $(DTRACEPROVIDER) $(DTRACEFILES)
+	$(DTRACE) $(DTRACEFLAGS) -G -s $(DTRACEPROVIDER) $(DTRACEFILES) -o $@
+endif

=== modified file 'storage/blackhole/ha_blackhole.cc'
--- a/storage/blackhole/ha_blackhole.cc	2009-01-27 02:08:48 +0000
+++ b/storage/blackhole/ha_blackhole.cc	2009-07-02 14:23:36 +0000
@@ -20,6 +20,7 @@
 
 #define MYSQL_SERVER 1
 #include "mysql_priv.h"
+#include "probes_mysql.h"
 #include "ha_blackhole.h"
 
 /* Static declarations for handlerton */
@@ -128,18 +129,27 @@ int ha_blackhole::rnd_init(bool scan)
 
 int ha_blackhole::rnd_next(uchar *buf)
 {
+  int rc;
   DBUG_ENTER("ha_blackhole::rnd_next");
+  MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str,
+                       TRUE);
   THD *thd= ha_thd();
   if (thd->system_thread == SYSTEM_THREAD_SLAVE_SQL && thd->query == NULL)
-    DBUG_RETURN(0);
-  DBUG_RETURN(HA_ERR_END_OF_FILE);
+    rc= 0;
+  else
+    rc= HA_ERR_END_OF_FILE;
+  MYSQL_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
 }
 
 
 int ha_blackhole::rnd_pos(uchar * buf, uchar *pos)
 {
   DBUG_ENTER("ha_blackhole::rnd_pos");
+  MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str,
+                       FALSE);
   DBUG_ASSERT(0);
+  MYSQL_READ_ROW_DONE(0);
   DBUG_RETURN(0);
 }
 
@@ -210,11 +220,16 @@ int ha_blackhole::index_read_map(uchar *
                                  key_part_map keypart_map,
                              enum ha_rkey_function find_flag)
 {
+  int rc;
   DBUG_ENTER("ha_blackhole::index_read");
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
   THD *thd= ha_thd();
   if (thd->system_thread == SYSTEM_THREAD_SLAVE_SQL && thd->query == NULL)
-    DBUG_RETURN(0);
-  DBUG_RETURN(HA_ERR_END_OF_FILE);
+    rc= 0;
+  else
+    rc= HA_ERR_END_OF_FILE;
+  MYSQL_INDEX_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
 }
 
 
@@ -222,50 +237,77 @@ int ha_blackhole::index_read_idx_map(uch
                                  key_part_map keypart_map,
                                  enum ha_rkey_function find_flag)
 {
+  int rc;
   DBUG_ENTER("ha_blackhole::index_read_idx");
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
   THD *thd= ha_thd();
   if (thd->system_thread == SYSTEM_THREAD_SLAVE_SQL && thd->query == NULL)
-    DBUG_RETURN(0);
-  DBUG_RETURN(HA_ERR_END_OF_FILE);
+    rc= 0;
+  else
+    rc= HA_ERR_END_OF_FILE;
+  MYSQL_INDEX_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
 }
 
 
 int ha_blackhole::index_read_last_map(uchar * buf, const uchar * key,
                                       key_part_map keypart_map)
 {
+  int rc;
   DBUG_ENTER("ha_blackhole::index_read_last");
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
   THD *thd= ha_thd();
   if (thd->system_thread == SYSTEM_THREAD_SLAVE_SQL && thd->query == NULL)
-    DBUG_RETURN(0);
-  DBUG_RETURN(HA_ERR_END_OF_FILE);
+    rc= 0;
+  else
+    rc= HA_ERR_END_OF_FILE;
+  MYSQL_INDEX_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
 }
 
 
 int ha_blackhole::index_next(uchar * buf)
 {
+  int rc;
   DBUG_ENTER("ha_blackhole::index_next");
-  DBUG_RETURN(HA_ERR_END_OF_FILE);
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+  rc= HA_ERR_END_OF_FILE;
+  MYSQL_INDEX_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
 }
 
 
 int ha_blackhole::index_prev(uchar * buf)
 {
+  int rc;
   DBUG_ENTER("ha_blackhole::index_prev");
-  DBUG_RETURN(HA_ERR_END_OF_FILE);
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+  rc= HA_ERR_END_OF_FILE;
+  MYSQL_INDEX_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
 }
 
 
 int ha_blackhole::index_first(uchar * buf)
 {
+  int rc;
   DBUG_ENTER("ha_blackhole::index_first");
-  DBUG_RETURN(HA_ERR_END_OF_FILE);
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+  /* No other outcome here: end-of-file is always reported. */
+  rc= HA_ERR_END_OF_FILE;
+  MYSQL_INDEX_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
 }
 
 
 int ha_blackhole::index_last(uchar * buf)
 {
+  int rc;
   DBUG_ENTER("ha_blackhole::index_last");
-  DBUG_RETURN(HA_ERR_END_OF_FILE);
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+  rc= HA_ERR_END_OF_FILE;
+  MYSQL_INDEX_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
 }
 
 

=== modified file 'storage/csv/Makefile.am'
--- a/storage/csv/Makefile.am	2009-01-12 15:00:34 +0000
+++ b/storage/csv/Makefile.am	2009-07-02 14:23:36 +0000
@@ -36,3 +36,24 @@ noinst_LIBRARIES =	@plugin_csv_static_ta
 libcsv_a_SOURCES =	transparent_file.cc ha_tina.cc
 
 EXTRA_DIST =		CMakeLists.txt plug.in
+
+if HAVE_DTRACE_DASH_G
+libcsv_a_LIBADD = probes_mysql.o
+libcsv_a_DEPENDENCIES = probes_mysql.o dtrace_files dtrace_providers
+CLEANFILES = probes_mysql.o dtrace_files dtrace_providers
+DTRACEFILES = libcsv_a-ha_tina.o
+DTRACEPROVIDER = probes_mysql.d
+CLEANFILES += $(DTRACEPROVIDER) dtrace_sources
+
+dtrace_files:
+	echo $(DTRACEFILES) > $@
+dtrace_providers: probes_mysql.d
+	echo $(DTRACEPROVIDER) > $@
+probes_mysql.d:
+	-$(RM) -f probes_mysql.d
+	$(CP) $(top_srcdir)/include/probes_mysql.d.base probes_mysql.d
+	echo timestamp > dtrace_sources
+
+probes_mysql.o: $(DTRACEPROVIDER) $(DTRACEFILES)
+	$(DTRACE) $(DTRACEFLAGS) -G -s $(DTRACEPROVIDER) $(DTRACEFILES) -o $@
+endif

=== modified file 'storage/csv/ha_tina.cc'
--- a/storage/csv/ha_tina.cc	2009-03-24 09:23:56 +0000
+++ b/storage/csv/ha_tina.cc	2009-07-02 14:23:36 +0000
@@ -48,6 +48,7 @@ TODO:
 #include "mysql_priv.h"
 #include <mysql/plugin.h>
 #include "ha_tina.h"
+#include "probes_mysql.h"
 
 
 /*
@@ -1156,9 +1157,14 @@ int ha_tina::rnd_next(uchar *buf)
 {
   int rc;
   DBUG_ENTER("ha_tina::rnd_next");
+  MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str,
+                       TRUE);
 
   if (share->crashed)
-      DBUG_RETURN(HA_ERR_CRASHED_ON_USAGE);
+  {
+    rc= HA_ERR_CRASHED_ON_USAGE;
+    goto end;
+  }
 
   ha_statistic_increment(&SSV::ha_read_rnd_next_count);
 
@@ -1166,13 +1172,19 @@ int ha_tina::rnd_next(uchar *buf)
 
   /* don't scan an empty file */
   if (!local_saved_data_file_length)
-    DBUG_RETURN(HA_ERR_END_OF_FILE);
+  {
+    rc= HA_ERR_END_OF_FILE;
+    goto end;
+  }
 
   if ((rc= find_current_row(buf)))
-    DBUG_RETURN(rc);
+    goto end;
 
   stats.records++;
-  DBUG_RETURN(0);
+  rc= 0;
+end:
+  MYSQL_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
 }
 
 /*
@@ -1199,10 +1211,15 @@ void ha_tina::position(const uchar *reco
 
 int ha_tina::rnd_pos(uchar * buf, uchar *pos)
 {
+  int rc;
   DBUG_ENTER("ha_tina::rnd_pos");
+  MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str,
+                       FALSE);
   ha_statistic_increment(&SSV::ha_read_rnd_count);
   current_position= my_get_ptr(pos,ref_length);
-  DBUG_RETURN(find_current_row(buf));
+  rc= find_current_row(buf);
+  MYSQL_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
 }
 
 /*

=== modified file 'storage/example/Makefile.am'
--- a/storage/example/Makefile.am	2009-04-25 21:20:45 +0000
+++ b/storage/example/Makefile.am	2009-07-02 14:23:36 +0000
@@ -44,3 +44,25 @@ libexample_a_SOURCES=	ha_example.cc
 
 
 EXTRA_DIST =		CMakeLists.txt plug.in
+
+if HAVE_DTRACE_DASH_G
+libexample_a_LIBADD = probes_mysql.o
+libexample_a_DEPENDENCIES = probes_mysql.o
+CLEANFILES = probes_mysql.o dtrace_files dtrace_providers
+BUILT_SOURCES =
+DTRACEFILES = libexample_a-ha_example.o
+DTRACEPROVIDER = probes_mysql.d
+CLEANFILES += $(DTRACEPROVIDER) dtrace_sources
+
+dtrace_files:
+	echo $(DTRACEFILES) > $@
+dtrace_providers: probes_mysql.d
+	echo $(DTRACEPROVIDER) > $@
+probes_mysql.d:
+	-$(RM) -f probes_mysql.d
+	$(CP) $(top_srcdir)/include/probes_mysql.d.base probes_mysql.d
+	echo timestamp > dtrace_sources
+
+probes_mysql.o: $(DTRACEPROVIDER) $(DTRACEFILES)
+	$(DTRACE) $(DTRACEFLAGS) -G -s $(DTRACEPROVIDER) $(DTRACEFILES) -o $@
+endif

=== modified file 'storage/example/ha_example.cc'
--- a/storage/example/ha_example.cc	2009-04-25 21:20:45 +0000
+++ b/storage/example/ha_example.cc	2009-07-02 14:23:36 +0000
@@ -94,6 +94,7 @@
 #define MYSQL_SERVER 1
 #include "mysql_priv.h"
 #include "ha_example.h"
+#include "probes_mysql.h"
 #include <mysql/plugin.h>
 
 static handler *example_create_handler(handlerton *hton,
@@ -428,8 +429,12 @@ int ha_example::index_read_map(uchar *bu
                                enum ha_rkey_function find_flag
                                __attribute__((unused)))
 {
+  int rc;
   DBUG_ENTER("ha_example::index_read");
-  DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+  rc= HA_ERR_WRONG_COMMAND;
+  MYSQL_INDEX_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
 }
 
 
@@ -440,8 +445,12 @@ int ha_example::index_read_map(uchar *bu
 
 int ha_example::index_next(uchar *buf)
 {
+  int rc;
   DBUG_ENTER("ha_example::index_next");
-  DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+  rc= HA_ERR_WRONG_COMMAND;
+  MYSQL_INDEX_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
 }
 
 
@@ -452,8 +461,12 @@ int ha_example::index_next(uchar *buf)
 
 int ha_example::index_prev(uchar *buf)
 {
+  int rc;
   DBUG_ENTER("ha_example::index_prev");
-  DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+  rc= HA_ERR_WRONG_COMMAND;
+  MYSQL_INDEX_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
 }
 
 
@@ -469,8 +482,12 @@ int ha_example::index_prev(uchar *buf)
 */
 int ha_example::index_first(uchar *buf)
 {
+  int rc;
   DBUG_ENTER("ha_example::index_first");
-  DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+  rc= HA_ERR_WRONG_COMMAND;
+  MYSQL_INDEX_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
 }
 
 
@@ -486,8 +503,12 @@ int ha_example::index_first(uchar *buf)
 */
 int ha_example::index_last(uchar *buf)
 {
+  int rc;
   DBUG_ENTER("ha_example::index_last");
-  DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+  rc= HA_ERR_WRONG_COMMAND;
+  MYSQL_INDEX_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
 }
 
 
@@ -533,8 +554,13 @@ int ha_example::rnd_end()
 */
 int ha_example::rnd_next(uchar *buf)
 {
+  int rc;
   DBUG_ENTER("ha_example::rnd_next");
-  DBUG_RETURN(HA_ERR_END_OF_FILE);
+  MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str,
+                       TRUE);
+  rc= HA_ERR_END_OF_FILE;
+  MYSQL_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
 }
 
 
@@ -581,8 +607,13 @@ void ha_example::position(const uchar *r
 */
 int ha_example::rnd_pos(uchar *buf, uchar *pos)
 {
+  int rc;
   DBUG_ENTER("ha_example::rnd_pos");
-  DBUG_RETURN(HA_ERR_WRONG_COMMAND);
+  MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str,
+                       FALSE);
+  rc= HA_ERR_WRONG_COMMAND;
+  MYSQL_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
 }
 
 

=== modified file 'storage/federated/Makefile.am'
--- a/storage/federated/Makefile.am	2009-01-07 10:58:33 +0000
+++ b/storage/federated/Makefile.am	2009-07-02 14:23:36 +0000
@@ -44,3 +44,24 @@ libfederated_a_SOURCES=	ha_federated.cc
 
 
 EXTRA_DIST =		CMakeLists.txt plug.in
+
+if HAVE_DTRACE_DASH_G
+libfederated_a_LIBADD = probes_mysql.o
+libfederated_a_DEPENDENCIES = probes_mysql.o dtrace_files dtrace_providers
+CLEANFILES = probes_mysql.o dtrace_files dtrace_providers
+DTRACEFILES = libfederated_a-ha_federated.o
+DTRACEPROVIDER = probes_mysql.d
+CLEANFILES += $(DTRACEPROVIDER) dtrace_sources
+
+dtrace_files:
+	echo $(DTRACEFILES) > $@
+dtrace_providers: probes_mysql.d
+	echo $(DTRACEPROVIDER) > $@
+probes_mysql.d:
+	-$(RM) -f probes_mysql.d
+	$(CP) $(top_srcdir)/include/probes_mysql.d.base probes_mysql.d
+	echo timestamp > dtrace_sources
+
+probes_mysql.o: $(DTRACEPROVIDER) $(DTRACEFILES)
+	$(DTRACE) $(DTRACEFLAGS) -G -s $(DTRACEPROVIDER) $(DTRACEFILES) -o $@
+endif

=== modified file 'storage/federated/ha_federated.cc'
--- a/storage/federated/ha_federated.cc	2009-05-15 13:45:06 +0000
+++ b/storage/federated/ha_federated.cc	2009-07-02 14:23:36 +0000
@@ -380,6 +380,7 @@
 #endif
 
 #include "ha_federated.h"
+#include "probes_mysql.h"
 
 #include "m_string.h"
 
@@ -2317,12 +2318,16 @@ int ha_federated::delete_row(const uchar
 int ha_federated::index_read(uchar *buf, const uchar *key,
                              uint key_len, ha_rkey_function find_flag)
 {
+  int rc;
   DBUG_ENTER("ha_federated::index_read");
 
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
   free_result();
-  DBUG_RETURN(index_read_idx_with_result_set(buf, active_index, key,
-                                             key_len, find_flag,
-                                             &stored_result));
+  rc= index_read_idx_with_result_set(buf, active_index, key,
+                                     key_len, find_flag,
+                                     &stored_result);
+  MYSQL_INDEX_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
 }
 
 
@@ -2473,6 +2478,7 @@ int ha_federated::read_range_first(const
                    sizeof(sql_query_buffer),
                    &my_charset_bin);
   DBUG_ENTER("ha_federated::read_range_first");
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
 
   DBUG_ASSERT(!(start_key == NULL && end_key == NULL));
 
@@ -2493,28 +2499,39 @@ int ha_federated::read_range_first(const
     retval= HA_ERR_END_OF_FILE;
     goto error;
   }
-  
-  DBUG_RETURN(read_next(table->record[0], stored_result));
+
+  retval= read_next(table->record[0], stored_result);
+  MYSQL_INDEX_READ_ROW_DONE(retval);
+  DBUG_RETURN(retval);
 
 error:
   table->status= STATUS_NOT_FOUND;
+  MYSQL_INDEX_READ_ROW_DONE(retval);
   DBUG_RETURN(retval);
 }
 
 
 int ha_federated::read_range_next()
 {
+  int retval;
   DBUG_ENTER("ha_federated::read_range_next");
-  DBUG_RETURN(rnd_next(table->record[0]));
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
+  retval= rnd_next_int(table->record[0]);
+  MYSQL_INDEX_READ_ROW_DONE(retval);
+  DBUG_RETURN(retval);
 }
 
 
 /* Used to read forward through the index.  */
 int ha_federated::index_next(uchar *buf)
 {
+  int retval;
   DBUG_ENTER("ha_federated::index_next");
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
   ha_statistic_increment(&SSV::ha_read_next_count);
-  DBUG_RETURN(read_next(buf, stored_result));
+  retval= read_next(buf, stored_result);
+  MYSQL_INDEX_READ_ROW_DONE(retval);
+  DBUG_RETURN(retval);
 }
 
 
@@ -2607,7 +2624,18 @@ int ha_federated::index_end(void)
 
 int ha_federated::rnd_next(uchar *buf)
 {
+  int rc;
   DBUG_ENTER("ha_federated::rnd_next");
+  MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str,
+                       TRUE);
+  rc= rnd_next_int(buf);
+  MYSQL_READ_ROW_DONE(rc);
+  DBUG_RETURN(rc);
+}
+
+int ha_federated::rnd_next_int(uchar *buf)
+{
+  DBUG_ENTER("ha_federated::rnd_next_int");
 
   if (stored_result == 0)
   {
@@ -2712,8 +2740,11 @@ void ha_federated::position(const uchar 
 int ha_federated::rnd_pos(uchar *buf, uchar *pos)
 {
   MYSQL_RES *result;
+  int ret_val;
   DBUG_ENTER("ha_federated::rnd_pos");
-  
+
+  MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str,
+                       FALSE);
   ha_statistic_increment(&SSV::ha_read_rnd_count);
 
   /* Get stored result set. */
@@ -2723,7 +2754,9 @@ int ha_federated::rnd_pos(uchar *buf, uc
   memcpy_fixed(&result->data_cursor, pos + sizeof(MYSQL_RES *),
                sizeof(MYSQL_ROW_OFFSET));
   /* Read a row. */
-  DBUG_RETURN(read_next(buf, result));
+  ret_val= read_next(buf, result);
+  MYSQL_READ_ROW_DONE(ret_val);
+  DBUG_RETURN(ret_val);
 }
 
 

=== modified file 'storage/federated/ha_federated.h'
--- a/storage/federated/ha_federated.h	2009-03-17 20:07:27 +0000
+++ b/storage/federated/ha_federated.h	2009-07-02 14:23:36 +0000
@@ -237,6 +237,7 @@ public:
   int rnd_init(bool scan);                                      //required
   int rnd_end();
   int rnd_next(uchar *buf);                                      //required
+  int rnd_next_int(uchar *buf);
   int rnd_pos(uchar *buf, uchar *pos);                            //required
   void position(const uchar *record);                            //required
   int info(uint);                                              //required

=== modified file 'storage/heap/Makefile.am'
--- a/storage/heap/Makefile.am	2009-01-07 10:58:33 +0000
+++ b/storage/heap/Makefile.am	2009-07-02 14:23:36 +0000
@@ -49,3 +49,24 @@ libheap_a_SOURCES =	hp_open.c hp_extra.c
 
 
 EXTRA_DIST =		CMakeLists.txt plug.in
+
+if HAVE_DTRACE_DASH_G
+libheap_a_LIBADD = probes_mysql.o
+libheap_a_DEPENDENCIES = probes_mysql.o dtrace_files dtrace_providers
+CLEANFILES = probes_mysql.o dtrace_files dtrace_providers
+DTRACEFILES = ha_heap.o
+DTRACEPROVIDER = probes_mysql.d
+CLEANFILES += $(DTRACEPROVIDER) dtrace_sources
+
+dtrace_files:
+	echo $(DTRACEFILES) > $@
+dtrace_providers: probes_mysql.d
+	echo $(DTRACEPROVIDER) > $@
+probes_mysql.d:
+	-$(RM) -f probes_mysql.d
+	$(CP) $(top_srcdir)/include/probes_mysql.d.base probes_mysql.d
+	echo timestamp > dtrace_sources
+
+probes_mysql.o: $(DTRACEPROVIDER) $(DTRACEFILES)
+	$(DTRACE) $(DTRACEFLAGS) -G -s $(DTRACEPROVIDER) $(DTRACEFILES) -o $@
+endif

=== modified file 'storage/heap/ha_heap.cc'
--- a/storage/heap/ha_heap.cc	2008-12-16 12:12:22 +0000
+++ b/storage/heap/ha_heap.cc	2009-02-17 12:24:09 +0000
@@ -20,6 +20,7 @@
 
 #define MYSQL_SERVER 1
 #include "mysql_priv.h"
+#include "probes_mysql.h"
 #include <mysql/plugin.h>
 #include "ha_heap.h"
 #include "heapdef.h"
@@ -274,21 +275,25 @@ int ha_heap::index_read_map(uchar *buf, 
                             key_part_map keypart_map,
                             enum ha_rkey_function find_flag)
 {
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
   DBUG_ASSERT(inited==INDEX);
   ha_statistic_increment(&SSV::ha_read_key_count);
   int error = heap_rkey(file,buf,active_index, key, keypart_map, find_flag);
   table->status = error ? STATUS_NOT_FOUND : 0;
+  MYSQL_INDEX_READ_ROW_DONE(error);
   return error;
 }
 
 int ha_heap::index_read_last_map(uchar *buf, const uchar *key,
                                  key_part_map keypart_map)
 {
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
   DBUG_ASSERT(inited==INDEX);
   ha_statistic_increment(&SSV::ha_read_key_count);
   int error= heap_rkey(file, buf, active_index, key, keypart_map,
 		       HA_READ_PREFIX_LAST);
   table->status= error ? STATUS_NOT_FOUND : 0;
+  MYSQL_INDEX_READ_ROW_DONE(error);
   return error;
 }
 
@@ -296,45 +301,55 @@ int ha_heap::index_read_idx_map(uchar *b
                                 key_part_map keypart_map,
                                 enum ha_rkey_function find_flag)
 {
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
   ha_statistic_increment(&SSV::ha_read_key_count);
   int error = heap_rkey(file, buf, index, key, keypart_map, find_flag);
   table->status = error ? STATUS_NOT_FOUND : 0;
+  MYSQL_INDEX_READ_ROW_DONE(error);
   return error;
 }
 
 int ha_heap::index_next(uchar * buf)
 {
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
   DBUG_ASSERT(inited==INDEX);
   ha_statistic_increment(&SSV::ha_read_next_count);
   int error=heap_rnext(file,buf);
   table->status=error ? STATUS_NOT_FOUND: 0;
+  MYSQL_INDEX_READ_ROW_DONE(error);
   return error;
 }
 
 int ha_heap::index_prev(uchar * buf)
 {
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
   DBUG_ASSERT(inited==INDEX);
   ha_statistic_increment(&SSV::ha_read_prev_count);
   int error=heap_rprev(file,buf);
   table->status=error ? STATUS_NOT_FOUND: 0;
+  MYSQL_INDEX_READ_ROW_DONE(error);
   return error;
 }
 
 int ha_heap::index_first(uchar * buf)
 {
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
   DBUG_ASSERT(inited==INDEX);
   ha_statistic_increment(&SSV::ha_read_first_count);
   int error=heap_rfirst(file, buf, active_index);
   table->status=error ? STATUS_NOT_FOUND: 0;
+  MYSQL_INDEX_READ_ROW_DONE(error);
   return error;
 }
 
 int ha_heap::index_last(uchar * buf)
 {
+  MYSQL_INDEX_READ_ROW_START(table_share->db.str, table_share->table_name.str);
   DBUG_ASSERT(inited==INDEX);
   ha_statistic_increment(&SSV::ha_read_last_count);
   int error=heap_rlast(file, buf, active_index);
   table->status=error ? STATUS_NOT_FOUND: 0;
+  MYSQL_INDEX_READ_ROW_DONE(error);
   return error;
 }
 
@@ -345,9 +360,12 @@ int ha_heap::rnd_init(bool scan)
 
 int ha_heap::rnd_next(uchar *buf)
 {
+  MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str,
+                       TRUE);
   ha_statistic_increment(&SSV::ha_read_rnd_next_count);
   int error=heap_scan(file, buf);
   table->status=error ? STATUS_NOT_FOUND: 0;
+  MYSQL_READ_ROW_DONE(error);
   return error;
 }
 
@@ -355,10 +373,13 @@ int ha_heap::rnd_pos(uchar * buf, uchar 
 {
   int error;
   HEAP_PTR heap_position;
+  MYSQL_READ_ROW_START(table_share->db.str, table_share->table_name.str,
+                       FALSE);
   ha_statistic_increment(&SSV::ha_read_rnd_count);
   memcpy_fixed((char*) &heap_position, pos, sizeof(HEAP_PTR));
   error=heap_rrnd(file, buf, heap_position);
   table->status=error ? STATUS_NOT_FOUND: 0;
+  MYSQL_READ_ROW_DONE(error);
   return error;
 }
 

=== modified file 'storage/innobase/CMakeLists.txt'
--- a/storage/innobase/CMakeLists.txt	2009-03-28 00:27:25 +0000
+++ b/storage/innobase/CMakeLists.txt	2009-07-02 14:23:36 +0000
@@ -28,6 +28,28 @@ INCLUDE_DIRECTORIES(
 					${CMAKE_SOURCE_DIR}/storage/innobase/include
 					${CMAKE_SOURCE_DIR}/storage/innobase/handler
 					)
+IF (WIN32)
+  IF (NOT WITHOUT_ATOMICS)
+# Check if this Windows version supports atomic instructions
+    IF (CMAKE_SIZEOF_VOID_P MATCHES 8)
+# Check for 64 bit atomics
+      TRY_RUN(RUN_RES COMPILE_RES ${CMAKE_BINARY_DIR}
+              ${CMAKE_SOURCE_DIR}/storage/innobase/win_atomics64_test.c)
+      IF (COMPILE_RES AND NOT RUN_RES)
+        MESSAGE("Adding support for Win64 atomics")
+        ADD_DEFINITIONS(-DWIN_ATOMICS64)
+      ENDIF (COMPILE_RES AND NOT RUN_RES)
+    ELSE (CMAKE_SIZEOF_VOID_P MATCHES 8)
+# Check for 32 bit atomics
+      TRY_RUN(RUN_RES COMPILE_RES ${CMAKE_BINARY_DIR}
+              ${CMAKE_SOURCE_DIR}/storage/innobase/win_atomics32_test.c)
+      IF (COMPILE_RES AND NOT RUN_RES)
+        MESSAGE("Adding support for Win32 atomics")
+        ADD_DEFINITIONS(-DWIN_ATOMICS32)
+      ENDIF (COMPILE_RES AND NOT RUN_RES)
+    ENDIF (CMAKE_SIZEOF_VOID_P MATCHES 8)
+  ENDIF (NOT WITHOUT_ATOMICS)
+ENDIF (WIN32)
 
 SET(INNOBASE_SOURCES  btr/btr0btr.c btr/btr0cur.c btr/btr0pcur.c btr/btr0sea.c 
 					 buf/buf0buf.c buf/buf0flu.c buf/buf0lru.c buf/buf0rea.c 

=== modified file 'storage/innobase/Makefile.am'
--- a/storage/innobase/Makefile.am	2009-01-24 21:18:12 +0000
+++ b/storage/innobase/Makefile.am	2009-07-02 14:23:36 +0000
@@ -163,4 +163,5 @@ ha_innodb_la_SOURCES=	$(libinnobase_a_SO
 
 EXTRA_DIST=		CMakeLists.txt plug.in \
 			pars/make_bison.sh pars/make_flex.sh \
-			pars/pars0grm.y pars/pars0lex.l
+			pars/pars0grm.y pars/pars0lex.l \
+			win_atomics32_test.c win_atomics64_test.c

=== modified file 'storage/innobase/btr/btr0cur.c'
--- a/storage/innobase/btr/btr0cur.c	2008-12-19 00:34:15 +0000
+++ b/storage/innobase/btr/btr0cur.c	2009-07-02 14:23:36 +0000
@@ -333,7 +333,7 @@ btr_cur_search_to_nth_level(
 #ifdef UNIV_SEARCH_PERF_STAT
 	info->n_searches++;
 #endif
-	if (btr_search_latch.writer == RW_LOCK_NOT_LOCKED
+	if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_NOT_LOCKED
 	    && latch_mode <= BTR_MODIFY_LEAF && info->last_hash_succ
 	    && !estimate
 #ifdef PAGE_CUR_LE_OR_EXTENDS

=== modified file 'storage/innobase/btr/btr0sea.c'
--- a/storage/innobase/btr/btr0sea.c	2009-04-24 12:16:40 +0000
+++ b/storage/innobase/btr/btr0sea.c	2009-07-02 14:23:36 +0000
@@ -774,8 +774,8 @@ btr_search_guess_on_hash(
 		rw_lock_s_lock(&btr_search_latch);
 	}
 
-	ut_ad(btr_search_latch.writer != RW_LOCK_EX);
-	ut_ad(btr_search_latch.reader_count > 0);
+	ut_ad(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_EX);
+	ut_ad(rw_lock_get_reader_count(&btr_search_latch) > 0);
 
 	rec = ha_search_and_get_data(btr_search_sys->hash_index, fold);
 

=== modified file 'storage/innobase/buf/buf0buf.c'
--- a/storage/innobase/buf/buf0buf.c	2008-09-06 07:22:50 +0000
+++ b/storage/innobase/buf/buf0buf.c	2009-07-02 14:23:36 +0000
@@ -1277,8 +1277,8 @@ loop:
 
 	if (mode == BUF_GET_NOWAIT) {
 		if (rw_latch == RW_S_LATCH) {
-			success = rw_lock_s_lock_func_nowait(&(block->lock),
-							     file, line);
+			success = rw_lock_s_lock_nowait(&(block->lock),
+							file, line);
 			fix_type = MTR_MEMO_PAGE_S_FIX;
 		} else {
 			ut_ad(rw_latch == RW_X_LATCH);
@@ -1403,8 +1403,8 @@ buf_page_optimistic_get_func(
 	ut_ad(!ibuf_inside() || ibuf_page(block->space, block->offset));
 
 	if (rw_latch == RW_S_LATCH) {
-		success = rw_lock_s_lock_func_nowait(&(block->lock),
-						     file, line);
+		success = rw_lock_s_lock_nowait(&(block->lock),
+						file, line);
 		fix_type = MTR_MEMO_PAGE_S_FIX;
 	} else {
 		success = rw_lock_x_lock_func_nowait(&(block->lock),
@@ -1534,8 +1534,8 @@ buf_page_get_known_nowait(
 	ut_ad(!ibuf_inside() || (mode == BUF_KEEP_OLD));
 
 	if (rw_latch == RW_S_LATCH) {
-		success = rw_lock_s_lock_func_nowait(&(block->lock),
-						     file, line);
+		success = rw_lock_s_lock_nowait(&(block->lock),
+						file, line);
 		fix_type = MTR_MEMO_PAGE_S_FIX;
 	} else {
 		success = rw_lock_x_lock_func_nowait(&(block->lock),

=== modified file 'storage/innobase/handler/ha_innodb.cc'
--- a/storage/innobase/handler/ha_innodb.cc	2009-06-18 08:20:27 +0000
+++ b/storage/innobase/handler/ha_innodb.cc	2009-07-02 14:23:36 +0000
@@ -104,7 +104,7 @@ static const long AUTOINC_NO_LOCKING = 2
 
 static long innobase_mirrored_log_groups, innobase_log_files_in_group,
 	innobase_log_buffer_size, innobase_buffer_pool_awe_mem_mb,
-	innobase_additional_mem_pool_size, innobase_file_io_threads,
+	innobase_additional_mem_pool_size,
 	innobase_lock_wait_timeout, innobase_force_recovery,
 	innobase_open_files, innobase_autoinc_lock_mode;
 
@@ -139,6 +139,24 @@ static my_bool	innobase_adaptive_hash_in
 
 static char*	internal_innobase_data_file_path	= NULL;
 
+/* Default number of IO per second supported by server. Tunes background
+   IO rate. */
+static long innobase_io_capacity = 100;
+
+/* Write dirty pages when pct dirty is less than max pct dirty */
+static my_bool innobase_extra_dirty_writes = TRUE;
+
+/* Max number of IO requests merged to perform large IO in background
+   IO threads.
+*/
+long innobase_max_merged_io = 64;
+
+/* Number of background IO threads for read and write. */
+long innobase_read_io_threads, innobase_write_io_threads;
+
+/* Use timer based InnoDB concurrency throttling flag */
+static my_bool innobase_thread_concurrency_timer_based;
+
 /* The following counter is used to convey information to InnoDB
 about server activity: in selects it is not sensible to call
 srv_active_wake_master_thread after each fetch or search, we only do
@@ -380,6 +398,10 @@ static SHOW_VAR innodb_status_variables[
   (char*) &export_vars.innodb_dblwr_pages_written,	  SHOW_LONG},
   {"dblwr_writes",
   (char*) &export_vars.innodb_dblwr_writes,		  SHOW_LONG},
+  {"have_sync_atomic",
+  (char*) &export_vars.innodb_have_sync_atomic,		  SHOW_BOOL},
+  {"heap_enabled",
+  (char*) &export_vars.innodb_heap_enabled,		  SHOW_BOOL},
   {"log_waits",
   (char*) &export_vars.innodb_log_waits,		  SHOW_LONG},
   {"log_write_requests",
@@ -420,6 +442,8 @@ static SHOW_VAR innodb_status_variables[
   (char*) &export_vars.innodb_rows_read,		  SHOW_LONG},
   {"rows_updated",
   (char*) &export_vars.innodb_rows_updated,		  SHOW_LONG},
+  {"wake_ups",
+  (char*) &export_vars.innodb_wake_ups,		  SHOW_LONG},
   {NullS, NullS, SHOW_LONG}
 };
 
@@ -1702,11 +1726,17 @@ innobase_init(
 	srv_n_log_files = (ulint) innobase_log_files_in_group;
 	srv_log_file_size = (ulint) innobase_log_file_size;
 
+        srv_thread_concurrency_timer_based =
+          (ibool) innobase_thread_concurrency_timer_based;
+
 #ifdef UNIV_LOG_ARCHIVE
 	srv_log_archive_on = (ulint) innobase_log_archive;
 #endif /* UNIV_LOG_ARCHIVE */
 	srv_log_buffer_size = (ulint) innobase_log_buffer_size;
 
+	srv_io_capacity = (ulint) innobase_io_capacity;
+	srv_extra_dirty_writes = (ulint) innobase_extra_dirty_writes;
+
 	/* We set srv_pool_size here in units of 1 kB. InnoDB internally
 	changes the value so that it becomes the number of database pages. */
 
@@ -1726,7 +1756,9 @@ innobase_init(
 
 	srv_mem_pool_size = (ulint) innobase_additional_mem_pool_size;
 
-	srv_n_file_io_threads = (ulint) innobase_file_io_threads;
+        srv_n_read_io_threads = (ulint) innobase_read_io_threads;
+        srv_n_write_io_threads = (ulint) innobase_write_io_threads;
+        srv_max_merged_io = (ulint) innobase_max_merged_io;
 
 	srv_lock_wait_timeout = (ulint) innobase_lock_wait_timeout;
 	srv_force_recovery = (ulint) innobase_force_recovery;
@@ -7174,8 +7206,7 @@ innodb_show_status(
 
 	mutex_enter_noninline(&srv_monitor_file_mutex);
 	rewind(srv_monitor_file);
-	srv_printf_innodb_monitor(srv_monitor_file,
-				&trx_list_start, &trx_list_end);
+	srv_printf_innodb_monitor(srv_monitor_file);
 	flen = ftell(srv_monitor_file);
 	os_file_set_eof(srv_monitor_file);
 
@@ -7244,6 +7275,7 @@ innodb_mutex_show_status(
 {
 	char buf1[IO_SIZE], buf2[IO_SIZE];
 	mutex_t*  mutex;
+	rw_lock_t* lock;
 #ifdef UNIV_DEBUG
 	ulint	  rw_lock_count= 0;
 	ulint	  rw_lock_count_spin_loop= 0;
@@ -7314,6 +7346,31 @@ innodb_mutex_show_status(
 
 	mutex_exit_noninline(&mutex_list_mutex);
 
+	mutex_enter_noninline(&rw_lock_list_mutex);
+
+	lock = UT_LIST_GET_FIRST(rw_lock_list);
+
+	while (lock != NULL)
+	{
+		if (lock->count_os_wait)
+		{
+			buf1len= my_snprintf(buf1, sizeof(buf1), "%s:%lu",
+                                    lock->cfile_name, (ulong) lock->cline);
+			buf2len= my_snprintf(buf2, sizeof(buf2),
+                                    "os_waits=%lu", lock->count_os_wait);
+
+			if (stat_print(thd, innobase_hton_name,
+				       hton_name_len, buf1, buf1len,
+				       buf2, buf2len)) {
+				mutex_exit_noninline(&rw_lock_list_mutex);
+				DBUG_RETURN(1);
+			}
+		}
+		lock = UT_LIST_GET_NEXT(list, lock);
+	}
+
+	mutex_exit_noninline(&rw_lock_list_mutex);
+
 #ifdef UNIV_DEBUG
 	buf2len= my_snprintf(buf2, sizeof(buf2),
 		"count=%lu, spin_waits=%lu, spin_rounds=%lu, "
@@ -7346,6 +7403,7 @@ bool innobase_show_status(handlerton *ht
 		return FALSE;
 	}
 }
+	rw_lock_t* lock;
 
 
 /****************************************************************************
@@ -8278,6 +8336,16 @@ static MYSQL_SYSVAR_BOOL(doublewrite, in
   "Disable with --skip-innodb-doublewrite.",
   NULL, NULL, TRUE);
 
+static MYSQL_SYSVAR_BOOL(extra_dirty_writes, innobase_extra_dirty_writes,
+  PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
+  "Flush dirty buffer pages when dirty max pct is not exceeded",
+  NULL, NULL, TRUE);
+
+static MYSQL_SYSVAR_LONG(io_capacity, innobase_io_capacity,
+  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+  "Number of IOPs the server can do. Tunes the background IO rate",
+  NULL, NULL, (long)200, (long)100, LONG_MAX, (long)0);
+
 static MYSQL_SYSVAR_ULONG(fast_shutdown, innobase_fast_shutdown,
   PLUGIN_VAR_OPCMDARG,
   "Speeds up the shutdown process of the InnoDB storage engine. Possible "
@@ -8288,7 +8356,8 @@ static MYSQL_SYSVAR_ULONG(fast_shutdown,
   */
   IF_NETWARE("", " or 2 (fastest - crash-like)")
   ".",
-  NULL, NULL, 1, 0, IF_NETWARE(1,2), 0);
+  NULL, NULL, (unsigned long)1, (unsigned long)0,
+  (unsigned long)IF_NETWARE(1,2), (unsigned long)0);
 
 static MYSQL_SYSVAR_BOOL(file_per_table, innobase_file_per_table,
   PLUGIN_VAR_NOCMDARG | PLUGIN_VAR_READONLY,
@@ -8300,7 +8369,8 @@ static MYSQL_SYSVAR_ULONG(flush_log_at_t
   "Set to 0 (write and flush once per second),"
   " 1 (write and flush at each commit)"
   " or 2 (write at commit, flush once per second).",
-  NULL, NULL, 1, 0, 2, 0);
+  NULL, NULL, (unsigned long)1, (unsigned long)0, (unsigned long)2,
+  (unsigned long)0);
 
 static MYSQL_SYSVAR_STR(flush_method, innobase_unix_file_flush_method,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
@@ -8328,12 +8398,14 @@ static MYSQL_SYSVAR_STR(log_group_home_d
 static MYSQL_SYSVAR_ULONG(max_dirty_pages_pct, srv_max_buf_pool_modified_pct,
   PLUGIN_VAR_RQCMDARG,
   "Percentage of dirty pages allowed in bufferpool.",
-  NULL, NULL, 90, 0, 100, 0);
+  NULL, NULL, (unsigned long)75, (unsigned long)0, (unsigned long)99,
+  (unsigned long)0);
 
 static MYSQL_SYSVAR_ULONG(max_purge_lag, srv_max_purge_lag,
   PLUGIN_VAR_RQCMDARG,
   "Desired maximum length of the purge queue (0 = no limit)",
-  NULL, NULL, 0, 0, ~0L, 0);
+  NULL, NULL, (unsigned long)0, (unsigned long)0, (unsigned long)~0L,
+  (unsigned long)0);
 
 static MYSQL_SYSVAR_BOOL(rollback_on_timeout, innobase_rollback_on_timeout,
   PLUGIN_VAR_OPCMDARG | PLUGIN_VAR_READONLY,
@@ -8359,82 +8431,106 @@ static MYSQL_SYSVAR_BOOL(adaptive_hash_i
 static MYSQL_SYSVAR_LONG(additional_mem_pool_size, innobase_additional_mem_pool_size,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
   "Size of a memory pool InnoDB uses to store data dictionary information and other internal data structures.",
-  NULL, NULL, 1*1024*1024L, 512*1024L, LONG_MAX, 1024);
+  NULL, NULL, (long)8*1024*1024L, (long)2*1024*1024L, LONG_MAX, (long)1024);
 
 static MYSQL_SYSVAR_ULONG(autoextend_increment, srv_auto_extend_increment,
   PLUGIN_VAR_RQCMDARG,
   "Data file autoextend increment in megabytes",
-  NULL, NULL, 8L, 1L, 1000L, 0);
+  NULL, NULL, (unsigned long)64L, (unsigned long)1L, (unsigned long)1000L,
+  (unsigned long)0);
 
 static MYSQL_SYSVAR_LONGLONG(buffer_pool_size, innobase_buffer_pool_size,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
   "The size of the memory buffer InnoDB uses to cache data and indexes of its tables.",
-  NULL, NULL, 8*1024*1024L, 1024*1024L, LONGLONG_MAX, 1024*1024L);
+  NULL, NULL, (long long)1024*1024*1024L, (long long)64*1024*1024L,
+  LONGLONG_MAX, (long long)1024*1024L);
 
 static MYSQL_SYSVAR_ULONG(commit_concurrency, srv_commit_concurrency,
   PLUGIN_VAR_RQCMDARG,
   "Helps in performance tuning in heavily concurrent environments.",
-  NULL, NULL, 0, 0, 1000, 0);
+  NULL, NULL, (unsigned long)0, (unsigned long)0, (unsigned long)1000,
+  (unsigned long)0);
 
 static MYSQL_SYSVAR_ULONG(concurrency_tickets, srv_n_free_tickets_to_enter,
   PLUGIN_VAR_RQCMDARG,
   "Number of times a thread is allowed to enter InnoDB within the same SQL query after it has once got the ticket",
-  NULL, NULL, 500L, 1L, ~0L, 0);
+  NULL, NULL, (unsigned long)500L, (unsigned long)1L, (unsigned long)~0L,
+  (unsigned long)0);
+
+static MYSQL_SYSVAR_LONG(write_io_threads, innobase_write_io_threads,
+  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+  "Number of write I/O threads in InnoDB.",
+  NULL, NULL, (long)8, (long)1, (long)64, (long)0);
 
-static MYSQL_SYSVAR_LONG(file_io_threads, innobase_file_io_threads,
+static MYSQL_SYSVAR_LONG(read_io_threads, innobase_read_io_threads,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
-  "Number of file I/O threads in InnoDB.",
-  NULL, NULL, 4, 4, 64, 0);
+  "Number of read I/O threads in InnoDB.",
+  NULL, NULL, (long)8, (long)1, (long)64, (long)0);
+
+static MYSQL_SYSVAR_LONG(max_merged_io, innobase_max_merged_io,
+  PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+  "Max number of adjacent IO requests to merge in InnoDB.",
+  NULL, NULL, (long)64, (long)1, (long)64, (long)0);
 
 static MYSQL_SYSVAR_LONG(force_recovery, innobase_force_recovery,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
   "Helps to save your data in case the disk image of the database becomes corrupt.",
-  NULL, NULL, 0, 0, 6, 0);
+  NULL, NULL, (long)0, (long)0, (long)6, (long)0);
 
 static MYSQL_SYSVAR_LONG(lock_wait_timeout, innobase_lock_wait_timeout,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
   "Timeout in seconds an InnoDB transaction may wait for a lock before being rolled back.",
-  NULL, NULL, 50, 1, 1024 * 1024 * 1024, 0);
+  NULL, NULL, (long)50, (long)1, (long)(1024*1024*1024), (long)0);
 
 static MYSQL_SYSVAR_LONG(log_buffer_size, innobase_log_buffer_size,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
   "The size of the buffer which InnoDB uses to write log to the log files on disk.",
-  NULL, NULL, 1024*1024L, 256*1024L, LONG_MAX, 1024);
+  NULL, NULL, (long)16*1024*1024L, (long)2*1024*1024L, LONG_MAX, (long)1024);
 
 static MYSQL_SYSVAR_LONGLONG(log_file_size, innobase_log_file_size,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
   "Size of each log file in a log group.",
-  NULL, NULL, 5*1024*1024L, 1*1024*1024L, LONGLONG_MAX, 1024*1024L);
+  NULL, NULL, (long long)128*1024*1024L, (long long)32*1024*1024L,
+  LONGLONG_MAX, (long long)1024*1024L);
 
 static MYSQL_SYSVAR_LONG(log_files_in_group, innobase_log_files_in_group,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
   "Number of log files in the log group. InnoDB writes to the files in a circular fashion. Value 3 is recommended here.",
-  NULL, NULL, 2, 2, 100, 0);
+  NULL, NULL, (long)3, (long)2, (long)100, (long)0);
 
 static MYSQL_SYSVAR_LONG(mirrored_log_groups, innobase_mirrored_log_groups,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
   "Number of identical copies of log groups we keep for the database. Currently this should be set to 1.",
-  NULL, NULL, 1, 1, 10, 0);
+  NULL, NULL, (long)1, (long)1, (long)10, (long)0);
 
 static MYSQL_SYSVAR_LONG(open_files, innobase_open_files,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
   "How many files at the maximum InnoDB keeps open at the same time.",
-  NULL, NULL, 300L, 10L, LONG_MAX, 0);
+  NULL, NULL, (long)300L, (long)10L, LONG_MAX, (long)0L);
 
 static MYSQL_SYSVAR_ULONG(sync_spin_loops, srv_n_spin_wait_rounds,
   PLUGIN_VAR_RQCMDARG,
   "Count of spin-loop rounds in InnoDB mutexes",
-  NULL, NULL, 20L, 0L, ~0L, 0);
+  NULL, NULL, (unsigned long)20L, (unsigned long)0L, (unsigned long)~0L,
+  (unsigned long)0L);
+
+static MYSQL_SYSVAR_BOOL(thread_concurrency_timer_based,
+                         innobase_thread_concurrency_timer_based,
+                         PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
+  "Use InnoDB timer based concurrency throttling. ",
+  NULL, NULL, TRUE);
 
 static MYSQL_SYSVAR_ULONG(thread_concurrency, srv_thread_concurrency,
   PLUGIN_VAR_RQCMDARG,
   "Helps in performance tuning in heavily concurrent environments. Sets the maximum number of threads allowed inside InnoDB. Value 0 will disable the thread throttling.",
-  NULL, NULL, 8, 0, 1000, 0);
+  NULL, NULL, (unsigned long)0, (unsigned long)0, (unsigned long)1000,
+  (unsigned long)0);
 
 static MYSQL_SYSVAR_ULONG(thread_sleep_delay, srv_thread_sleep_delay,
   PLUGIN_VAR_RQCMDARG,
   "Time of innodb thread sleeping before joining InnoDB queue (usec). Value 0 disable a sleep",
-  NULL, NULL, 10000L, 0L, ~0L, 0);
+  NULL, NULL, (unsigned long)10000L, (unsigned long)0L, (unsigned long)~0L,
+  (unsigned long)0);
 
 static MYSQL_SYSVAR_STR(data_file_path, innobase_data_file_path,
   PLUGIN_VAR_RQCMDARG | PLUGIN_VAR_READONLY,
@@ -8451,7 +8547,7 @@ static MYSQL_SYSVAR_LONG(autoinc_lock_mo
   NULL, NULL,
   AUTOINC_NEW_STYLE_LOCKING,	/* Default setting */
   AUTOINC_OLD_STYLE_LOCKING,	/* Minimum value */
-  AUTOINC_NO_LOCKING, 0);	/* Maximum value */
+  AUTOINC_NO_LOCKING, (long)0);	/* Maximum value */
 
 static struct st_mysql_sys_var* innobase_system_variables[]= {
   MYSQL_SYSVAR(additional_mem_pool_size),
@@ -8464,7 +8560,10 @@ static struct st_mysql_sys_var* innobase
   MYSQL_SYSVAR(data_home_dir),
   MYSQL_SYSVAR(doublewrite),
   MYSQL_SYSVAR(fast_shutdown),
-  MYSQL_SYSVAR(file_io_threads),
+  MYSQL_SYSVAR(read_io_threads),
+  MYSQL_SYSVAR(write_io_threads),
+  MYSQL_SYSVAR(max_merged_io),
+  MYSQL_SYSVAR(thread_concurrency_timer_based),
   MYSQL_SYSVAR(file_per_table),
   MYSQL_SYSVAR(flush_log_at_trx_commit),
   MYSQL_SYSVAR(flush_method),
@@ -8493,6 +8592,8 @@ static struct st_mysql_sys_var* innobase
   MYSQL_SYSVAR(thread_concurrency),
   MYSQL_SYSVAR(thread_sleep_delay),
   MYSQL_SYSVAR(autoinc_lock_mode),
+  MYSQL_SYSVAR(extra_dirty_writes),
+  MYSQL_SYSVAR(io_capacity),
   NULL
 };
 

=== modified file 'storage/innobase/include/buf0buf.ic'
--- a/storage/innobase/include/buf0buf.ic	2008-02-01 10:55:39 +0000
+++ b/storage/innobase/include/buf0buf.ic	2008-10-15 18:54:18 +0000
@@ -513,7 +513,7 @@ buf_block_buf_fix_inc_debug(
 {
 	ibool	ret;
 
-	ret = rw_lock_s_lock_func_nowait(&(block->debug_latch), file, line);
+	ret = rw_lock_s_lock_nowait(&(block->debug_latch), file, line);
 
 	ut_ad(ret == TRUE);
 	ut_ad(mutex_own(&block->mutex));

=== modified file 'storage/innobase/include/log0log.h'
--- a/storage/innobase/include/log0log.h	2006-03-10 16:22:21 +0000
+++ b/storage/innobase/include/log0log.h	2008-10-15 12:30:31 +0000
@@ -169,6 +169,13 @@ void
 log_buffer_flush_to_disk(void);
 /*==========================*/
 /********************************************************************
+Flushes the log buffer. Forces it to disk depending on the value of
+the configuration parameter innodb_flush_log_at_trx_commit. */
+
+void
+log_buffer_flush_maybe_sync(void);
+/*==========================*/
+/********************************************************************
 Advances the smallest lsn for which there are unflushed dirty blocks in the
 buffer pool and also may make a new checkpoint. NOTE: this function may only
 be called if the calling thread owns no synchronization objects! */

=== modified file 'storage/innobase/include/os0file.h'
--- a/storage/innobase/include/os0file.h	2007-07-10 14:34:21 +0000
+++ b/storage/innobase/include/os0file.h	2008-10-14 07:56:07 +0000
@@ -535,21 +535,19 @@ os_file_create_subdirs_if_needed(
 				   FALSE otherwise */
 	const char*	path);	/* in: path name */
 /****************************************************************************
-Initializes the asynchronous io system. Creates separate aio array for
-non-ibuf read and write, a third aio array for the ibuf i/o, with just one
-segment, two aio arrays for log reads and writes with one segment, and a
-synchronous aio array of the specified size. The combined number of segments
-in the three first aio arrays is the parameter n_segments given to the
-function. The caller must create an i/o handler thread for each segment in
-the four first arrays, but not for the sync aio array. */
+Initializes the asynchronous io system. Creates n_read_threads segments for
+read, n_write_threads segments for writes, one segment for the ibuf i/o, and
+one segment for log IO. Returns the number of segments created. When async
+IO is not used, and 4 threads should be created to process requests put
+in the segments. */
 
-void
+ulint
 os_aio_init(
 /*========*/
-	ulint	n,		/* in: maximum number of pending aio operations
-				allowed; n must be divisible by n_segments */
-	ulint	n_segments,	/* in: combined number of segments in the four
-				first aio arrays; must be >= 4 */
+	ulint	ios_per_array,	/* in: maximum number of pending aio operations
+                                allowed per array */
+	ulint	n_read_threads, /* in: number of read threads */
+	ulint	n_write_threads, /* in: number of write threads */
 	ulint	n_slots_sync);	/* in: number of slots in the sync aio array */
 /***********************************************************************
 Requests an asynchronous i/o operation. */

=== modified file 'storage/innobase/include/os0sync.h'
--- a/storage/innobase/include/os0sync.h	2008-06-12 00:08:07 +0000
+++ b/storage/innobase/include/os0sync.h	2009-07-02 14:23:36 +0000
@@ -12,6 +12,10 @@ Created 9/6/1995 Heikki Tuuri
 #include "univ.i"
 #include "ut0lst.h"
 
+#ifdef HAVE_SOLARIS_ATOMIC
+#include <atomic.h>
+#endif
+
 #ifdef __WIN__
 
 #define os_fast_mutex_t CRITICAL_SECTION
@@ -261,6 +265,45 @@ os_fast_mutex_free(
 /*===============*/
 	os_fast_mutex_t*	fast_mutex);	/* in: mutex to free */
 
+#ifdef UNIV_SYNC_ATOMIC
+/**************************************************************
+Atomic compare-and-swap for InnoDB. Needs GCC, Solaris or Windows atomics. */
+UNIV_INLINE
+ibool
+os_compare_and_swap(
+/*================*/
+						/* out: true if swapped */
+	volatile lint*		ptr,		/* in: pointer to target */
+	lint			oldVal,		/* in: value to compare to */
+	lint			newVal);	/* in: value to swap in */
+
+/**************************************************************
+Atomic increment for InnoDB. Needs GCC, Solaris or Windows atomics. */
+UNIV_INLINE
+lint
+os_atomic_increment(
+/*================*/
+						/* out: resulting value */
+	volatile lint*		ptr,		/* in: pointer to target */
+	lint			amount);	/* in: amount of increment */
+
+/**************************************************************
+Memory barrier operations for InnoDB.
+Implemented with GCC builtins, Solaris membar_* or Windows MemoryBarrier(). */
+UNIV_INLINE
+void
+os_memory_barrier_load(void);
+
+UNIV_INLINE
+void
+os_memory_barrier_store(void);
+
+UNIV_INLINE
+void
+os_memory_barrier(void);
+
+#endif /* UNIV_SYNC_ATOMIC */
+
 #ifndef UNIV_NONINL
 #include "os0sync.ic"
 #endif

=== modified file 'storage/innobase/include/os0sync.ic'
--- a/storage/innobase/include/os0sync.ic	2006-03-10 16:22:21 +0000
+++ b/storage/innobase/include/os0sync.ic	2009-06-16 13:16:15 +0000
@@ -44,3 +44,109 @@ os_fast_mutex_trylock(
 #endif
 #endif
 }
+
+#ifdef UNIV_SYNC_ATOMIC
+/**************************************************************
+Atomic compare-and-swap for InnoDB. Requires GCC atomic builtins,
+Solaris atomic_* functions or the Windows Interlocked* functions. */
+UNIV_INLINE
+ibool
+os_compare_and_swap(
+/*================*/
+					/* out: true if swapped */
+	volatile lint*	ptr,		/* in: pointer to target */
+	lint		oldVal,		/* in: value to compare to */
+	lint		newVal)		/* in: value to swap in */
+{
+#ifdef HAVE_GCC_ATOMIC_BUILTINS
+	return (__sync_bool_compare_and_swap(ptr, oldVal, newVal));
+#elif defined(HAVE_SOLARIS_ATOMIC)
+	lint retVal = (lint)atomic_cas_ulong((volatile ulong_t *)ptr,
+		oldVal, newVal);
+	return (retVal == oldVal);
+#elif defined(WIN_ATOMICS32)
+	lint retVal = (lint)InterlockedCompareExchange(ptr, newVal, oldVal);
+	return (retVal == oldVal);
+#elif defined(WIN_ATOMICS64)
+	lint retVal = (lint)InterlockedCompareExchange64(ptr, newVal, oldVal);
+	return (retVal == oldVal);
+#else
+#error "Need support for atomic ops"
+#endif
+}
+
+/**************************************************************
+Memory barrier for load, issued through the configured atomics backend */
+UNIV_INLINE
+void
+os_memory_barrier_load(void)
+{
+#ifdef HAVE_GCC_ATOMIC_BUILTINS
+  __sync_synchronize();
+#elif defined(HAVE_SOLARIS_ATOMIC)
+  membar_consumer();
+#elif defined(WIN_ATOMICS32)
+  MemoryBarrier();
+#elif defined(WIN_ATOMICS64)
+  MemoryBarrier();
+#endif
+}
+
+/**************************************************************
+Memory barrier for store, issued through the configured atomics backend */
+UNIV_INLINE
+void
+os_memory_barrier_store(void)
+{
+#ifdef HAVE_GCC_ATOMIC_BUILTINS
+  __sync_synchronize();
+#elif defined(HAVE_SOLARIS_ATOMIC)
+  membar_producer();
+#elif defined(WIN_ATOMICS32)
+  MemoryBarrier();
+#elif defined(WIN_ATOMICS64)
+  MemoryBarrier();
+#endif
+}
+
+/**************************************************************
+Memory barrier, issued through the configured atomics backend */
+UNIV_INLINE
+void
+os_memory_barrier(void)
+{
+#ifdef HAVE_GCC_ATOMIC_BUILTINS
+  __sync_synchronize();
+#elif defined(HAVE_SOLARIS_ATOMIC)
+  membar_enter();
+#elif defined(WIN_ATOMICS32)
+  MemoryBarrier();
+#elif defined(WIN_ATOMICS64)
+  MemoryBarrier();
+#endif
+}
+
+
+/**************************************************************
+Atomic increment for InnoDB. Needs GCC, Solaris or Windows atomics. */
+UNIV_INLINE
+lint
+os_atomic_increment(
+/*================*/
+					/* out: resulting value */
+	volatile lint*	ptr,		/* in: pointer to target */
+	lint		amount)		/* in: amount of increment */
+{
+#ifdef HAVE_GCC_ATOMIC_BUILTINS
+	return (__sync_add_and_fetch(ptr, amount));
+#elif defined(HAVE_SOLARIS_ATOMIC)
+	return ((lint)atomic_add_long_nv((volatile ulong_t *)ptr, amount));
+#elif defined(WIN_ATOMICS32)
+	return ((lint)InterlockedExchangeAdd(ptr, amount) + amount);
+#elif defined(WIN_ATOMICS64)
+	return ((lint)InterlockedExchangeAdd64(ptr, amount) + amount);
+#else
+#error "Need support for atomic ops"
+#endif
+}
+#endif /* UNIV_SYNC_ATOMIC */

=== modified file 'storage/innobase/include/srv0srv.h'
--- a/storage/innobase/include/srv0srv.h	2009-05-19 08:37:33 +0000
+++ b/storage/innobase/include/srv0srv.h	2009-07-02 14:23:36 +0000
@@ -89,7 +89,22 @@ extern ulint	srv_awe_window_size;
 extern ulint	srv_mem_pool_size;
 extern ulint	srv_lock_table_size;
 
-extern ulint	srv_n_file_io_threads;
+extern ibool    srv_thread_concurrency_timer_based;
+
+/* Number of background IO threads for read and write. Replaces
+ * srv_n_file_io_threads. */
+extern ulint	srv_n_read_io_threads;
+extern ulint	srv_n_write_io_threads;
+/* Max number of adjacent IO requests to merge into one large request. */
+extern ulint	srv_max_merged_io;
+
+/* Number of IO operations per second the server can do */
+extern ulint    srv_io_capacity;
+
+/* Flush dirty pages when below max dirty percent */
+extern ibool  srv_extra_dirty_writes;
+
+
 
 #ifdef UNIV_LOG_ARCHIVE
 extern ibool	srv_log_archive_on;
@@ -232,6 +247,9 @@ extern ulint srv_read_ahead_seq;
 /* variable to count the number of random read-aheads were done */
 extern ulint srv_read_ahead_rnd;
 
+/* Number of threads that may have missed a lock wait wakeup */
+extern ulint sync_wake_ups;
+
 /* In this structure we store status variables to be passed to MySQL */
 typedef struct export_var_struct export_struc;
 
@@ -444,11 +462,7 @@ Outputs to a file the output of the Inno
 void
 srv_printf_innodb_monitor(
 /*======================*/
-	FILE*	file,		/* in: output stream */
-	ulint*	trx_start,	/* out: file position of the start of
-				the list of active transactions */
-	ulint*	trx_end);	/* out: file position of the end of
-				the list of active transactions */
+	FILE*	file);		/* in: output stream */
 
 /**********************************************************************
 Function to pass InnoDB status variables to MySQL */
@@ -508,6 +522,8 @@ struct export_var_struct{
 	ulint innodb_buffer_pool_read_ahead_rnd;
 	ulint innodb_dblwr_pages_written;
 	ulint innodb_dblwr_writes;
+	ibool innodb_have_sync_atomic;
+	ibool innodb_heap_enabled;
 	ulint innodb_log_waits;
 	ulint innodb_log_write_requests;
 	ulint innodb_log_writes;
@@ -528,6 +544,7 @@ struct export_var_struct{
 	ulint innodb_rows_inserted;
 	ulint innodb_rows_updated;
 	ulint innodb_rows_deleted;
+	ulint innodb_wake_ups;
 };
 
 /* The server system struct */
@@ -544,4 +561,3 @@ struct srv_sys_struct{
 extern ulint	srv_n_threads_active[];
 
 #endif
-

=== modified file 'storage/innobase/include/sync0rw.h'
--- a/storage/innobase/include/sync0rw.h	2008-06-12 00:08:07 +0000
+++ b/storage/innobase/include/sync0rw.h	2009-07-02 14:23:36 +0000
@@ -24,6 +24,12 @@ smaller than 30 and the order of the num
 #define	RW_X_LATCH	2
 #define	RW_NO_LATCH	3
 
+/* We decrement lock_word by this amount for each x_lock. It is also the
+start value for the lock_word, meaning that it limits the maximum number
+of concurrent read locks before the rw_lock breaks. The current value of
+0x00100000 allows 1,048,575 concurrent readers and 2047 recursive writers.*/
+#define X_LOCK_DECR		0x00100000
+
 typedef struct rw_lock_struct		rw_lock_t;
 #ifdef UNIV_SYNC_DEBUG
 typedef struct rw_lock_debug_struct	rw_lock_debug_t;
@@ -47,14 +53,14 @@ extern ibool		rw_lock_debug_waiters;	/* 
 					there may be waiters for the event */
 #endif /* UNIV_SYNC_DEBUG */
 
-extern	ulint	rw_s_system_call_count;
-extern	ulint	rw_s_spin_wait_count;
-extern	ulint	rw_s_exit_count;
-extern	ulint	rw_s_os_wait_count;
-extern	ulint	rw_x_system_call_count;
-extern	ulint	rw_x_spin_wait_count;
-extern	ulint	rw_x_os_wait_count;
-extern	ulint	rw_x_exit_count;
+extern	ib_longlong	rw_s_spin_wait_count;
+extern	ib_longlong	rw_s_spin_round_count;
+extern	ib_longlong	rw_s_exit_count;
+extern	ib_longlong	rw_s_os_wait_count;
+extern	ib_longlong	rw_x_spin_wait_count;
+extern	ib_longlong	rw_x_spin_round_count;
+extern	ib_longlong	rw_x_os_wait_count;
+extern	ib_longlong	rw_x_exit_count;
 
 /**********************************************************************
 Creates, or rather, initializes an rw-lock object in a specified memory
@@ -111,6 +117,20 @@ rw_lock_validate(
 /*=============*/
 	rw_lock_t*	lock);
 #endif /* UNIV_DEBUG */
+/**********************************************************************
+Low-level function which tries to lock an rw-lock in s-mode. Performs no
+spinning. */
+UNIV_INLINE
+ibool
+rw_lock_s_lock_low(
+/*===============*/
+                                /* out: TRUE if success */
+        rw_lock_t*      lock,   /* in: pointer to rw-lock */
+        ulint           pass,
+                                /* in: pass value; != 0, if the lock will be
+                                passed to another thread to unlock */
+        const char*     file_name, /* in: file name where lock requested */
+        ulint           line);  /* in: line where requested */
 /******************************************************************
 NOTE! The following macros should be used in rw s-locking, not the
 corresponding function. */
@@ -127,8 +147,8 @@ corresponding function. */
 NOTE! The following macros should be used in rw s-locking, not the
 corresponding function. */
 
-#define rw_lock_s_lock_nowait(M)	rw_lock_s_lock_func_nowait(\
-		(M), __FILE__, __LINE__)
+#define rw_lock_s_lock_nowait(M, F, L)    rw_lock_s_lock_low(\
+					  (M), 0, (F), (L))
 /**********************************************************************
 NOTE! Use the corresponding macro, not directly this function, except if
 you supply the file name and line number. Lock an rw-lock in shared mode
@@ -146,18 +166,6 @@ rw_lock_s_lock_func(
 	const char*	file_name,/* in: file name where lock requested */
 	ulint		line);	/* in: line where requested */
 /**********************************************************************
-NOTE! Use the corresponding macro, not directly this function, except if
-you supply the file name and line number. Lock an rw-lock in shared mode
-for the current thread if the lock can be acquired immediately. */
-UNIV_INLINE
-ibool
-rw_lock_s_lock_func_nowait(
-/*=======================*/
-				/* out: TRUE if success */
-	rw_lock_t*	lock,	/* in: pointer to rw-lock */
-	const char*	file_name,/* in: file name where lock requested */
-	ulint		line);	/* in: line where requested */
-/**********************************************************************
 NOTE! Use the corresponding macro, not directly this function! Lock an
 rw-lock in exclusive mode for the current thread if the lock can be
 obtained immediately. */
@@ -341,6 +349,23 @@ ulint
 rw_lock_get_reader_count(
 /*=====================*/
 	rw_lock_t*	lock);
+/**********************************************************************
+Decrements lock_word the specified amount if it is greater than 0.
+This is used by both s_lock and x_lock operations. */
+UNIV_INLINE
+ibool
+rw_lock_lock_word_decr(
+					/* out: TRUE if decr occurs */
+	rw_lock_t*	lock,		/* in: rw-lock */
+	ulint		amount);	/* in: amount to decrement */
+/**********************************************************************
+Increments lock_word the specified amount and returns new value. */
+UNIV_INLINE
+lint
+rw_lock_lock_word_incr(
+					/* out: lock_word after increment */
+	rw_lock_t*	lock,		/* in: rw-lock */
+	ulint		amount);	/* in: amount to increment */
 #ifdef UNIV_SYNC_DEBUG
 /**********************************************************************
 Checks if the thread has locked the rw-lock in the specified mode, with
@@ -417,44 +442,28 @@ Do not use its fields directly! The stru
 implementation of a read-write lock. Several threads may have a shared lock
 simultaneously in this lock, but only one writer may have an exclusive lock,
 in which case no shared locks are allowed. To prevent starving of a writer
-blocked by readers, a writer may queue for the lock by setting the writer
-field. Then no new readers are allowed in. */
+blocked by readers, a writer may queue for x-lock by decrementing lock_word:
+no new readers will be let in while the thread waits for readers to exit. */
 
 struct rw_lock_struct {
-	os_event_t	event;	/* Used by sync0arr.c for thread queueing */
-
-#ifdef __WIN__
-	os_event_t	wait_ex_event;	/* This windows specific event is
-				used by the thread which has set the
-				lock state to RW_LOCK_WAIT_EX. The
-				rw_lock design guarantees that this
-				thread will be the next one to proceed
-				once the current the event gets
-				signalled. See LEMMA 2 in sync0sync.c */
-#endif
-
-	ulint	reader_count;	/* Number of readers who have locked this
-				lock in the shared mode */
-	ulint	writer;		/* This field is set to RW_LOCK_EX if there
-				is a writer owning the lock (in exclusive
-				mode), RW_LOCK_WAIT_EX if a writer is
-				queueing for the lock, and
-				RW_LOCK_NOT_LOCKED, otherwise. */
-	os_thread_id_t	writer_thread;
-				/* Thread id of a possible writer thread */
-	ulint	writer_count;	/* Number of times the same thread has
-				recursively locked the lock in the exclusive
-				mode */
-	mutex_t	mutex;		/* The mutex protecting rw_lock_struct */
-	ulint	pass;		/* Default value 0. This is set to some
+	volatile lint	lock_word;
+				/* Holds the state of the lock. */
+	volatile ulint	waiters;/* 1: there are waiters */
+	volatile ulint	pass;	/* Default value 0. This is set to some
 				value != 0 given by the caller of an x-lock
 				operation, if the x-lock is to be passed to
 				another thread to unlock (which happens in
 				asynchronous i/o). */
-	ulint	waiters;	/* This ulint is set to 1 if there are
-				waiters (readers or writers) in the global
-				wait array, waiting for this rw_lock.
-				Otherwise, == 0. */
+	volatile os_thread_id_t	writer_thread;
+				/* Thread id of writer thread */
+	os_event_t	event;	/* Used by sync0arr.c for thread queueing */
+	os_event_t	wait_ex_event;
+				/* Event for next-writer to wait on. A thread
+				must decrement lock_word before waiting. */
+#ifndef UNIV_SYNC_ATOMIC
+	mutex_t	mutex;		/* The mutex protecting rw_lock_struct */
+#endif /* UNIV_SYNC_ATOMIC */
+
 	UT_LIST_NODE_T(rw_lock_t) list;
 				/* All allocated rw locks are put into a
 				list */
@@ -464,7 +473,9 @@ struct rw_lock_struct {
 				info list of the lock */
 	ulint	level;		/* Level in the global latching order. */
 #endif /* UNIV_SYNC_DEBUG */
+	ulint count_os_wait;	/* Count of os_waits. May not be accurate */
 	const char*	cfile_name;/* File name where lock created */
+        /* last s-lock file/line is not guaranteed to be correct */
 	const char*	last_s_file_name;/* File name where last s-locked */
 	const char*	last_x_file_name;/* File name where last x-locked */
 	ibool		writer_is_wait_ex;

=== modified file 'storage/innobase/include/sync0rw.ic'
--- a/storage/innobase/include/sync0rw.ic	2008-06-12 00:08:07 +0000
+++ b/storage/innobase/include/sync0rw.ic	2009-07-02 14:23:36 +0000
@@ -57,45 +57,68 @@ UNIV_INLINE
 void
 rw_lock_set_waiters(
 /*================*/
-	rw_lock_t*	lock,
-	ulint		flag)
+	rw_lock_t*	lock)
 {
-	lock->waiters = flag;
+#ifdef UNIV_SYNC_ATOMIC
+	os_compare_and_swap(&(lock->waiters), 0, 1);
+#else /* UNIV_SYNC_ATOMIC */
+	lock->waiters = 1;
+#endif /* UNIV_SYNC_ATOMIC */
 }
 UNIV_INLINE
-ulint
-rw_lock_get_writer(
-/*===============*/
+void
+rw_lock_reset_waiters(
+/*================*/
 	rw_lock_t*	lock)
 {
-	return(lock->writer);
+#ifdef UNIV_SYNC_ATOMIC
+	os_compare_and_swap(&(lock->waiters), 1, 0);
+#else /* UNIV_SYNC_ATOMIC */
+	lock->waiters = 0;
+#endif /* UNIV_SYNC_ATOMIC */
 }
+
+/**********************************************************************
+Returns the write-status of the lock - this function made more sense
+with the old rw_lock implementation.
+ */
 UNIV_INLINE
-void
-rw_lock_set_writer(
+ulint
+rw_lock_get_writer(
 /*===============*/
-	rw_lock_t*	lock,
-	ulint		flag)
+	rw_lock_t*	lock)
 {
-	lock->writer = flag;
+	lint lock_word = lock->lock_word;
+	if(lock_word > 0) {
+		/* return NOT_LOCKED in s-lock state, like the writer
+		member of the old lock implementation. */
+		return RW_LOCK_NOT_LOCKED;
+	} else if (((-lock_word) % X_LOCK_DECR) == 0) {
+		return RW_LOCK_EX;
+	} else {
+                ut_ad(lock_word > -X_LOCK_DECR);
+		return RW_LOCK_WAIT_EX;
+	}
 }
+
 UNIV_INLINE
 ulint
 rw_lock_get_reader_count(
 /*=====================*/
 	rw_lock_t*	lock)
 {
-	return(lock->reader_count);
-}
-UNIV_INLINE
-void
-rw_lock_set_reader_count(
-/*=====================*/
-	rw_lock_t*	lock,
-	ulint		count)
-{
-	lock->reader_count = count;
+	lint lock_word = lock->lock_word;
+	if(lock_word > 0) {
+		/* s-locked, no x-waiters */
+		return(X_LOCK_DECR - lock_word);
+	} else if (lock_word < 0 && lock_word > -X_LOCK_DECR) {
+		/* s-locked, with x-waiters */
+		return (ulint)(-lock_word);
+	}
+	return 0;
 }
+
+#ifndef UNIV_SYNC_ATOMIC
 UNIV_INLINE
 mutex_t*
 rw_lock_get_mutex(
@@ -104,6 +127,7 @@ rw_lock_get_mutex(
 {
 	return(&(lock->mutex));
 }
+#endif
 
 /**********************************************************************
 Returns the value of writer_count for the lock. Does not reserve the lock
@@ -115,7 +139,87 @@ rw_lock_get_x_lock_count(
 				/* out: value of writer_count */
 	rw_lock_t*	lock)	/* in: rw-lock */
 {
-	return(lock->writer_count);
+	lint lock_copy = lock->lock_word;
+	/* If there is a reader, lock_word is not divisible by X_LOCK_DECR */
+	if(lock_copy > 0 || (-lock_copy) % X_LOCK_DECR != 0) {
+		return 0;
+	}
+	return ((-lock_copy) / X_LOCK_DECR) + 1;
+}
+
+/**********************************************************************
+Two different implementations for decrementing the lock_word of a rw_lock:
+one for systems supporting atomic operations, one for others. This does
+not support recursive x-locks: they should be handled by the caller and
+need not be atomic since they are performed by the current lock holder.
+Returns true if the decrement was made, false if not. */
+UNIV_INLINE
+ibool
+rw_lock_lock_word_decr(
+				/* out: TRUE if decr occurs */
+	rw_lock_t*	lock,	/* in: rw-lock */
+	ulint		amount)	/* in: amount of decrement */
+{
+
+#ifdef UNIV_SYNC_ATOMIC
+
+        lint local_lock_word = lock->lock_word;
+	while (local_lock_word > 0) {
+		if(os_compare_and_swap(&(lock->lock_word),
+                                       local_lock_word,
+                                       local_lock_word - amount)) {
+			return TRUE;
+		}
+		local_lock_word = lock->lock_word;
+	}
+	return(FALSE);
+
+#else /* UNIV_SYNC_ATOMIC */
+
+	ibool success = FALSE;
+	mutex_enter(&(lock->mutex));
+	if(lock->lock_word > 0) {
+		lock->lock_word -= amount;
+		success = TRUE;
+	}
+	mutex_exit(&(lock->mutex));
+	return success;
+
+#endif /* UNIV_SYNC_ATOMIC */
+
+}
+
+/**********************************************************************
+Two different implementations for incrementing the lock_word of a rw_lock:
+one for systems supporting atomic operations, one for others.
+Returns the value of lock_word after increment. */
+UNIV_INLINE
+lint
+rw_lock_lock_word_incr(
+				/* out: lock->lock_word after increment */
+	rw_lock_t*	lock,	/* in: rw-lock */
+	ulint		amount)	/* in: amount of increment */
+{
+
+#ifdef UNIV_SYNC_ATOMIC
+
+	return(os_atomic_increment(&(lock->lock_word), amount));
+
+#else /* UNIV_SYNC_ATOMIC */
+
+	lint local_lock_word;
+
+	mutex_enter(&(lock->mutex));
+
+	lock->lock_word += amount;
+	local_lock_word = lock->lock_word;
+
+	mutex_exit(&(lock->mutex));
+
+        return local_lock_word;
+
+#endif /* UNIV_SYNC_ATOMIC */
+
 }
 
 /**********************************************************************
@@ -133,27 +237,24 @@ rw_lock_s_lock_low(
 	const char*	file_name, /* in: file name where lock requested */
 	ulint		line)	/* in: line where requested */
 {
-	ut_ad(mutex_own(rw_lock_get_mutex(lock)));
-
-	/* Check if the writer field is free */
-
-	if (UNIV_LIKELY(lock->writer == RW_LOCK_NOT_LOCKED)) {
-		/* Set the shared lock by incrementing the reader count */
-		lock->reader_count++;
+	/* TODO: study performance of UNIV_LIKELY branch prediction hints. */
+	if (!rw_lock_lock_word_decr(lock, 1)) {
+		/* Locking did not succeed */
+		return(FALSE);
+	}
 
 #ifdef UNIV_SYNC_DEBUG
-		rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name,
-				       line);
+	rw_lock_add_debug_info(lock, pass, RW_LOCK_SHARED, file_name, line);
 #endif
-		lock->last_s_file_name = file_name;
-		lock->last_s_line = line;
-
-		return(TRUE);	/* locking succeeded */
-	}
+	/* These debugging values are not set safely: they may be incorrect
+        or even refer to a line that is invalid for the file name. */
+	lock->last_s_file_name = file_name;
+	lock->last_s_line = line;
 
-	return(FALSE);	/* locking did not succeed */
+	return(TRUE);	/* locking succeeded */
 }
 
+/* TODO: The "direct" functions are not used. Remove them? */
 /**********************************************************************
 Low-level function which locks an rw-lock in s-mode when we know that it
 is possible and none else is currently accessing the rw-lock structure.
@@ -166,11 +267,10 @@ rw_lock_s_lock_direct(
 	const char*	file_name,	/* in: file name where requested */
 	ulint		line)		/* in: line where lock requested */
 {
-	ut_ad(lock->writer == RW_LOCK_NOT_LOCKED);
-	ut_ad(rw_lock_get_reader_count(lock) == 0);
+	ut_ad(lock->lock_word == X_LOCK_DECR);
 
-	/* Set the shared lock by incrementing the reader count */
-	lock->reader_count++;
+	/* Indicate there is a new reader by decrementing lock_word */
+	lock->lock_word--;
 
 	lock->last_s_file_name = file_name;
 	lock->last_s_line = line;
@@ -180,6 +280,7 @@ rw_lock_s_lock_direct(
 #endif
 }
 
+/* TODO: The "direct" functions are not used. Remove them? */
 /**********************************************************************
 Low-level function which locks an rw-lock in x-mode when we know that it
 is not locked and none else is currently accessing the rw-lock structure.
@@ -193,12 +294,10 @@ rw_lock_x_lock_direct(
 	ulint		line)		/* in: line where lock requested */
 {
 	ut_ad(rw_lock_validate(lock));
-	ut_ad(rw_lock_get_reader_count(lock) == 0);
-	ut_ad(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED);
+	ut_ad(lock->lock_word == X_LOCK_DECR);
 
-	rw_lock_set_writer(lock, RW_LOCK_EX);
+	lock->lock_word -= X_LOCK_DECR;
 	lock->writer_thread = os_thread_get_curr_id();
-	lock->writer_count++;
 	lock->pass = 0;
 
 	lock->last_x_file_name = file_name;
@@ -240,15 +339,12 @@ rw_lock_s_lock_func(
 	ut_ad(!rw_lock_own(lock, RW_LOCK_SHARED)); /* see NOTE above */
 #endif /* UNIV_SYNC_DEBUG */
 
-	mutex_enter(rw_lock_get_mutex(lock));
-
-	if (UNIV_LIKELY(rw_lock_s_lock_low(lock, pass, file_name, line))) {
-		mutex_exit(rw_lock_get_mutex(lock));
+	/* TODO: study performance of UNIV_LIKELY branch prediction hints. */
+	if (rw_lock_s_lock_low(lock, pass, file_name, line)) {
 
 		return; /* Success */
 	} else {
 		/* Did not succeed, try spin wait */
-		mutex_exit(rw_lock_get_mutex(lock));
 
 		rw_lock_s_lock_spin(lock, pass, file_name, line);
 
@@ -258,86 +354,66 @@ rw_lock_s_lock_func(
 
 /**********************************************************************
 NOTE! Use the corresponding macro, not directly this function! Lock an
-rw-lock in shared mode for the current thread if the lock can be acquired
-immediately. */
+rw-lock in exclusive mode for the current thread if the lock can be
+obtained immediately. */
 UNIV_INLINE
 ibool
-rw_lock_s_lock_func_nowait(
+rw_lock_x_lock_func_nowait(
 /*=======================*/
 				/* out: TRUE if success */
 	rw_lock_t*	lock,	/* in: pointer to rw-lock */
 	const char*	file_name,/* in: file name where lock requested */
 	ulint		line)	/* in: line where requested */
 {
-	ibool	success	= FALSE;
-
-	mutex_enter(rw_lock_get_mutex(lock));
-
-	if (lock->writer == RW_LOCK_NOT_LOCKED) {
-		/* Set the shared lock by incrementing the reader count */
-		lock->reader_count++;
+	os_thread_id_t	curr_thread	= os_thread_get_curr_id();
 
-#ifdef UNIV_SYNC_DEBUG
-		rw_lock_add_debug_info(lock, 0, RW_LOCK_SHARED, file_name,
-				       line);
-#endif
+	ibool success;
 
-		lock->last_s_file_name = file_name;
-		lock->last_s_line = line;
+#ifdef UNIV_SYNC_ATOMIC
+	success = os_compare_and_swap(&(lock->lock_word), X_LOCK_DECR, 0);
+#else
 
+	success = FALSE;
+	mutex_enter(&(lock->mutex));
+	if(lock->lock_word == X_LOCK_DECR) {
+		lock->lock_word = 0;
 		success = TRUE;
 	}
+	mutex_exit(&(lock->mutex));
 
-	mutex_exit(rw_lock_get_mutex(lock));
-
-	return(success);
-}
-
-/**********************************************************************
-NOTE! Use the corresponding macro, not directly this function! Lock an
-rw-lock in exclusive mode for the current thread if the lock can be
-obtained immediately. */
-UNIV_INLINE
-ibool
-rw_lock_x_lock_func_nowait(
-/*=======================*/
-				/* out: TRUE if success */
-	rw_lock_t*	lock,	/* in: pointer to rw-lock */
-	const char*	file_name,/* in: file name where lock requested */
-	ulint		line)	/* in: line where requested */
-{
-	ibool		success		= FALSE;
-	os_thread_id_t	curr_thread	= os_thread_get_curr_id();
-	mutex_enter(rw_lock_get_mutex(lock));
-
-	if (UNIV_UNLIKELY(rw_lock_get_reader_count(lock) != 0)) {
-	} else if (UNIV_LIKELY(rw_lock_get_writer(lock)
-			       == RW_LOCK_NOT_LOCKED)) {
-		rw_lock_set_writer(lock, RW_LOCK_EX);
+#endif
+	if(success) {
 		lock->writer_thread = curr_thread;
 		lock->pass = 0;
-relock:
-		lock->writer_count++;
 
-#ifdef UNIV_SYNC_DEBUG
-		rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
-#endif
+	} else if (!(lock->pass) &&
+		   os_thread_eq(lock->writer_thread, curr_thread)) {
+		/* Must verify pass first: otherwise another thread can
+		call move_ownership, suddenly allowing recursive locks,
+		and after we have verified our thread_id matches
+		(though move_ownership has since changed it). */
+
+		/* Relock: this lock_word modification is safe since no other
+		threads can modify (lock, unlock, or reserve) lock_word while
+		there is an exclusive writer and this is the writer thread. */
+		lock->lock_word -= X_LOCK_DECR;
 
-		lock->last_x_file_name = file_name;
-		lock->last_x_line = line;
+		ut_ad(((-lock->lock_word) % X_LOCK_DECR) == 0);
 
-		success = TRUE;
-	} else if (rw_lock_get_writer(lock) == RW_LOCK_EX
-		   && lock->pass == 0
-		   && os_thread_eq(lock->writer_thread, curr_thread)) {
-		goto relock;
+	} else {
+		/* Failure */
+		return(FALSE);
 	}
+#ifdef UNIV_SYNC_DEBUG
+	rw_lock_add_debug_info(lock, 0, RW_LOCK_EX, file_name, line);
+#endif
 
-	mutex_exit(rw_lock_get_mutex(lock));
+	lock->last_x_file_name = file_name;
+	lock->last_x_line = line;
 
 	ut_ad(rw_lock_validate(lock));
 
-	return(success);
+	return(TRUE);
 }
 
 /**********************************************************************
@@ -353,39 +429,21 @@ rw_lock_s_unlock_func(
 #endif
 	)
 {
-	mutex_t*	mutex	= &(lock->mutex);
-	ibool		sg	= FALSE;
-
-	/* Acquire the mutex protecting the rw-lock fields */
-	mutex_enter(mutex);
-
-	/* Reset the shared lock by decrementing the reader count */
-
-	ut_a(lock->reader_count > 0);
-	lock->reader_count--;
+	ut_ad((lock->lock_word % X_LOCK_DECR) != 0);
 
 #ifdef UNIV_SYNC_DEBUG
 	rw_lock_remove_debug_info(lock, pass, RW_LOCK_SHARED);
 #endif
 
-	/* If there may be waiters and this was the last s-lock,
-	signal the object */
-
-	if (UNIV_UNLIKELY(lock->waiters)
-	    && lock->reader_count == 0) {
-		sg = TRUE;
-
-		rw_lock_set_waiters(lock, 0);
-	}
-
-	mutex_exit(mutex);
+	/* Increment lock_word to indicate 1 less reader */
+	if(rw_lock_lock_word_incr(lock, 1) == 0) {
 
-	if (UNIV_UNLIKELY(sg)) {
-#ifdef __WIN__
+		/* wait_ex waiter exists. It may not be asleep, but we signal
+                anyway. We do not wake other waiters, because they can't
+                exist without wait_ex waiter and wait_ex waiter goes first.*/
 		os_event_set(lock->wait_ex_event);
-#endif
-		os_event_set(lock->event);
 		sync_array_object_signalled(sync_primary_wait_array);
+
 	}
 
 	ut_ad(rw_lock_validate(lock));
@@ -395,6 +453,7 @@ rw_lock_s_unlock_func(
 #endif
 }
 
+/* TODO: The "direct" functions are not used. Remove them? */
 /**********************************************************************
 Releases a shared mode lock when we know there are no waiters and none
 else will access the lock during the time this function is executed. */
@@ -404,17 +463,16 @@ rw_lock_s_unlock_direct(
 /*====================*/
 	rw_lock_t*	lock)	/* in: rw-lock */
 {
-	/* Reset the shared lock by decrementing the reader count */
-
-	ut_ad(lock->reader_count > 0);
-
-	lock->reader_count--;
+	ut_ad(lock->lock_word < X_LOCK_DECR);
 
 #ifdef UNIV_SYNC_DEBUG
 	rw_lock_remove_debug_info(lock, 0, RW_LOCK_SHARED);
 #endif
 
-	ut_ad(!lock->waiters);
+	/* Decrease reader count by incrementing lock_word */
+	lock->lock_word++;
+
+	ut_ad(!rw_lock_get_waiters(lock));
 	ut_ad(rw_lock_validate(lock));
 #ifdef UNIV_SYNC_PERF_STAT
 	rw_s_exit_count++;
@@ -434,42 +492,34 @@ rw_lock_x_unlock_func(
 #endif
 	)
 {
-	ibool	sg	= FALSE;
+        uint local_pass;
+	ut_ad((lock->lock_word % X_LOCK_DECR) == 0);
 
-	/* Acquire the mutex protecting the rw-lock fields */
-	mutex_enter(&(lock->mutex));
-
-	/* Reset the exclusive lock if this thread no longer has an x-mode
-	lock */
-
-	ut_ad(lock->writer_count > 0);
-
-	lock->writer_count--;
-
-	if (lock->writer_count == 0) {
-		rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
-	}
+	/*
+          Must reset pass while we still have the lock.
+	  If we are not the last unlocker, we correct it later in the function,
+	  which is harmless since we still hold the lock.
+        */
+        local_pass = lock->pass;
+        lock->pass = 1;
 
 #ifdef UNIV_SYNC_DEBUG
 	rw_lock_remove_debug_info(lock, pass, RW_LOCK_EX);
 #endif
 
-	/* If there may be waiters, signal the lock */
-	if (UNIV_UNLIKELY(lock->waiters)
-	    && lock->writer_count == 0) {
-
-		sg = TRUE;
-		rw_lock_set_waiters(lock, 0);
-	}
-
-	mutex_exit(&(lock->mutex));
+	if(rw_lock_lock_word_incr(lock, X_LOCK_DECR) == X_LOCK_DECR) {
+		/* Lock is now free. May have to signal read/write waiters.
+                We do not need to signal wait_ex waiters, since they cannot
+                exist when there is a writer. */
+		if(rw_lock_get_waiters(lock)) {
+			rw_lock_reset_waiters(lock);
+			os_event_set(lock->event);
+			sync_array_object_signalled(sync_primary_wait_array);
+		}
 
-	if (UNIV_UNLIKELY(sg)) {
-#ifdef __WIN__
-		os_event_set(lock->wait_ex_event);
-#endif
-		os_event_set(lock->event);
-		sync_array_object_signalled(sync_primary_wait_array);
+	} else {
+		/* We still hold x-lock, so we correct pass. */
+		lock->pass = local_pass;
 	}
 
 	ut_ad(rw_lock_validate(lock));
@@ -479,6 +529,7 @@ rw_lock_x_unlock_func(
 #endif
 }
 
+/* TODO: The "direct" functions are not used. Remove them? */
 /**********************************************************************
 Releases an exclusive mode lock when we know there are no waiters, and
 none else will access the lock durint the time this function is executed. */
@@ -491,19 +542,15 @@ rw_lock_x_unlock_direct(
 	/* Reset the exclusive lock if this thread no longer has an x-mode
 	lock */
 
-	ut_ad(lock->writer_count > 0);
-
-	lock->writer_count--;
-
-	if (lock->writer_count == 0) {
-		rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
-	}
+	ut_ad((lock->lock_word % X_LOCK_DECR) == 0);
 
 #ifdef UNIV_SYNC_DEBUG
 	rw_lock_remove_debug_info(lock, 0, RW_LOCK_EX);
 #endif
+	lock->pass = 1;
+	lock->lock_word += X_LOCK_DECR;
 
-	ut_ad(!lock->waiters);
+	ut_ad(!rw_lock_get_waiters(lock));
 	ut_ad(rw_lock_validate(lock));
 
 #ifdef UNIV_SYNC_PERF_STAT

=== modified file 'storage/innobase/include/sync0sync.h'
--- a/storage/innobase/include/sync0sync.h	2008-06-12 00:08:07 +0000
+++ b/storage/innobase/include/sync0sync.h	2009-07-02 14:23:36 +0000
@@ -16,6 +16,9 @@ Created 9/5/1995 Heikki Tuuri
 #include "os0thread.h"
 #include "os0sync.h"
 #include "sync0arr.h"
+#ifndef WIN32
+#include "my_atomic.h"
+#endif
 
 #ifndef UNIV_HOTBACKUP
 extern my_bool	timed_mutexes;
@@ -252,7 +255,7 @@ mutex_n_reserved(void);
 NOT to be used outside this module except in debugging! Gets the value
 of the lock word. */
 UNIV_INLINE
-ulint
+byte
 mutex_get_lock_word(
 /*================*/
 	const mutex_t*	mutex);	/* in: mutex */
@@ -471,9 +474,16 @@ implementation of a mutual exclusion sem
 
 struct mutex_struct {
 	os_event_t	event;	/* Used by sync0arr.c for the wait queue */
-	ulint	lock_word;	/* This ulint is the target of the atomic
-				test-and-set instruction in Win32 */
-#if !defined(_WIN32) || !defined(UNIV_CAN_USE_X86_ASSEMBLER)
+
+ 	byte	lock_word;	/* This byte is the target of the atomic
+ 				test-and-set instruction in Win32 and
+ 				x86 32/64 with GCC 4.1.0 or later version */
+#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
+#elif defined(MY_ATOMIC_NOLOCK)
+				/* We have my_atomic_* routines that are
+				intrinsically atomic, so no need for the
+				mutex. */
+#else
 	os_fast_mutex_t
 		os_fast_mutex;	/* In other systems we use this OS mutex
 				in place of lock_word */
@@ -526,8 +536,7 @@ to 20 microseconds. */
 /* The number of system calls made in this module. Intended for performance
 monitoring. */
 
-extern	ulint	mutex_system_call_count;
-extern	ulint	mutex_exit_count;
+extern	ib_longlong	mutex_exit_count;
 
 #ifdef UNIV_SYNC_DEBUG
 /* Latching order checks start when this is set TRUE */

=== modified file 'storage/innobase/include/sync0sync.ic'
--- a/storage/innobase/include/sync0sync.ic	2008-12-19 00:34:15 +0000
+++ b/storage/innobase/include/sync0sync.ic	2009-07-02 14:23:36 +0000
@@ -6,16 +6,6 @@ Mutex, the basic synchronization primiti
 Created 9/5/1995 Heikki Tuuri
 *******************************************************/
 
-#if defined(not_defined) && defined(__GNUC__) && defined(UNIV_INTEL_X86)
-/* %z0: Use the size of operand %0 which in our case is *m to determine
-instruction size, it should end up as xchgl. "1" in the input constraint,
-says that "in" has to go in the same place as "out".*/
-#define TAS(m, in, out) \
-	asm volatile ("xchg%z0 %2, %0" \
-	: "=g" (*(m)), "=r" (out) \
-	: "1" (in))	/* Note: "1" here refers to "=r" (out) */
-#endif
-
 /**********************************************************************
 Sets the waiters field in a mutex. */
 
@@ -59,7 +49,7 @@ mutex_signal_object(
 Performs an atomic test-and-set instruction to the lock_word field of a
 mutex. */
 UNIV_INLINE
-ulint
+byte
 mutex_test_and_set(
 /*===============*/
 				/* out: the previous value of lock_word: 0 or
@@ -67,18 +57,18 @@ mutex_test_and_set(
 	mutex_t*	mutex)	/* in: mutex */
 {
 #if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
-	ulint	res;
-	ulint*	lw;		/* assembler code is used to ensure that
+	byte	res;
+	byte*	lw;		/* assembler code is used to ensure that
 				lock_word is loaded from memory */
 	ut_ad(mutex);
-	ut_ad(sizeof(ulint) == 4);
+	ut_ad(sizeof(byte) == 1);
 
 	lw = &(mutex->lock_word);
 
 	__asm	MOV	ECX, lw
 		__asm	MOV	EDX, 1
-		__asm	XCHG	EDX, DWORD PTR [ECX]
-		__asm	MOV	res, EDX
+		__asm	XCHG	DL, BYTE PTR [ECX]
+		__asm	MOV	res, DL
 
 		/* The fence below would prevent this thread from
 		reading the data structure protected by the mutex
@@ -98,12 +88,9 @@ mutex_test_and_set(
 		/* mutex_fence(); */
 
 		return(res);
-#elif defined(not_defined) && defined(__GNUC__) && defined(UNIV_INTEL_X86)
-	ulint	res;
-
-	TAS(&mutex->lock_word, 1, res);
-
-	return(res);
+#elif defined(MY_ATOMIC_NOLOCK)
+	return ((byte)my_atomic_swap8(
+		(int8 volatile *)&(mutex->lock_word), 1));
 #else
 	ibool	ret;
 
@@ -117,7 +104,7 @@ mutex_test_and_set(
 		mutex->lock_word = 1;
 	}
 
-	return(ret);
+	return((byte)ret);
 #endif
 }
 
@@ -131,7 +118,7 @@ mutex_reset_lock_word(
 	mutex_t*	mutex)	/* in: mutex */
 {
 #if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
-	ulint*	lw;		/* assembler code is used to ensure that
+	byte*	lw;		/* assembler code is used to ensure that
 				lock_word is loaded from memory */
 	ut_ad(mutex);
 
@@ -139,11 +126,12 @@ mutex_reset_lock_word(
 
 	__asm	MOV	EDX, 0
 		__asm	MOV	ECX, lw
-		__asm	XCHG	EDX, DWORD PTR [ECX]
-#elif defined(not_defined) && defined(__GNUC__) && defined(UNIV_INTEL_X86)
-	ulint	res;
-
-	TAS(&mutex->lock_word, 0, res);
+		__asm	XCHG	DL, BYTE PTR [ECX]
+#elif defined(MY_ATOMIC_NOLOCK)
+	/* In theory __sync_lock_release should be used to release the lock.
+	Unfortunately, it does not work properly alone. The workaround is
+	to use the more conservative __sync_lock_test_and_set instead. */
+	(void)my_atomic_swap8((int8 volatile *)&(mutex->lock_word), 0);
 #else
 	mutex->lock_word = 0;
 
@@ -154,12 +142,12 @@ mutex_reset_lock_word(
 /**********************************************************************
 Gets the value of the lock word. */
 UNIV_INLINE
-ulint
+byte
 mutex_get_lock_word(
 /*================*/
 	const mutex_t*	mutex)	/* in: mutex */
 {
-	const volatile ulint*	ptr;	/* declared volatile to ensure that
+	const volatile byte*	ptr;	/* declared volatile to ensure that
 					lock_word is loaded from memory */
 	ut_ad(mutex);
 

=== modified file 'storage/innobase/include/univ.i'
--- a/storage/innobase/include/univ.i	2008-07-08 16:01:41 +0000
+++ b/storage/innobase/include/univ.i	2009-07-02 14:23:36 +0000
@@ -9,6 +9,10 @@ Created 1/20/1994 Heikki Tuuri
 #ifndef univ_i
 #define univ_i
 
+#ifdef __SUNPRO_C
+# include <sun_prefetch.h>
+#endif
+
 #if (defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)) && !defined(MYSQL_SERVER) && !defined(__WIN__)
 # undef __WIN__
 # define __WIN__
@@ -56,7 +60,7 @@ of the 32-bit x86 assembler in mutex ope
 # endif
 
 /* We only try to do explicit inlining of functions with gcc and
-Microsoft Visual C++ */
+ Sun Studio */
 
 # if !defined(__GNUC__) && (!defined(__SUNPRO_C) || (__SUNPRO_C < 0x590))
 #  undef  UNIV_MUST_NOT_INLINE			/* Remove compiler warning */
@@ -116,6 +120,24 @@ by one. */
 #define UNIV_SET_MEM_TO_ZERO
 #endif
 
+/* Use malloc instead of innodb additional memory pool (great with tcmalloc) */
+#define UNIV_DISABLE_MEM_POOL
+
+#if defined(HAVE_GCC_ATOMIC_BUILTINS) || defined(HAVE_SOLARIS_ATOMIC)
+/*
+ * We have a full set of atomic ops available - we will use them
+ */
+#define UNIV_SYNC_ATOMIC
+#endif
+
+#if defined(WIN_ATOMICS32) || defined(WIN_ATOMICS64)
+/*
+ * We have a full set of atomic ops available - we will use them
+ * This is on Windows
+ */
+#define UNIV_SYNC_ATOMIC
+#endif
+
 /*
 #define UNIV_SQL_DEBUG
 #define UNIV_LOG_DEBUG
@@ -274,6 +296,11 @@ it is read. */
 /* Minimize cache-miss latency by moving data at addr into a cache before
 it is read or written. */
 # define UNIV_PREFETCH_RW(addr) __builtin_prefetch(addr, 1, 3)
+#elif defined(__SUNPRO_C)
+# define UNIV_EXPECT(expr,value) (expr)
+# define UNIV_LIKELY_NULL(expr) (expr)
+# define UNIV_PREFETCH_R(addr) sun_prefetch_read_many(addr)
+# define UNIV_PREFETCH_RW(addr) sun_prefetch_write_many(addr)
 #else
 /* Dummy versions of the macros */
 # define UNIV_EXPECT(expr,value) (expr)

=== modified file 'storage/innobase/include/ut0ut.h'
--- a/storage/innobase/include/ut0ut.h	2008-12-19 00:34:15 +0000
+++ b/storage/innobase/include/ut0ut.h	2009-07-02 14:23:36 +0000
@@ -17,6 +17,24 @@ Created 1/20/1994 Heikki Tuuri
 
 typedef time_t	ib_time_t;
 
+#ifdef HAVE_PAUSE_INSTRUCTION
+#define PAUSE_INSTRUCTION() {__asm__ __volatile__ ("pause");}
+#else
+#ifdef HAVE_FAKE_PAUSE_INSTRUCTION
+#define PAUSE_INSTRUCTION() {__asm__ __volatile__ ("rep; nop");}
+#else
+#ifdef UNIV_SYNC_ATOMIC
+#define PAUSE_INSTRUCTION() \
+  { \
+    volatile lint volatile_var; \
+    os_compare_and_swap(&volatile_var, 0, 1); \
+  }
+#else
+#define PAUSE_INSTRUCTION()
+#endif
+#endif
+#endif
+
 /************************************************************
 Gets the high 32 bits in a ulint. That is makes a shift >> 32,
 but since there seem to be compiler bugs in both gcc and Visual C++,
@@ -156,6 +174,18 @@ ut_usectime(
 			/* out: 0 on success, -1 otherwise */
 	ulint*	sec,	/* out: seconds since the Epoch */
 	ulint*	ms);	/* out: microseconds since the Epoch+*sec */
+
+/**************************************************************
+Returns diff in microseconds (end_sec,end_ms) - (start_sec,start_ms). */
+
+ib_longlong
+ut_usecdiff(
+/*========*/
+	ulint	end_sec,	/* in: seconds since the Epoch */
+	ulint	end_ms,	/* in: microseconds since the Epoch+end_sec */
+	ulint	start_sec,	/* in: seconds since the Epoch */
+	ulint	start_ms);	/* in: microseconds since the Epoch+start_sec */
+
 /**************************************************************
 Returns the difference of two times in seconds. */
 

=== modified file 'storage/innobase/log/log0log.c'
--- a/storage/innobase/log/log0log.c	2007-07-10 14:34:21 +0000
+++ b/storage/innobase/log/log0log.c	2008-10-15 12:30:31 +0000
@@ -1517,6 +1517,26 @@ log_buffer_flush_to_disk(void)
 }
 
 /********************************************************************
+Flush the log buffer. Force it to disk depending on the value of
+innodb_flush_log_at_trx_commit. */
+
+void
+log_buffer_flush_maybe_sync(void)
+/*==========================*/
+{
+	dulint	lsn;
+
+	mutex_enter(&(log_sys->mutex));
+
+	lsn = log_sys->lsn;
+
+	mutex_exit(&(log_sys->mutex));
+
+	/* Force log buffer to disk when innodb_flush_log_at_trx_commit = 1. */
+	log_write_up_to(lsn, LOG_WAIT_ALL_GROUPS,
+			srv_flush_log_at_trx_commit == 1 ? TRUE : FALSE);
+}
+/********************************************************************
 Tries to establish a big enough margin of free space in the log buffer, such
 that a new log entry can be catenated without an immediate need for a flush. */
 static

=== modified file 'storage/innobase/mem/mem0pool.c'
--- a/storage/innobase/mem/mem0pool.c	2007-07-25 01:34:31 +0000
+++ b/storage/innobase/mem/mem0pool.c	2008-10-16 18:07:50 +0000
@@ -329,6 +329,10 @@ mem_area_alloc(
 				minus MEM_AREA_EXTRA_SIZE */
 	mem_pool_t*	pool)	/* in: memory pool */
 {
+#ifdef UNIV_DISABLE_MEM_POOL
+        (void)pool; /* Remove compiler warning */
+        return malloc(size);
+#else /* UNIV_DISABLE_MEM_POOL */
 	mem_area_t*	area;
 	ulint		n;
 	ibool		ret;
@@ -407,6 +411,7 @@ mem_area_alloc(
 		       ut_2_exp(n) - MEM_AREA_EXTRA_SIZE);
 
 	return((void*)(MEM_AREA_EXTRA_SIZE + ((byte*)area)));
+#endif /* UNIV_DISABLE_MEM_POOL */
 }
 
 /************************************************************************
@@ -459,6 +464,10 @@ mem_area_free(
 				buffer */
 	mem_pool_t*	pool)	/* in: memory pool */
 {
+#ifdef UNIV_DISABLE_MEM_POOL
+        (void)pool; /* Remove compiler warning */
+        free(ptr);
+#else /* UNIV_DISABLE_MEM_POOL */
 	mem_area_t*	area;
 	mem_area_t*	buddy;
 	void*		new_ptr;
@@ -570,6 +579,7 @@ mem_area_free(
 	mutex_exit(&(pool->mutex));
 
 	ut_ad(mem_pool_validate(pool));
+#endif /* UNIV_DISABLE_MEM_POOL */
 }
 
 /************************************************************************

=== modified file 'storage/innobase/os/os0file.c'
--- a/storage/innobase/os/os0file.c	2009-01-26 16:03:39 +0000
+++ b/storage/innobase/os/os0file.c	2009-07-02 14:23:36 +0000
@@ -62,6 +62,28 @@ ibool	os_aio_use_native_aio	= FALSE;
 
 ibool	os_aio_print_debug	= FALSE;
 
+/* Status of an IO request slot in simulated AIO.
+   Protocol for simulated aio:
+     client requests IO: find a slot with reserved = FALSE. Reserve it with
+                         status = OS_AIO_NOT_ISSUED.
+     IO thread wakes: find adjacent slots with reserved = TRUE and status =
+                      OS_AIO_NOT_ISSUED. Change the status of those slots to
+                      OS_AIO_ISSUED.
+     IO operation completes: set the status of those slots to OS_AIO_DONE. Set
+                             the status of the first slot to OS_AIO_CLAIMED and
+                             return the result for that slot.
+   When there are multiple read and write threads, they all compete to execute
+   the requests in the array (os_aio_array_t). This avoids the need to load
+   balance requests at the time the request is made, at the cost of waking all
+   threads when a request is available.
+*/
+typedef enum {
+	OS_AIO_NOT_ISSUED, /* Available to be processed by an IO thread. */
+	OS_AIO_ISSUED,     /* Being processed by an IO thread. */
+	OS_AIO_DONE,       /* IO done; result not yet returned to the client. */
+	OS_AIO_CLAIMED     /* Result being returned to client. */
+} os_aio_status;
+
 /* The aio array slot structure */
 typedef struct os_aio_slot_struct	os_aio_slot_t;
 
@@ -70,6 +92,8 @@ struct os_aio_slot_struct{
 	ulint		pos;		/* index of the slot in the aio
 					array */
 	ibool		reserved;	/* TRUE if this slot is reserved */
+	os_aio_status   status;		/* Status for current request. Valid when reserved
+					is TRUE. Used only in simulated aio. */
 	time_t		reservation_time;/* time when reserved */
 	ulint		len;		/* length of the block to read or
 					write */
@@ -80,11 +104,6 @@ struct os_aio_slot_struct{
 	ulint		offset_high;	/* 32 high bits of file offset */
 	os_file_t	file;		/* file where to read or write */
 	const char*	name;		/* file name or path */
-	ibool		io_already_done;/* used only in simulated aio:
-					TRUE if the physical i/o already
-					made and only the slot message
-					needs to be passed to the caller
-					of os_aio_simulated_handle */
 	fil_node_t*	message1;	/* message which is given by the */
 	void*		message2;	/* the requester of an aio operation
 					and which can be used to identify
@@ -114,9 +133,6 @@ struct os_aio_array_struct{
 				  in this array */
 	ulint		n_slots;  /* Total number of slots in the aio array.
 				  This must be divisible by n_threads. */
-	ulint		n_segments;/* Number of segments in the aio array of
-				  pending aio requests. A thread can wait
-				  separately for any one of the segments. */
 	ulint		n_reserved;/* Number of reserved slots in the
 				  aio array outside the ibuf segment */
 	os_aio_slot_t*	slots;	  /* Pointer to the slots in the array */
@@ -133,6 +149,17 @@ struct os_aio_array_struct{
 /* Array of events used in simulated aio */
 os_event_t*	os_aio_segment_wait_events	= NULL;
 
+/* Number of threads for reading and writing. */
+ulint os_aio_read_threads = 0;
+ulint os_aio_write_threads = 0;
+
+/* Number for the first global segment for reading. */
+const ulint os_aio_first_read_segment = 2;
+
+/* Number for the first global segment for writing. Set to
+os_aio_first_read_segment + os_aio_read_threads in os_aio_init. */
+ulint os_aio_first_write_segment = 0;
+
 /* The aio arrays for non-ibuf i/o and ibuf i/o, as well as sync aio. These
 are NULL when the module has not yet been initialized. */
 static os_aio_array_t*	os_aio_read_array	= NULL;
@@ -141,11 +168,39 @@ static os_aio_array_t*	os_aio_ibuf_array
 static os_aio_array_t*	os_aio_log_array	= NULL;
 static os_aio_array_t*	os_aio_sync_array	= NULL;
 
+/* Per thread buffer used for merged IO requests. Used by
+os_aio_simulated_handle so that a buffer doesn't have to be allocated
+for each request. */
+static char* os_aio_thread_buffer[SRV_MAX_N_IO_THREADS];
+static ulint os_aio_thread_buffer_size[SRV_MAX_N_IO_THREADS];
+
+/* Count pages read and written per thread */
+static ulint os_aio_thread_io_reads[SRV_MAX_N_IO_THREADS];
+static ulint os_aio_thread_io_writes[SRV_MAX_N_IO_THREADS];
+
+/* Number of IO operations done. One request can be for N pages. */
+static ulint os_aio_thread_io_requests[SRV_MAX_N_IO_THREADS];
+
+/* usecs spent blocked on an IO request */
+static double os_aio_thread_io_wait[SRV_MAX_N_IO_THREADS];
+/* max usecs spent blocked on an IO request */
+static double os_aio_thread_max_io_wait[SRV_MAX_N_IO_THREADS];
+
+/* Number of IO global segments. An IO handler thread is created for each
+global segment, except for the segment associated with os_aio_sync_array.
+Several segments can be associated with os_aio_{read,write}_array. One
+segment is created for each of the other arrays. This is also the number
+of valid entries in srv_io_thread_reads, srv_io_thread_writes,
+srv_io_thread_op_info, srv_io_thread_function and os_aio_segment_wait_events. */
 static ulint	os_aio_n_segments	= ULINT_UNDEFINED;
 
-/* If the following is TRUE, read i/o handler threads try to
-wait until a batch of new read requests have been posted */
-static ibool	os_aio_recommend_sleep_for_read_threads	= FALSE;
+/* Set to TRUE to temporarily block reads from being scheduled while a batch
+of read requests is added to allow them to be merged by the IO handler thread
+if they are adjacent. Declared volatile because we don't want this to be
+read from a register in a loop when another thread may change the value in
+memory.
+*/
+static volatile ibool	os_aio_recommend_sleep_for_read_threads	= FALSE;
 
 ulint	os_n_file_reads		= 0;
 ulint	os_bytes_read_since_printout = 0;
@@ -165,6 +220,14 @@ ulint	os_file_n_pending_pwrites = 0;
 ulint	os_n_pending_writes = 0;
 ulint	os_n_pending_reads = 0;
 
+static double time_usecs(void) {  /* time from ut_usectime() in usecs; 0 on failure */
+  ulint sec, usec;  /* seconds / microseconds filled in by ut_usectime() */
+  if (ut_usectime(&sec, &usec)) {
+    return 0;  /* nonzero return is treated as "time unavailable" */
+  }
+  return sec * 1000000.0 + usec;
+}
+
 /***************************************************************************
 Gets the operating system version. Currently works only on Windows. */
 
@@ -2884,9 +2947,8 @@ os_aio_array_t*
 os_aio_array_create(
 /*================*/
 				/* out, own: aio array */
-	ulint	n,		/* in: maximum number of pending aio operations
-				allowed; n must be divisible by n_segments */
-	ulint	n_segments)	/* in: number of segments in the aio array */
+	ulint	n)	/* in: maximum number of pending aio operations
+				allowed */
 {
 	os_aio_array_t*	array;
 	ulint		i;
@@ -2895,7 +2957,6 @@ os_aio_array_create(
 	OVERLAPPED*	over;
 #endif
 	ut_a(n > 0);
-	ut_a(n_segments > 0);
 
 	array = ut_malloc(sizeof(os_aio_array_t));
 
@@ -2906,7 +2967,6 @@ os_aio_array_create(
 	os_event_set(array->is_empty);
 
 	array->n_slots		= n;
-	array->n_segments	= n_segments;
 	array->n_reserved	= 0;
 	array->slots		= ut_malloc(n * sizeof(os_aio_slot_t));
 #ifdef __WIN__
@@ -2933,70 +2993,75 @@ os_aio_array_create(
 
 /****************************************************************************
 Initializes the asynchronous io system. Calls also os_io_init_simple.
-Creates a separate aio array for
-non-ibuf read and write, a third aio array for the ibuf i/o, with just one
-segment, two aio arrays for log reads and writes with one segment, and a
-synchronous aio array of the specified size. The combined number of segments
-in the three first aio arrays is the parameter n_segments given to the
-function. The caller must create an i/o handler thread for each segment in
-the four first arrays, but not for the sync aio array. */
+Creates an aio array for each of non-ibuf read, non-ibuf write, ibuf IO,
+log IO, and synchronous IO. The caller must create i/o handler threads for all
+but the synchronous aio array. Multiple threads can access the same array for
+the non-ibuf read (prefetch) and write (flush dirty buffer pages) arrays.
+Returns the number of AIO handler threads. */
 
-void
+ulint
 os_aio_init(
 /*========*/
-	ulint	n,		/* in: maximum number of pending aio operations
-				allowed; n must be divisible by n_segments */
-	ulint	n_segments,	/* in: combined number of segments in the four
-				first aio arrays; must be >= 4 */
+	ulint	ios_per_array,	/* in: maximum number of pending aio operations
+                                 allowed per array */
+	ulint	n_read_threads, /* in: number of read threads */
+	ulint	n_write_threads, /* in: number of write threads */
 	ulint	n_slots_sync)	/* in: number of slots in the sync aio array */
 {
-	ulint	n_read_segs;
-	ulint	n_write_segs;
-	ulint	n_per_seg;
 	ulint	i;
+	ulint   n_segments = 2 + n_read_threads + n_write_threads;
 #ifdef POSIX_ASYNC_IO
 	sigset_t   sigset;
 #endif
-	ut_ad(n % n_segments == 0);
-	ut_ad(n_segments >= 4);
+	ut_a(ios_per_array >= OS_AIO_N_PENDING_IOS_PER_THREAD);
+	ut_a(n_read_threads >= 1 && n_read_threads <= 64);
+	ut_a(n_write_threads >= 1 && n_write_threads <= 64);
+	ut_a(n_segments < SRV_MAX_N_IO_THREADS);
 
 	os_io_init_simple();
 
 	for (i = 0; i < n_segments; i++) {
 		srv_set_io_thread_op_info(i, "not started yet");
-	}
+		os_aio_thread_io_reads[i] = 0;
+		os_aio_thread_io_writes[i] = 0;
+		os_aio_thread_io_requests[i] = 0;
+		os_aio_thread_buffer[i] = 0;
+		os_aio_thread_buffer_size[i] = 0;
+		os_aio_thread_io_wait[i] = 0;
+		os_aio_thread_max_io_wait[i] = 0;
+	}
+
+ 	os_aio_read_threads = n_read_threads;
+ 	os_aio_write_threads = n_write_threads;
+ 	os_aio_first_write_segment = os_aio_first_read_segment + os_aio_read_threads;
+ 
+ 	fprintf(stderr,
+ 		"InnoDB: ios_per_array %lu read threads %lu write threads %lu\n",
+ 		ios_per_array, os_aio_read_threads, os_aio_write_threads);
 
-	n_per_seg = n / n_segments;
-	n_write_segs = (n_segments - 2) / 2;
-	n_read_segs = n_segments - 2 - n_write_segs;
-
-	/* fprintf(stderr, "Array n per seg %lu\n", n_per_seg); */
-
-	os_aio_ibuf_array = os_aio_array_create(n_per_seg, 1);
+	os_aio_ibuf_array = os_aio_array_create(ios_per_array);
 
 	srv_io_thread_function[0] = "insert buffer thread";
 
-	os_aio_log_array = os_aio_array_create(n_per_seg, 1);
+	os_aio_log_array = os_aio_array_create(ios_per_array);
 
 	srv_io_thread_function[1] = "log thread";
 
-	os_aio_read_array = os_aio_array_create(n_read_segs * n_per_seg,
-						n_read_segs);
-	for (i = 2; i < 2 + n_read_segs; i++) {
+	os_aio_read_array = os_aio_array_create(ios_per_array);
+	for (i = os_aio_first_read_segment; i < os_aio_first_write_segment; i++) {
 		ut_a(i < SRV_MAX_N_IO_THREADS);
 		srv_io_thread_function[i] = "read thread";
 	}
 
-	os_aio_write_array = os_aio_array_create(n_write_segs * n_per_seg,
-						 n_write_segs);
-	for (i = 2 + n_read_segs; i < n_segments; i++) {
+	os_aio_write_array = os_aio_array_create(ios_per_array);
+	for (i = os_aio_first_write_segment; i < n_segments; i++) {
 		ut_a(i < SRV_MAX_N_IO_THREADS);
 		srv_io_thread_function[i] = "write thread";
 	}
 
-	os_aio_sync_array = os_aio_array_create(n_slots_sync, 1);
+	os_aio_sync_array = os_aio_array_create(n_slots_sync);
 
-	os_aio_n_segments = n_segments;
+	os_aio_n_segments = 2 + os_aio_read_threads + os_aio_write_threads;
 
 	os_aio_validate();
 
@@ -3024,7 +3089,8 @@ os_aio_init(
 
 	pthread_sigmask(SIG_BLOCK, &sigset, NULL); */
 #endif
-		}
+	return os_aio_n_segments;
+}
 
 #ifdef WIN_ASYNC_IO
 /****************************************************************************
@@ -3082,76 +3148,28 @@ os_aio_wait_until_no_pending_writes(void
 }
 
 /**************************************************************************
-Calculates segment number for a slot. */
+Calculates aio array from global segment number. */
 static
-ulint
-os_aio_get_segment_no_from_slot(
-/*============================*/
-				/* out: segment number (which is the number
-				used by, for example, i/o-handler threads) */
-	os_aio_array_t*	array,	/* in: aio wait array */
-	os_aio_slot_t*	slot)	/* in: slot in this array */
-{
-	ulint	segment;
-	ulint	seg_len;
-
-	if (array == os_aio_ibuf_array) {
-		segment = 0;
-
-	} else if (array == os_aio_log_array) {
-		segment = 1;
-
-	} else if (array == os_aio_read_array) {
-		seg_len = os_aio_read_array->n_slots
-			/ os_aio_read_array->n_segments;
-
-		segment = 2 + slot->pos / seg_len;
-	} else {
-		ut_a(array == os_aio_write_array);
-		seg_len = os_aio_write_array->n_slots
-			/ os_aio_write_array->n_segments;
-
-		segment = os_aio_read_array->n_segments + 2
-			+ slot->pos / seg_len;
-	}
-
-	return(segment);
-}
-
-/**************************************************************************
-Calculates local segment number and aio array from global segment number. */
-static
-ulint
-os_aio_get_array_and_local_segment(
+os_aio_array_t*
+os_aio_get_array(
 /*===============================*/
-					/* out: local segment number within
-					the aio array */
-	os_aio_array_t** array,		/* out: aio wait array */
+	/* out: aio wait array */
 	ulint		 global_segment)/* in: global segment number */
 {
-	ulint	segment;
-
 	ut_a(global_segment < os_aio_n_segments);
 
 	if (global_segment == 0) {
-		*array = os_aio_ibuf_array;
-		segment = 0;
-
+		return os_aio_ibuf_array;
+  
 	} else if (global_segment == 1) {
-		*array = os_aio_log_array;
-		segment = 0;
+		return os_aio_log_array;
 
-	} else if (global_segment < os_aio_read_array->n_segments + 2) {
-		*array = os_aio_read_array;
+	} else if (global_segment < os_aio_first_write_segment) {
+		return os_aio_read_array;
 
-		segment = global_segment - 2;
 	} else {
-		*array = os_aio_write_array;
-
-		segment = global_segment - (os_aio_read_array->n_segments + 2);
-	}
-
-	return(segment);
+		return os_aio_write_array;
+  	}
 }
 
 /***********************************************************************
@@ -3273,7 +3291,7 @@ loop:
 			break;
 		}
 	}
-
+	ut_a(i < array->n_slots);
 	array->n_reserved++;
 
 	if (array->n_reserved == 1) {
@@ -3295,7 +3313,7 @@ loop:
 	slot->buf      = buf;
 	slot->offset   = offset;
 	slot->offset_high = offset_high;
-	slot->io_already_done = FALSE;
+	slot->status = OS_AIO_NOT_ISSUED;
 
 #ifdef WIN_ASYNC_IO
 	control = &(slot->control);
@@ -3348,6 +3366,7 @@ os_aio_array_free_slot(
 	ut_ad(slot->reserved);
 
 	slot->reserved = FALSE;
+ 	slot->status = OS_AIO_NOT_ISSUED;
 
 	array->n_reserved--;
 
@@ -3371,39 +3390,58 @@ static
 void
 os_aio_simulated_wake_handler_thread(
 /*=================================*/
-	ulint	global_segment)	/* in: the number of the segment in the aio
-				arrays */
+		os_aio_array_t* array)	/* in: aio array for which wakeup is done */
 {
-	os_aio_array_t*	array;
 	os_aio_slot_t*	slot;
-	ulint		segment;
 	ulint		n;
 	ulint		i;
 
 	ut_ad(!os_aio_use_native_aio);
+ 	n = array->n_slots;
 
-	segment = os_aio_get_array_and_local_segment(&array, global_segment);
-
-	n = array->n_slots / array->n_segments;
-
-	/* Look through n slots after the segment * n'th slot */
+	/* Look through n slots */
 
 	os_mutex_enter(array->mutex);
 
 	for (i = 0; i < n; i++) {
-		slot = os_aio_array_get_nth_slot(array, i + segment * n);
-
-		if (slot->reserved) {
-			/* Found an i/o request */
-
-			break;
-		}
+		slot = os_aio_array_get_nth_slot(array, i);
+ 
+		if (slot->reserved &&
+		    (slot->status == OS_AIO_NOT_ISSUED ||
+		     slot->status == OS_AIO_DONE)) {
+			/* Found an i/o request
+			   OS_AIO_NOT_ISSUED means the read or write request has
+			   * yet to be done. OS_AIO_DONE means the request has been
+			   * done but it was part of a set of requests merged into
+			   * one read or write call and was not the first block in
+			   * the request, so the handling of the IO completion for
+			   * that block has not been done. */
+  			break;
+  		}
 	}
 
 	os_mutex_exit(array->mutex);
 
 	if (i < n) {
-		os_event_set(os_aio_segment_wait_events[global_segment]);
+		if (array == os_aio_ibuf_array) {
+			os_event_set(os_aio_segment_wait_events[0]);
+
+		} else if (array == os_aio_log_array) {
+			os_event_set(os_aio_segment_wait_events[1]);
+
+		} else if (array == os_aio_read_array) {
+			ulint	x;
+			for (x = os_aio_first_read_segment; x < os_aio_first_write_segment; x++)
+				os_event_set(os_aio_segment_wait_events[x]);
+
+		} else if (array == os_aio_write_array) {
+			ulint	x;
+			for (x = os_aio_first_write_segment; x < os_aio_n_segments; x++)
+				os_event_set(os_aio_segment_wait_events[x]);
+
+		} else {
+			ut_a(0);
+		}
 	}
 }
 
@@ -3414,8 +3452,6 @@ void
 os_aio_simulated_wake_handler_threads(void)
 /*=======================================*/
 {
-	ulint	i;
-
 	if (os_aio_use_native_aio) {
 		/* We do not use simulated aio: do nothing */
 
@@ -3423,10 +3459,11 @@ os_aio_simulated_wake_handler_threads(vo
 	}
 
 	os_aio_recommend_sleep_for_read_threads	= FALSE;
-
-	for (i = 0; i < os_aio_n_segments; i++) {
-		os_aio_simulated_wake_handler_thread(i);
-	}
+  
+	os_aio_simulated_wake_handler_thread(os_aio_ibuf_array);
+	os_aio_simulated_wake_handler_thread(os_aio_log_array);
+	os_aio_simulated_wake_handler_thread(os_aio_read_array);
+	os_aio_simulated_wake_handler_thread(os_aio_write_array);
 }
 
 /**************************************************************************
@@ -3439,18 +3476,13 @@ void
 os_aio_simulated_put_read_threads_to_sleep(void)
 /*============================================*/
 {
-	os_aio_array_t*	array;
 	ulint		g;
 
+	/* TODO(mcallaghan): provide similar function for write? */
 	os_aio_recommend_sleep_for_read_threads	= TRUE;
-
-	for (g = 0; g < os_aio_n_segments; g++) {
-		os_aio_get_array_and_local_segment(&array, g);
-
-		if (array == os_aio_read_array) {
-
-			os_event_reset(os_aio_segment_wait_events[g]);
-		}
+  
+	for (g = os_aio_first_read_segment; g < os_aio_first_write_segment; g++) {
+		os_event_reset(os_aio_segment_wait_events[g]);
 	}
 }
 
@@ -3580,9 +3612,7 @@ try_again:
 #endif
 		} else {
 			if (!wake_later) {
-				os_aio_simulated_wake_handler_thread(
-					os_aio_get_segment_no_from_slot(
-						array, slot));
+				os_aio_simulated_wake_handler_thread(array);
 			}
 		}
 	} else if (type == OS_FILE_WRITE) {
@@ -3598,9 +3628,7 @@ try_again:
 #endif
 		} else {
 			if (!wake_later) {
-				os_aio_simulated_wake_handler_thread(
-					os_aio_get_segment_no_from_slot(
-						array, slot));
+				os_aio_simulated_wake_handler_thread(array);
 			}
 		}
 	} else {
@@ -3666,7 +3694,7 @@ ibool
 os_aio_windows_handle(
 /*==================*/
 				/* out: TRUE if the aio operation succeeded */
-	ulint	segment,	/* in: the number of the segment in the aio
+	ulint	global_segment,	/* in: the number of the segment in the aio
 				arrays to wait for; segment 0 is the ibuf
 				i/o thread, segment 1 the log i/o thread,
 				then follow the non-ibuf read threads, and as
@@ -3684,7 +3712,6 @@ os_aio_windows_handle(
 	void**	message2,
 	ulint*	type)		/* out: OS_FILE_WRITE or ..._READ */
 {
-	ulint		orig_seg	= segment;
 	os_aio_array_t*	array;
 	os_aio_slot_t*	slot;
 	ulint		n;
@@ -3693,39 +3720,35 @@ os_aio_windows_handle(
 	BOOL		ret;
 	DWORD		len;
 
-	if (segment == ULINT_UNDEFINED) {
+	if (global_segment == ULINT_UNDEFINED) {
 		array = os_aio_sync_array;
-		segment = 0;
 	} else {
-		segment = os_aio_get_array_and_local_segment(&array, segment);
+		array = os_aio_get_array(global_segment);
 	}
 
 	/* NOTE! We only access constant fields in os_aio_array. Therefore
 	we do not have to acquire the protecting mutex yet */
 
 	ut_ad(os_aio_validate());
-	ut_ad(segment < array->n_segments);
 
-	n = array->n_slots / array->n_segments;
+	n = array->n_slots;
 
 	if (array == os_aio_sync_array) {
 		os_event_wait(os_aio_array_get_nth_slot(array, pos)->event);
 		i = pos;
 	} else {
-		srv_set_io_thread_op_info(orig_seg, "wait Windows aio");
-		i = os_event_wait_multiple(n,
-					   (array->native_events)
-					   + segment * n);
+		srv_set_io_thread_op_info(global_segment, "wait Windows aio");
+		i = os_event_wait_multiple(n, (array->native_events));
 	}
 
 	os_mutex_enter(array->mutex);
 
-	slot = os_aio_array_get_nth_slot(array, i + segment * n);
+	slot = os_aio_array_get_nth_slot(array, i);
 
 	ut_a(slot->reserved);
 
-	if (orig_seg != ULINT_UNDEFINED) {
-		srv_set_io_thread_op_info(orig_seg,
+	if (global_segment != ULINT_UNDEFINED) {
+		srv_set_io_thread_op_info(global_segment,
 					  "get windows aio return value");
 	}
 
@@ -3898,14 +3921,16 @@ os_aio_simulated_handle(
 	ulint*	type)		/* out: OS_FILE_WRITE or ..._READ */
 {
 	os_aio_array_t*	array;
-	ulint		segment;
 	os_aio_slot_t*	slot;
 	os_aio_slot_t*	slot2;
 	os_aio_slot_t*	consecutive_ios[OS_AIO_MERGE_N_CONSECUTIVE];
+	os_aio_slot_t*  lowest_request;
+	os_aio_slot_t*	oldest_request;
 	ulint		n_consecutive;
 	ulint		total_len;
 	ulint		offs;
 	ulint		lowest_offset;
+	ulint		oldest_offset;
 	ulint		biggest_age;
 	ulint		age;
 	byte*		combined_buf;
@@ -3914,7 +3939,9 @@ os_aio_simulated_handle(
 	ulint		n;
 	ulint		i;
 
-	segment = os_aio_get_array_and_local_segment(&array, global_segment);
+ 	double          start_usecs, stop_usecs, elapsed_usecs;
+ 	time_t          now;
+ 	array = os_aio_get_array(global_segment);	
 
 restart:
 	/* NOTE! We only access constant fields in os_aio_array. Therefore
@@ -3923,11 +3950,10 @@ restart:
 	srv_set_io_thread_op_info(global_segment,
 				  "looking for i/o requests (a)");
 	ut_ad(os_aio_validate());
-	ut_ad(segment < array->n_segments);
 
-	n = array->n_slots / array->n_segments;
+	n = array->n_slots;
 
-	/* Look through n slots after the segment * n'th slot */
+	/* Look through n slots */
 
 	if (array == os_aio_read_array
 	    && os_aio_recommend_sleep_for_read_threads) {
@@ -3947,9 +3973,9 @@ restart:
 	done */
 
 	for (i = 0; i < n; i++) {
-		slot = os_aio_array_get_nth_slot(array, i + segment * n);
+		slot = os_aio_array_get_nth_slot(array, i);
 
-		if (slot->reserved && slot->io_already_done) {
+		if (slot->reserved && slot->status == OS_AIO_DONE) {
 
 			if (os_aio_print_debug) {
 				fprintf(stderr,
@@ -3964,74 +3990,64 @@ restart:
 		}
 	}
 
-	n_consecutive = 0;
-
-	/* If there are at least 2 seconds old requests, then pick the oldest
-	one to prevent starvation. If several requests have the same age,
-	then pick the one at the lowest offset. */
-
 	biggest_age = 0;
-	lowest_offset = ULINT_MAX;
+	now = time(NULL);
+        oldest_request = lowest_request = NULL;
+        oldest_offset = lowest_offset = ULINT_MAX;
 
+        /* Find the oldest request and the request with the smallest offset */
 	for (i = 0; i < n; i++) {
-		slot = os_aio_array_get_nth_slot(array, i + segment * n);
+		slot = os_aio_array_get_nth_slot(array, i);
 
-		if (slot->reserved) {
-			age = (ulint)difftime(time(NULL),
-					      slot->reservation_time);
+		if (slot->reserved && slot->status == OS_AIO_NOT_ISSUED) {
+			age = (ulint)difftime(now, slot->reservation_time);
 
+			/* If there are at least 2 seconds old requests, then pick the oldest
+			   one to prevent starvation. If several requests have the same age,
+			   then pick the one at the lowest offset. */
 			if ((age >= 2 && age > biggest_age)
 			    || (age >= 2 && age == biggest_age
-				&& slot->offset < lowest_offset)) {
-
-				/* Found an i/o request */
-				consecutive_ios[0] = slot;
-
-				n_consecutive = 1;
+			        && slot->offset < oldest_offset)) {
 
+			        /* Found an i/o request */
 				biggest_age = age;
-				lowest_offset = slot->offset;
+				oldest_request = slot;
+				oldest_offset = slot->offset;
 			}
-		}
-	}
-
-	if (n_consecutive == 0) {
-		/* There were no old requests. Look for an i/o request at the
-		lowest offset in the array (we ignore the high 32 bits of the
-		offset in these heuristics) */
-
-		lowest_offset = ULINT_MAX;
 
-		for (i = 0; i < n; i++) {
-			slot = os_aio_array_get_nth_slot(array,
-							 i + segment * n);
+			/* Look for an i/o request at the lowest offset in the array
+			 * (we ignore the high 32 bits of the offset) */
+			if (slot->offset < lowest_offset) {
+			        /* Found an i/o request */
+				lowest_request = slot;
 
-			if (slot->reserved && slot->offset < lowest_offset) {
 
-				/* Found an i/o request */
-				consecutive_ios[0] = slot;
-
-				n_consecutive = 1;
 
 				lowest_offset = slot->offset;
 			}
 		}
 	}
 
-	if (n_consecutive == 0) {
+	if (!lowest_request && !oldest_request) {
 
 		/* No i/o requested at the moment */
 
 		goto wait_for_io;
 	}
 
-	slot = consecutive_ios[0];
-
+        if (oldest_request) {
+		slot = oldest_request;
+        } else {
+		slot = lowest_request;
+        }
+        consecutive_ios[0] = slot;
+	n_consecutive = 1;
+  
 	/* Check if there are several consecutive blocks to read or write */
 
 consecutive_loop:
 	for (i = 0; i < n; i++) {
-		slot2 = os_aio_array_get_nth_slot(array, i + segment * n);
+		slot2 = os_aio_array_get_nth_slot(array, i);
 
 		if (slot2->reserved && slot2 != slot
 		    && slot2->offset == slot->offset + slot->len
@@ -4039,7 +4055,8 @@ consecutive_loop:
 		    && slot->offset + slot->len > slot->offset
 		    && slot2->offset_high == slot->offset_high
 		    && slot2->type == slot->type
-		    && slot2->file == slot->file) {
+		    && slot2->file == slot->file
+		    && slot2->status == OS_AIO_NOT_ISSUED) { 
 
 			/* Found a consecutive i/o request */
 
@@ -4048,7 +4065,8 @@ consecutive_loop:
 
 			slot = slot2;
 
-			if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE) {
+			if (n_consecutive < OS_AIO_MERGE_N_CONSECUTIVE &&
+ 			    n_consecutive < srv_max_merged_io) {
 
 				goto consecutive_loop;
 			} else {
@@ -4068,6 +4086,8 @@ consecutive_loop:
 
 	for (i = 0; i < n_consecutive; i++) {
 		total_len += consecutive_ios[i]->len;
+		ut_a(consecutive_ios[i]->status == OS_AIO_NOT_ISSUED);
+		consecutive_ios[i]->status = OS_AIO_ISSUED;
 	}
 
 	if (n_consecutive == 1) {
@@ -4075,7 +4095,16 @@ consecutive_loop:
 		combined_buf = slot->buf;
 		combined_buf2 = NULL;
 	} else {
-		combined_buf2 = ut_malloc(total_len + UNIV_PAGE_SIZE);
+		if ((total_len + UNIV_PAGE_SIZE) > os_aio_thread_buffer_size[global_segment]) {
+
+			if (os_aio_thread_buffer[global_segment])
+				ut_free(os_aio_thread_buffer[global_segment]);
+
+			os_aio_thread_buffer[global_segment] = ut_malloc(total_len + UNIV_PAGE_SIZE);
+ 
+			os_aio_thread_buffer_size[global_segment] = total_len + UNIV_PAGE_SIZE;
+		}
+		combined_buf2 = os_aio_thread_buffer[global_segment];
 
 		ut_a(combined_buf2);
 
@@ -4086,6 +4115,9 @@ consecutive_loop:
 	this assumes that there is just one i/o-handler thread serving
 	a single segment of slots! */
 
+	ut_a(slot->reserved);
+	ut_a(slot->status == OS_AIO_ISSUED);
+
 	os_mutex_exit(array->mutex);
 
 	if (slot->type == OS_FILE_WRITE && n_consecutive > 1) {
@@ -4112,6 +4144,7 @@ consecutive_loop:
 
 	/* Do the i/o with ordinary, synchronous i/o functions: */
 	if (slot->type == OS_FILE_WRITE) {
+		os_aio_thread_io_writes[global_segment] += n_consecutive;
 		if (array == os_aio_write_array) {
 			if ((total_len % UNIV_PAGE_SIZE != 0)
 			    || (slot->offset % UNIV_PAGE_SIZE != 0)) {
@@ -4126,18 +4159,30 @@ consecutive_loop:
 
 			os_file_check_page_trailers(combined_buf, total_len);
 		}
-
+ 		start_usecs = time_usecs();
 		ret = os_file_write(slot->name, slot->file, combined_buf,
 				    slot->offset, slot->offset_high,
 				    total_len);
+ 		stop_usecs = time_usecs();
+                elapsed_usecs = stop_usecs - start_usecs;
+                if (elapsed_usecs < 0) elapsed_usecs = 0;
 
 		if (array == os_aio_write_array) {
 			os_file_check_page_trailers(combined_buf, total_len);
 		}
 	} else {
+		start_usecs = time_usecs();
+ 		os_aio_thread_io_reads[global_segment] += n_consecutive;
 		ret = os_file_read(slot->file, combined_buf,
 				   slot->offset, slot->offset_high, total_len);
-	}
+		stop_usecs = time_usecs();
+                elapsed_usecs = stop_usecs - start_usecs;
+                if (elapsed_usecs < 0) elapsed_usecs = 0;
+	}
+ 	if (elapsed_usecs > os_aio_thread_max_io_wait[global_segment])
+ 		os_aio_thread_max_io_wait[global_segment] = elapsed_usecs;
+ 	os_aio_thread_io_wait[global_segment] += elapsed_usecs;
+ 	os_aio_thread_io_requests[global_segment]++;
 
 	ut_a(ret);
 	srv_set_io_thread_op_info(global_segment, "file i/o done");
@@ -4160,16 +4205,13 @@ consecutive_loop:
 		}
 	}
 
-	if (combined_buf2) {
-		ut_free(combined_buf2);
-	}
-
 	os_mutex_enter(array->mutex);
 
 	/* Mark the i/os done in slots */
 
 	for (i = 0; i < n_consecutive; i++) {
-		consecutive_ios[i]->io_already_done = TRUE;
+		ut_a(consecutive_ios[i]->status == OS_AIO_ISSUED);
+		consecutive_ios[i]->status = OS_AIO_DONE;
 	}
 
 	/* We return the messages for the first slot now, and if there were
@@ -4179,6 +4221,8 @@ consecutive_loop:
 slot_io_done:
 
 	ut_a(slot->reserved);
+	ut_a(slot->status == OS_AIO_DONE);
+	slot->status = OS_AIO_CLAIMED;
 
 	*message1 = slot->message1;
 	*message2 = slot->message2;
@@ -4188,6 +4232,7 @@ slot_io_done:
 	os_mutex_exit(array->mutex);
 
 	os_aio_array_free_slot(array, slot);
+ 	srv_set_io_thread_op_info(global_segment, "exited handler");
 
 	return(ret);
 
@@ -4234,7 +4279,6 @@ os_aio_array_validate(
 	os_mutex_enter(array->mutex);
 
 	ut_a(array->n_slots > 0);
-	ut_a(array->n_segments > 0);
 
 	for (i = 0; i < array->n_slots; i++) {
 		slot = os_aio_array_get_nth_slot(array, i);
@@ -4284,11 +4328,19 @@ os_aio_print(
 	double		time_elapsed;
 	double		avg_bytes_read;
 	ulint		i;
-
-	for (i = 0; i < srv_n_file_io_threads; i++) {
-		fprintf(file, "I/O thread %lu state: %s (%s)", (ulong) i,
-			srv_io_thread_op_info[i],
-			srv_io_thread_function[i]);
+ 	ulint		num_issued, num_done, num_claimed;
+  
+	for (i = 0; i < os_aio_n_segments; i++) {
+		fprintf(file,
+			"I/O thread %lu state: %s (%s) reads %lu writes %lu "
+			"requests %lu io secs %lf io msecs/request %lf max_io_wait %lf",
+			i, srv_io_thread_op_info[i], srv_io_thread_function[i],
+			os_aio_thread_io_reads[i], os_aio_thread_io_writes[i],
+			os_aio_thread_io_requests[i],
+			os_aio_thread_io_wait[i] / 1000000.0,
+			os_aio_thread_io_requests[i] ?
+			os_aio_thread_io_wait[i] / os_aio_thread_io_requests[i] / 1000.0 : 0.0,
+			os_aio_thread_max_io_wait[i] / 1000.0);
 
 #ifndef __WIN__
 		if (os_aio_segment_wait_events[i]->is_set) {
@@ -4308,14 +4360,21 @@ loop:
 	os_mutex_enter(array->mutex);
 
 	ut_a(array->n_slots > 0);
-	ut_a(array->n_segments > 0);
-
 	n_reserved = 0;
+	num_done = num_issued = num_claimed = 0;
 
 	for (i = 0; i < array->n_slots; i++) {
 		slot = os_aio_array_get_nth_slot(array, i);
 
 		if (slot->reserved) {
+ 			if (slot->status == OS_AIO_ISSUED)
+ 				num_issued++;
+ 			else if (slot->status == OS_AIO_DONE)
+ 				num_done++;
+ 			else {
+ 				ut_ad(slot->status == OS_AIO_CLAIMED);
+ 				num_claimed++;
+ 			}
 			n_reserved++;
 #if 0
 			fprintf(stderr, "Reserved slot, messages %p %p\n",
@@ -4361,6 +4420,13 @@ loop:
 		goto loop;
 	}
 
+ 	putc('\n', file);
+ 	fprintf(file,
+ 		"Summary of background IO slot status: %lu issued, "
+ 		"%lu done, %lu claimed, sleep set %d\n",
+ 		num_issued, num_done, num_claimed,
+ 		(int)os_aio_recommend_sleep_for_read_threads);
+
 	putc('\n', file);
 	current_time = time(NULL);
 	time_elapsed = 0.001 + difftime(current_time, os_last_printout);

=== modified file 'storage/innobase/row/row0sel.c'
--- a/storage/innobase/row/row0sel.c	2009-04-15 13:06:16 +0000
+++ b/storage/innobase/row/row0sel.c	2009-07-02 14:23:36 +0000
@@ -1249,7 +1249,7 @@ table_loop:
 			rw_lock_s_lock(&btr_search_latch);
 
 			search_latch_locked = TRUE;
-		} else if (btr_search_latch.writer_is_wait_ex) {
+		} else if (rw_lock_get_writer(&btr_search_latch) == RW_LOCK_WAIT_EX) {
 
 			/* There is an x-latch request waiting: release the
 			s-latch for a moment; as an s-latch here is often
@@ -3321,7 +3321,7 @@ row_search_for_mysql(
 	/* PHASE 0: Release a possible s-latch we are holding on the
 	adaptive hash index latch if there is someone waiting behind */
 
-	if (UNIV_UNLIKELY(btr_search_latch.writer != RW_LOCK_NOT_LOCKED)
+	if (UNIV_UNLIKELY(rw_lock_get_writer(&btr_search_latch) != RW_LOCK_NOT_LOCKED)
 	    && trx->has_search_latch) {
 
 		/* There is an x-latch request on the adaptive hash index:

=== modified file 'storage/innobase/srv/srv0srv.c'
--- a/storage/innobase/srv/srv0srv.c	2009-05-19 08:37:33 +0000
+++ b/storage/innobase/srv/srv0srv.c	2009-07-02 14:23:36 +0000
@@ -171,7 +171,16 @@ ulint	srv_awe_window_size	= 0;		/* size 
 ulint	srv_mem_pool_size	= ULINT_MAX;	/* size in bytes */
 ulint	srv_lock_table_size	= ULINT_MAX;
 
-ulint	srv_n_file_io_threads	= ULINT_MAX;
+
+ulint   srv_io_capacity         = ULINT_MAX;    /* Number of IO operations per
+                                                   second the server can do */
+
+ibool   srv_extra_dirty_writes = TRUE;  /* Write dirty pages to disk when pct
+                                           dirty < max dirty pct */
+
+ulint	srv_n_read_io_threads	= ULINT_MAX;
+ulint	srv_n_write_io_threads	= ULINT_MAX;
+ulint	srv_max_merged_io = 64;
 
 #ifdef UNIV_LOG_ARCHIVE
 ibool	srv_log_archive_on	= FALSE;
@@ -278,6 +287,7 @@ Value 10 should be good if there are les
 computer. Bigger computers need bigger values. Value 0 will disable the
 concurrency check. */
 
+ibool   srv_thread_concurrency_timer_based = TRUE;
 ulong	srv_thread_concurrency	= 0;
 ulong	srv_commit_concurrency	= 0;
 
@@ -340,10 +350,10 @@ ibool	srv_use_awe			= FALSE;
 ibool	srv_use_adaptive_hash_indexes	= TRUE;
 
 /*-------------------------------------------*/
-ulong	srv_n_spin_wait_rounds	= 20;
+ulong	srv_n_spin_wait_rounds	= 30;
 ulong	srv_n_free_tickets_to_enter = 500;
 ulong	srv_thread_sleep_delay = 10000;
-ulint	srv_spin_wait_delay	= 5;
+ulint	srv_spin_wait_delay	= 6;
 ibool	srv_priority_boost	= TRUE;
 
 ibool	srv_print_thread_releases	= FALSE;
@@ -409,6 +419,30 @@ FILE*	srv_misc_tmpfile;
 ulint	srv_main_thread_process_no	= 0;
 ulint	srv_main_thread_id		= 0;
 
+/* The following count work done by srv_master_thread. */
+
+/* Iterations by the 'once per second' loop */
+ulint   srv_main_1_second_loops         = 0;
+/* Calls to sleep by the 'once per second' loop */
+ulint   srv_main_sleeps                 = 0;
+/* Iterations by the 'once per 10 seconds' loop */
+ulint   srv_main_10_second_loops        = 0;
+/* Iterations of the loop bounded by the 'background_loop' label */
+ulint   srv_main_background_loops       = 0;
+/* Iterations of the loop bounded by the 'flush_loop' label */
+ulint   srv_main_flush_loops            = 0;
+/* Calls to log_buffer_flush_to_disk */
+ulint   srv_sync_flush                  = 0;
+/* Calls to log_buffer_flush_maybe_sync */
+ulint   srv_async_flush                 = 0;
+
+/* Number of microseconds threads wait because of
+innodb_thread_concurrency */
+static ib_longlong srv_thread_wait_mics = 0;
+
+/* Number of microseconds for spinlock delay */
+static ib_longlong srv_timed_spin_delay = 0;
+
 /*
 	IMPLEMENTATION OF THE SERVER MAIN PROGRAM
 	=========================================
@@ -628,6 +662,53 @@ are indexed by the type of the thread. *
 ulint	srv_n_threads_active[SRV_MASTER + 1];
 ulint	srv_n_threads[SRV_MASTER + 1];
 
+/* Times SYNC_SPIN_ROUNDS ut_delay() calls; result in srv_timed_spin_delay. */
+static void time_spin_delay()
+{
+  ulint sec_before, usec_before;
+  ulint sec_after, usec_after;
+  int round;
+
+  srv_timed_spin_delay = 0;
+
+  if (ut_usectime(&sec_before, &usec_before))
+    return;
+
+  for (round = 0; round < (int)SYNC_SPIN_ROUNDS; ++round)
+    ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
+
+  if (!ut_usectime(&sec_after, &usec_after)) {
+    srv_timed_spin_delay = ut_usecdiff(sec_after, usec_after,
+                                       sec_before, usec_before);
+  }
+}
+
+/*************************************************************************
+Prints counters for work done by srv_master_thread, the time threads have
+waited because of innodb_thread_concurrency, and the timed spin delay. */
+
+static
+void
+srv_print_extra(
+/*===================*/
+	FILE  *file)    /* in: output stream */
+{
+	/* ulint counters are cast to ulong to match the %lu conversions */
+	fprintf(file, "srv_master_thread loops: %lu 1_second, %lu sleeps, "
+		"%lu 10_second, %lu background, %lu flush\n",
+		(ulong) srv_main_1_second_loops, (ulong) srv_main_sleeps,
+		(ulong) srv_main_10_second_loops,
+		(ulong) srv_main_background_loops,
+		(ulong) srv_main_flush_loops);
+	fprintf(file, "srv_master_thread log flush: %lu sync, %lu async\n",
+		(ulong) srv_sync_flush, (ulong) srv_async_flush);
+	fprintf(file, "srv_wait_thread_mics %lld microseconds, %.1f seconds\n",
+		srv_thread_wait_mics, (double) srv_thread_wait_mics / 1000000.0);
+	fprintf(file, "spinlock delay for %d delay %d rounds is %lld mics\n",
+		(int)srv_spin_wait_delay, (int)SYNC_SPIN_ROUNDS,
+		srv_timed_spin_delay);
+}
+
 /*************************************************************************
 Sets the info describing an i/o thread current state. */
 
@@ -861,6 +942,8 @@ srv_init(void)
 	dict_table_t*		table;
 	ulint			i;
 
+        time_spin_delay();
+
 	srv_sys = mem_alloc(sizeof(srv_sys_t));
 
 	kernel_mutex_temp = mem_alloc(sizeof(mutex_t));
@@ -976,6 +1059,95 @@ ulong	srv_max_purge_lag		= 0;
 Puts an OS thread to wait if there are too many concurrent threads
 (>= srv_thread_concurrency) inside InnoDB. The threads wait in a FIFO queue. */
 
+#ifdef UNIV_SYNC_ATOMIC
+/* srv_conc_n_threads += 1; *n_threads receives os_atomic_increment()'s result */
+static void inc_srv_conc_n_threads(lint *n_threads)
+{
+  *n_threads = os_atomic_increment(&srv_conc_n_threads, 1);
+}
+
+/* Adds -1 to srv_conc_n_threads through os_atomic_increment(). */
+static void dec_srv_conc_n_threads()
+{
+  (void) os_atomic_increment(&srv_conc_n_threads, -1);
+}
+#endif
+
+/* Writes a timestamped error to stderr saying that trx is already declared
+to be inside InnoDB, followed by the trx_print() output and a newline. */
+static void
+print_already_in_error(trx_t* trx)
+{
+	ut_print_timestamp(stderr);
+	fputs("  InnoDB: Error: trying to declare trx to enter InnoDB, but\n"
+	      "InnoDB: it already is declared.\n", stderr);
+	trx_print(stderr, trx, 0);
+	putc('\n', stderr);
+}
+
+#ifdef UNIV_SYNC_ATOMIC
+/* Marks trx as declared inside InnoDB and gives it
+SRV_FREE_TICKETS_TO_ENTER tickets. Does not touch srv_conc_n_threads. */
+static void enter_innodb_with_tickets(trx_t* trx)
+{
+	trx->declared_to_be_inside_innodb = TRUE;
+	trx->n_tickets_to_enter_innodb = SRV_FREE_TICKETS_TO_ENTER;
+}
+
+/* Timer-based entry to InnoDB: takes a slot if srv_conc_n_threads is below
+srv_thread_concurrency, yields once and retries, else enters regardless. */
+static void
+srv_conc_enter_innodb_timer_based(trx_t* trx)
+{
+	lint	conc_n_threads;
+	ibool	has_yielded = FALSE;
+	ulint	has_slept = 0;
+
+	if (trx->declared_to_be_inside_innodb) {
+		print_already_in_error(trx);
+		return;	/* already counted, as in srv_conc_enter_innodb() */
+	}
+retry:
+	if (srv_conc_n_threads < (lint) srv_thread_concurrency) {
+		inc_srv_conc_n_threads(&conc_n_threads);
+		if (conc_n_threads <= (lint) srv_thread_concurrency) {
+			enter_innodb_with_tickets(trx);
+			return;
+		}
+		dec_srv_conc_n_threads();	/* over the limit: undo */
+	}
+	if (!has_yielded) {
+		has_yielded = TRUE;
+		os_thread_yield();
+		goto retry;
+	}
+	if (trx->has_search_latch
+	    || NULL != UT_LIST_GET_FIRST(trx->trx_locks)) {
+		inc_srv_conc_n_threads(&conc_n_threads);
+		enter_innodb_with_tickets(trx);
+		return;
+	}
+	/* NOTE(review): no retry follows, so this sleeps exactly once - confirm */
+	if (has_slept < 2) {
+		trx->op_info = "sleeping before entering InnoDB";
+		os_thread_sleep(10000);
+		trx->op_info = "";
+		has_slept++;
+	}
+	inc_srv_conc_n_threads(&conc_n_threads);
+	enter_innodb_with_tickets(trx);
+}
+
+/* Timer-based counterpart of leaving InnoDB: decrements srv_conc_n_threads
+and clears the trx's inside-InnoDB flag and remaining tickets. */
+static void srv_conc_exit_innodb_timer_based(trx_t* trx)
+{
+	dec_srv_conc_n_threads();
+	trx->declared_to_be_inside_innodb = FALSE;
+	trx->n_tickets_to_enter_innodb = 0;
+}
+#endif
+
 void
 srv_conc_enter_innodb(
 /*==================*/
@@ -1006,15 +1178,17 @@ srv_conc_enter_innodb(
 		return;
 	}
 
+#ifdef UNIV_SYNC_ATOMIC
+        if (srv_thread_concurrency_timer_based) {
+          srv_conc_enter_innodb_timer_based(trx);
+          return;
+        }
+#endif
+
 	os_fast_mutex_lock(&srv_conc_mutex);
 retry:
 	if (trx->declared_to_be_inside_innodb) {
-		ut_print_timestamp(stderr);
-		fputs("  InnoDB: Error: trying to declare trx"
-		      " to enter InnoDB, but\n"
-		      "InnoDB: it already is declared.\n", stderr);
-		trx_print(stderr, trx, 0);
-		putc('\n', stderr);
+                print_already_in_error(trx);
 		os_fast_mutex_unlock(&srv_conc_mutex);
 
 		return;
@@ -1143,19 +1317,27 @@ srv_conc_force_enter_innodb(
 	trx_t*	trx)	/* in: transaction object associated with the
 			thread */
 {
+
 	if (UNIV_LIKELY(!srv_thread_concurrency)) {
 
 		return;
 	}
 
 	ut_ad(srv_conc_n_threads >= 0);
-
+#ifdef UNIV_SYNC_ATOMIC
+        if (srv_thread_concurrency_timer_based) {
+                lint               conc_n_threads;
+
+                inc_srv_conc_n_threads(&conc_n_threads);
+	        trx->declared_to_be_inside_innodb = TRUE;
+	        trx->n_tickets_to_enter_innodb = 1;
+                return;
+        }
+#endif
 	os_fast_mutex_lock(&srv_conc_mutex);
-
 	srv_conc_n_threads++;
 	trx->declared_to_be_inside_innodb = TRUE;
 	trx->n_tickets_to_enter_innodb = 1;
-
 	os_fast_mutex_unlock(&srv_conc_mutex);
 }
 
@@ -1182,6 +1364,14 @@ srv_conc_force_exit_innodb(
 		return;
 	}
 
+#ifdef UNIV_SYNC_ATOMIC
+        if (srv_thread_concurrency_timer_based)
+        {
+                srv_conc_exit_innodb_timer_based(trx);
+                return;
+        }
+#endif
+
 	os_fast_mutex_lock(&srv_conc_mutex);
 
 	ut_ad(srv_conc_n_threads > 0);
@@ -1590,11 +1780,16 @@ srv_release_mysql_thread_if_suspended(
 /**********************************************************************
 Refreshes the values used to calculate per-second averages. */
 static
-void
+ibool
 srv_refresh_innodb_monitor_stats(void)
 /*==================================*/
 {
-	mutex_enter(&srv_innodb_monitor_mutex);
+	/* Sometimes we will skip the stats update to avoid a deadlock,
+	since this function is called by the background wake-up thread */
+	if (mutex_enter_nowait(&srv_innodb_monitor_mutex)) {
+		/* mutex_enter_nowait returns 1 on failure */
+		return FALSE;
+	}
 
 	srv_last_monitor_time = time(NULL);
 
@@ -1613,6 +1808,7 @@ srv_refresh_innodb_monitor_stats(void)
 	srv_n_rows_read_old = srv_n_rows_read;
 
 	mutex_exit(&srv_innodb_monitor_mutex);
+	return TRUE;
 }
 
 /**********************************************************************
@@ -1621,11 +1817,7 @@ Outputs to a file the output of the Inno
 void
 srv_printf_innodb_monitor(
 /*======================*/
-	FILE*	file,		/* in: output stream */
-	ulint*	trx_start,	/* out: file position of the start of
-				the list of active transactions */
-	ulint*	trx_end)	/* out: file position of the end of
-				the list of active transactions */
+	FILE*	file)		/* in: output stream */
 {
 	double	time_elapsed;
 	time_t	current_time;
@@ -1653,6 +1845,11 @@ srv_printf_innodb_monitor(
 		"Per second averages calculated from the last %lu seconds\n",
 		(ulong)time_elapsed);
 
+  	fputs("----------\n"
+		"BACKGROUND THREAD\n"
+		"----------\n", file);
+        srv_print_extra(file);
+
 	fputs("----------\n"
 	      "SEMAPHORES\n"
 	      "----------\n", file);
@@ -1674,24 +1871,6 @@ srv_printf_innodb_monitor(
 
 	mutex_exit(&dict_foreign_err_mutex);
 
-	lock_print_info_summary(file);
-	if (trx_start) {
-		long	t = ftell(file);
-		if (t < 0) {
-			*trx_start = ULINT_UNDEFINED;
-		} else {
-			*trx_start = (ulint) t;
-		}
-	}
-	lock_print_info_all_transactions(file);
-	if (trx_end) {
-		long	t = ftell(file);
-		if (t < 0) {
-			*trx_end = ULINT_UNDEFINED;
-		} else {
-			*trx_end = (ulint) t;
-		}
-	}
 	fputs("--------\n"
 	      "FILE I/O\n"
 	      "--------\n", file);
@@ -1841,6 +2020,16 @@ srv_export_innodb_status(void)
 	export_vars.innodb_buffer_pool_pages_misc = buf_pool->max_size
 		- UT_LIST_GET_LEN(buf_pool->LRU)
 		- UT_LIST_GET_LEN(buf_pool->free);
+#ifdef UNIV_SYNC_ATOMIC
+	export_vars.innodb_have_sync_atomic = 1;
+#else
+	export_vars.innodb_have_sync_atomic = 0;
+#endif
+#ifdef UNIV_DISABLE_MEM_POOL
+	export_vars.innodb_heap_enabled = 0;
+#else
+	export_vars.innodb_heap_enabled = 1;
+#endif
 	export_vars.innodb_page_size = UNIV_PAGE_SIZE;
 	export_vars.innodb_log_waits = srv_log_waits;
 	export_vars.innodb_os_log_written = srv_os_log_written;
@@ -1870,6 +2059,7 @@ srv_export_innodb_status(void)
 	export_vars.innodb_rows_inserted = srv_n_rows_inserted;
 	export_vars.innodb_rows_updated = srv_n_rows_updated;
 	export_vars.innodb_rows_deleted = srv_n_rows_deleted;
+	export_vars.innodb_wake_ups = sync_wake_ups;
 
 	mutex_exit(&srv_innodb_monitor_mutex);
 }
@@ -1921,14 +2111,13 @@ loop:
 		last_monitor_time = time(NULL);
 
 		if (srv_print_innodb_monitor) {
-			srv_printf_innodb_monitor(stderr, NULL, NULL);
+			srv_printf_innodb_monitor(stderr);
 		}
 
 		if (srv_innodb_status) {
 			mutex_enter(&srv_monitor_file_mutex);
 			rewind(srv_monitor_file);
-			srv_printf_innodb_monitor(srv_monitor_file, NULL,
-						  NULL);
+			srv_printf_innodb_monitor(srv_monitor_file);
 			os_file_set_eof(srv_monitor_file);
 			mutex_exit(&srv_monitor_file_mutex);
 		}
@@ -2057,7 +2246,10 @@ exit_func:
 
 /*************************************************************************
 A thread which prints warnings about semaphore waits which have lasted
-too long. These can be used to track bugs which cause hangs. */
+too long. These can be used to track bugs which cause hangs.
+NOTE: This thread should not wait for any innodb mutexes or rw_locks.
+A deadlock could arise where the thread holding that lock requires waking
+by this background thread while this thread is blocked on that lock. */
 
 os_thread_ret_t
 srv_error_monitor_thread(
@@ -2069,10 +2261,6 @@ srv_error_monitor_thread(
 {
 	/* number of successive fatal timeouts observed */
 	ulint	fatal_cnt	= 0;
-	dulint	old_lsn;
-	dulint	new_lsn;
-
-	old_lsn = srv_start_lsn;
 
 #ifdef UNIV_DEBUG_THREAD_CREATION
 	fprintf(stderr, "Error monitor thread starts, id %lu\n",
@@ -2081,29 +2269,8 @@ srv_error_monitor_thread(
 loop:
 	srv_error_monitor_active = TRUE;
 
-	/* Try to track a strange bug reported by Harald Fuchs and others,
-	where the lsn seems to decrease at times */
-
-	new_lsn = log_get_lsn();
-
-	if (ut_dulint_cmp(new_lsn, old_lsn) < 0) {
-		ut_print_timestamp(stderr);
-		fprintf(stderr,
-			"  InnoDB: Error: old log sequence number %lu %lu"
-			" was greater\n"
-			"InnoDB: than the new log sequence number %lu %lu!\n"
-			"InnoDB: Please submit a bug report"
-			" to http://bugs.mysql.com\n",
-			(ulong) ut_dulint_get_high(old_lsn),
-			(ulong) ut_dulint_get_low(old_lsn),
-			(ulong) ut_dulint_get_high(new_lsn),
-			(ulong) ut_dulint_get_low(new_lsn));
-	}
-
-	old_lsn = new_lsn;
-
 	if (difftime(time(NULL), srv_last_monitor_time) > 60) {
-		/* We referesh InnoDB Monitor values so that averages are
+		/* We refresh InnoDB Monitor values so that averages are
 		printed from at most 60 last seconds */
 
 		srv_refresh_innodb_monitor_stats();
@@ -2194,6 +2361,14 @@ srv_wake_master_thread(void)
 }
 
 /*************************************************************************
+Returns the number of IO operations that is pct percent of the capacity.
+The argument is parenthesized, so any arithmetic expression may be passed.
+PCT_IO(5) -> returns the number of IO operations that is 5% of the max
+where max is srv_io_capacity.
+*/
+#define PCT_IO(pct) ((ulint) (srv_io_capacity * ((double) (pct) / 100.0)))
+
+/*************************************************************************
 The master thread controlling the server. */
 
 os_thread_ret_t
@@ -2224,6 +2399,9 @@ srv_master_thread(
 	fprintf(stderr, "Master thread starts, id %lu\n",
 		os_thread_pf(os_thread_get_curr_id()));
 #endif
+        fprintf(stderr, "InnoDB master thread running with io_capacity %lu\n",
+                srv_io_capacity);
+
 	srv_main_thread_process_no = os_proc_get_number();
 	srv_main_thread_id = os_thread_pf(os_thread_get_curr_id());
 
@@ -2265,10 +2443,12 @@ loop:
 		n_ios_old = log_sys->n_log_ios + buf_pool->n_pages_read
 			+ buf_pool->n_pages_written;
 		srv_main_thread_op_info = "sleeping";
+		srv_main_1_second_loops++;	
 
 		if (!skip_sleep) {
 
 			os_thread_sleep(1000000);
+			srv_main_sleeps++;
 		}
 
 		skip_sleep = FALSE;
@@ -2294,27 +2474,28 @@ loop:
 
 		srv_main_thread_op_info = "flushing log";
 		log_buffer_flush_to_disk();
+		srv_sync_flush++;
 
 		srv_main_thread_op_info = "making checkpoint";
 		log_free_check();
 
-		/* If there were less than 5 i/os during the
-		one second sleep, we assume that there is free
-		disk i/o capacity available, and it makes sense to
-		do an insert buffer merge. */
+		/* If i/os during one second sleep were less than 5% of
+                capacity, we assume that there is free disk i/o capacity
+                available, and it makes sense to do an insert buffer merge. */
 
 		n_pend_ios = buf_get_n_pending_ios()
 			+ log_sys->n_pending_writes;
 		n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
 			+ buf_pool->n_pages_written;
-		if (n_pend_ios < 3 && (n_ios - n_ios_old < 5)) {
+		if (n_pend_ios < PCT_IO(3) && (n_ios - n_ios_old < PCT_IO(5))) {
 			srv_main_thread_op_info = "doing insert buffer merge";
-			ibuf_contract_for_n_pages(
-				TRUE, srv_insert_buffer_batch_size / 4);
+			ibuf_contract_for_n_pages(TRUE, PCT_IO(20) / 4);
 
 			srv_main_thread_op_info = "flushing log";
 
-			log_buffer_flush_to_disk();
+			/* No fsync when srv_flush_log_at_trx_commit != 1 */
+			log_buffer_flush_maybe_sync();
+			srv_async_flush++;
 		}
 
 		if (UNIV_UNLIKELY(buf_get_modified_ratio_pct()
@@ -2323,7 +2504,8 @@ loop:
 			/* Try to keep the number of modified pages in the
 			buffer pool under the limit wished by the user */
 
-			n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
+			n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
+							  PCT_IO(100),
 							  ut_dulint_max);
 
 			/* If we had to do the flush, it may have taken
@@ -2351,30 +2533,40 @@ loop:
 	seconds */
 	mem_validate_all_blocks();
 #endif
-	/* If there were less than 200 i/os during the 10 second period,
-	we assume that there is free disk i/o capacity available, and it
-	makes sense to flush 100 pages. */
+	/* If i/os during the 10 second period were less than 200% of
+         capacity, we assume that there is free disk i/o capacity
+         available, and it makes sense to flush srv_io_capacity pages.
+
+         Note that this is done regardless of the fraction of dirty
+         pages relative to the max requested by the user. The one second
+         loop above requests writes for that case. The writes done here
+         are not required, and may be disabled. */
 
 	n_pend_ios = buf_get_n_pending_ios() + log_sys->n_pending_writes;
 	n_ios = log_sys->n_log_ios + buf_pool->n_pages_read
 		+ buf_pool->n_pages_written;
-	if (n_pend_ios < 3 && (n_ios - n_ios_very_old < 200)) {
+	if (srv_extra_dirty_writes &&
+	    n_pend_ios < 3 && (n_ios - n_ios_very_old < PCT_IO(200))) {
 
 		srv_main_thread_op_info = "flushing buffer pool pages";
-		buf_flush_batch(BUF_FLUSH_LIST, 100, ut_dulint_max);
+		buf_flush_batch(BUF_FLUSH_LIST, PCT_IO(100), ut_dulint_max);
 
 		srv_main_thread_op_info = "flushing log";
-		log_buffer_flush_to_disk();
+		/* No fsync when srv_flush_log_at_trx_commit != 1 */
+		log_buffer_flush_maybe_sync();
+		srv_async_flush++;
 	}
 
 	/* We run a batch of insert buffer merge every 10 seconds,
 	even if the server were active */
 
 	srv_main_thread_op_info = "doing insert buffer merge";
-	ibuf_contract_for_n_pages(TRUE, srv_insert_buffer_batch_size / 4);
+	ibuf_contract_for_n_pages(TRUE, PCT_IO(20) / 4);
 
 	srv_main_thread_op_info = "flushing log";
-	log_buffer_flush_to_disk();
+	/* No fsync when srv_flush_log_at_trx_commit != 1 */
+	log_buffer_flush_maybe_sync();
+	srv_async_flush++;
 
 	/* We run a full purge every 10 seconds, even if the server
 	were active */
@@ -2400,6 +2592,7 @@ loop:
 
 			log_buffer_flush_to_disk();
 			last_flush_time = current_time;
+			srv_sync_flush++;
 		}
 	}
 
@@ -2413,14 +2606,16 @@ loop:
 		(> 70 %), we assume we can afford reserving the disk(s) for
 		the time it requires to flush 100 pages */
 
-		n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
+		n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
+						  PCT_IO(100),
 						  ut_dulint_max);
 	} else {
 		/* Otherwise, we only flush a small number of pages so that
 		we do not unnecessarily use much disk i/o capacity from
 		other work */
 
-		n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 10,
+		n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
+						  PCT_IO(10),
 						  ut_dulint_max);
 	}
 
@@ -2454,7 +2649,7 @@ background_loop:
 
 	/* The server has been quiet for a while: start running background
 	operations */
-
+	srv_main_background_loops++;		
 	srv_main_thread_op_info = "doing background drop tables";
 
 	n_tables_to_drop = row_drop_tables_for_mysql_in_background();
@@ -2492,6 +2687,7 @@ background_loop:
 
 			log_buffer_flush_to_disk();
 			last_flush_time = current_time;
+                        srv_sync_flush++;
 		}
 	}
 
@@ -2509,8 +2705,11 @@ background_loop:
 	if (srv_fast_shutdown && srv_shutdown_state > 0) {
 		n_bytes_merged = 0;
 	} else {
-		n_bytes_merged = ibuf_contract_for_n_pages(
-			TRUE, srv_insert_buffer_batch_size);
+		/* This should do an amount of IO similar to the number of
+		* dirty pages that will be flushed in the call to
+		* buf_flush_batch below. Otherwise, the system favors
+		* clean pages over cleanup throughput. */
+		n_bytes_merged = ibuf_contract_for_n_pages(TRUE, PCT_IO(100));
 	}
 
 	srv_main_thread_op_info = "reserving kernel mutex";
@@ -2524,9 +2723,10 @@ background_loop:
 
 flush_loop:
 	srv_main_thread_op_info = "flushing buffer pool pages";
-
+	srv_main_flush_loops++;
 	if (srv_fast_shutdown < 2) {
-		n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST, 100,
+		n_pages_flushed = buf_flush_batch(BUF_FLUSH_LIST,
+						  PCT_IO(100),
 						  ut_dulint_max);
 	} else {
 		/* In the fastest shutdown we do not flush the buffer pool
@@ -2549,7 +2749,17 @@ flush_loop:
 
 	srv_main_thread_op_info = "flushing log";
 
-	log_buffer_flush_to_disk();
+	current_time = time(NULL);
+	if (difftime(current_time, last_flush_time) > 1) {
+		srv_main_thread_op_info = (char*) "flushing log";
+		log_buffer_flush_to_disk();
+		last_flush_time = current_time;
+ 		srv_sync_flush++;
+	} else {
+		/* No fsync when srv_flush_log_at_trx_commit != 1 */
+		log_buffer_flush_maybe_sync();
+		srv_async_flush++;
+	}
 
 	srv_main_thread_op_info = "making checkpoint";
 

=== modified file 'storage/innobase/srv/srv0start.c'
--- a/storage/innobase/srv/srv0start.c	2009-01-26 16:03:39 +0000
+++ b/storage/innobase/srv/srv0start.c	2009-07-02 14:23:36 +0000
@@ -986,6 +986,7 @@ innobase_start_or_create_for_mysql(void)
 	ulint	i;
 	ibool	srv_file_per_table_original_value  = srv_file_per_table;
 	mtr_t	mtr;
+	ulint	n_threads;
 #ifdef HAVE_DARWIN_THREADS
 # ifdef F_FULLFSYNC
 	/* This executable has been compiled on Mac OS X 10.3 or later.
@@ -1063,6 +1064,16 @@ innobase_start_or_create_for_mysql(void)
 		return(DB_ERROR);
 	}
 
+#ifdef UNIV_DISABLE_MEM_POOL
+	fprintf(stderr,
+		"InnoDB: The InnoDB memory heap has been disabled.\n");
+#endif
+
+#ifdef UNIV_SYNC_ATOMIC
+	fprintf(stderr,
+		"InnoDB: Mutex and rw_lock use atomics.\n");
+#endif
+
 	/* Since InnoDB does not currently clean up all its internal data
 	structures in MySQL Embedded Server Library server_end(), we
 	print an error message if someone tries to start up InnoDB a
@@ -1238,25 +1249,43 @@ innobase_start_or_create_for_mysql(void)
 		return(DB_ERROR);
 	}
 
+#ifdef __WIN__
+        /*
+           Need to hardcode this to 1 read and 1 write on Windows
+           while searching for problem causing this to crash when
+           higher number of threads are supported.
+        */
+        srv_n_read_io_threads = srv_n_write_io_threads = 1;
+#endif
 	/* Restrict the maximum number of file i/o threads */
-	if (srv_n_file_io_threads > SRV_MAX_N_IO_THREADS) {
-
-		srv_n_file_io_threads = SRV_MAX_N_IO_THREADS;
+	if ((srv_n_read_io_threads + srv_n_write_io_threads) > SRV_MAX_N_IO_THREADS) {
+		fprintf(stderr,
+			"InnoDB: requested too many read(%d) or write(%d) IO threads, max is %d\n",
+			(int)srv_n_read_io_threads,
+                        (int)srv_n_write_io_threads,
+                        SRV_MAX_N_IO_THREADS);	
+		return(DB_ERROR);
 	}
 
 	if (!os_aio_use_native_aio) {
-		/* In simulated aio we currently have use only for 4 threads */
-		srv_n_file_io_threads = 4;
-
-		os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD
-			    * srv_n_file_io_threads,
-			    srv_n_file_io_threads,
-			    SRV_MAX_N_PENDING_SYNC_IOS);
+ 		/* More than 4 threads are now supported. */
+		n_threads = os_aio_init(8 * SRV_N_PENDING_IOS_PER_THREAD,
+                                        srv_n_read_io_threads,
+                                        srv_n_write_io_threads,
+                                        SRV_MAX_N_PENDING_SYNC_IOS);
 	} else {
-		os_aio_init(SRV_N_PENDING_IOS_PER_THREAD
-			    * srv_n_file_io_threads,
-			    srv_n_file_io_threads,
-			    SRV_MAX_N_PENDING_SYNC_IOS);
+                /* Might need more slots here. Alas, I don't do windows. */
+                n_threads = os_aio_init(SRV_N_PENDING_IOS_PER_THREAD,
+                                        srv_n_read_io_threads,
+                                        srv_n_write_io_threads,
+                                        SRV_MAX_N_PENDING_SYNC_IOS);
+	}
+
+	if (n_threads > SRV_MAX_N_IO_THREADS) {
+		fprintf(stderr,
+			"InnoDB: requested too many IO threads(%d), max is %d\n",
+			(int)n_threads, SRV_MAX_N_IO_THREADS);	
+		return(DB_ERROR);
 	}
 
 	fil_init(srv_max_n_open_files);
@@ -1296,7 +1325,7 @@ innobase_start_or_create_for_mysql(void)
 
 	/* Create i/o-handler threads: */
 
-	for (i = 0; i < srv_n_file_io_threads; i++) {
+	for (i = 0; i < n_threads; i++) {
 		n[i] = i;
 
 		os_thread_create(io_handler_thread, n + i, thread_ids + i);

=== modified file 'storage/innobase/sync/sync0arr.c'
--- a/storage/innobase/sync/sync0arr.c	2008-06-12 00:08:07 +0000
+++ b/storage/innobase/sync/sync0arr.c	2009-07-02 14:23:36 +0000
@@ -110,6 +110,10 @@ struct sync_array_struct {
 					since creation of the array */
 };
 
+/* Counts the number of times that sync_arr_wake_threads_if_sema_free has
+ * found a thread that can run because it may have missed a wakeup signal. */
+ulint sync_wake_ups = 0;
+
 #ifdef UNIV_SYNC_DEBUG
 /**********************************************************************
 This function is called only in the debug version. Detects a deadlock
@@ -295,28 +299,25 @@ sync_array_validate(
 }
 
 /***********************************************************************
-Puts the cell event in reset state. */
+Returns the event that the thread owning the cell waits for. */
 static
-ib_longlong
-sync_cell_event_reset(
-/*==================*/
-				/* out: value of signal_count
-				at the time of reset. */
-	ulint		type,	/* in: lock type mutex/rw_lock */
-	void*		object) /* in: the rw_lock/mutex object */
+os_event_t
+sync_cell_get_event(
+/*================*/
+	sync_cell_t*   	cell) /* in: non-empty sync array cell */
 {
+	ulint type = cell->request_type;
+
 	if (type == SYNC_MUTEX) {
-		return(os_event_reset(((mutex_t *) object)->event));
-#ifdef __WIN__
+		return(((mutex_t *) cell->wait_object)->event);
 	} else if (type == RW_LOCK_WAIT_EX) {
-		return(os_event_reset(
-		       ((rw_lock_t *) object)->wait_ex_event));
-#endif
-	} else {
-		return(os_event_reset(((rw_lock_t *) object)->event));
+		return(((rw_lock_t *) cell->wait_object)->wait_ex_event);
+	} else { /* RW_LOCK_SHARED and RW_LOCK_EX wait on the same event */
+		return(((rw_lock_t *) cell->wait_object)->event);
 	}
 }
 
+
 /**********************************************************************
 Reserves a wait array cell for waiting for an object.
 The event of the cell is reset to nonsignalled state. */
@@ -332,6 +333,7 @@ sync_array_reserve_cell(
 	ulint*		index)	/* out: index of the reserved cell */
 {
 	sync_cell_t*	cell;
+	os_event_t      event;
 	ulint		i;
 
 	ut_a(object);
@@ -370,8 +372,8 @@ sync_array_reserve_cell(
 			/* Make sure the event is reset and also store
 			the value of signal_count at which the event
 			was reset. */
-			cell->signal_count = sync_cell_event_reset(type,
-								object);
+                        event = sync_cell_get_event(cell);
+			cell->signal_count = os_event_reset(event);
 
 			cell->reservation_time = time(NULL);
 
@@ -411,19 +413,7 @@ sync_array_wait_event(
 	ut_a(!cell->waiting);
 	ut_ad(os_thread_get_curr_id() == cell->thread);
 
-	if (cell->request_type == SYNC_MUTEX) {
-		event = ((mutex_t*) cell->wait_object)->event;
-#ifdef __WIN__
-	/* On windows if the thread about to wait is the one which
-	has set the state of the rw_lock to RW_LOCK_WAIT_EX, then
-	it waits on a special event i.e.: wait_ex_event. */
-	} else if (cell->request_type == RW_LOCK_WAIT_EX) {
-		event = ((rw_lock_t*) cell->wait_object)->wait_ex_event;
-#endif
-	} else {
-		event = ((rw_lock_t*) cell->wait_object)->event;
-	}
-
+	event = sync_cell_get_event(cell);
 		cell->waiting = TRUE;
 
 #ifdef UNIV_SYNC_DEBUG
@@ -462,6 +452,7 @@ sync_array_cell_print(
 	mutex_t*	mutex;
 	rw_lock_t*	rwlock;
 	ulint		type;
+	ulint		writer;
 
 	type = cell->request_type;
 
@@ -491,12 +482,14 @@ sync_array_cell_print(
 			(ulong) mutex->waiters);
 
 	} else if (type == RW_LOCK_EX
-#ifdef __WIN__
 		   || type == RW_LOCK_WAIT_EX
-#endif
 		   || type == RW_LOCK_SHARED) {
 
-		fputs(type == RW_LOCK_EX ? "X-lock on" : "S-lock on", file);
+		switch(type) {
+		case RW_LOCK_EX:      fputs("X-lock on", file);      break;
+		case RW_LOCK_WAIT_EX: fputs("wait-X-lock on", file); break;
+		default:              fputs("S-lock on", file);      break;
+		}
 
 		rwlock = cell->old_wait_rw_lock;
 
@@ -504,22 +497,25 @@ sync_array_cell_print(
 			" RW-latch at %p created in file %s line %lu\n",
 			(void*) rwlock, rwlock->cfile_name,
 			(ulong) rwlock->cline);
-		if (rwlock->writer != RW_LOCK_NOT_LOCKED) {
+		writer = rw_lock_get_writer(rwlock);
+		if (writer != RW_LOCK_NOT_LOCKED) {
 			fprintf(file,
 				"a writer (thread id %lu) has"
 				" reserved it in mode %s",
 				(ulong) os_thread_pf(rwlock->writer_thread),
-				rwlock->writer == RW_LOCK_EX
+				writer == RW_LOCK_EX
 				? " exclusive\n"
 				: " wait exclusive\n");
 		}
 
 		fprintf(file,
-			"number of readers %lu, waiters flag %lu\n"
+			"number of readers %lu, waiters flag %lu, "
+                        "lock_word: %ld\n"
 			"Last time read locked in file %s line %lu\n"
 			"Last time write locked in file %s line %lu\n",
-			(ulong) rwlock->reader_count,
+			(ulong) rw_lock_get_reader_count(rwlock),
 			(ulong) rwlock->waiters,
+			rwlock->lock_word,
 			rwlock->last_s_file_name,
 			(ulong) rwlock->last_s_line,
 			rwlock->last_x_file_name,
@@ -553,7 +549,8 @@ sync_array_find_thread(
 		cell = sync_array_get_nth_cell(arr, i);
 
 		if (cell->wait_object != NULL
-		    && os_thread_eq(cell->thread, thread)) {
+		    && os_thread_eq(cell->thread, thread)
+		    && cell->waiting) { /* match only cells flagged as waiting */
 
 			return(cell);	/* Found */
 		}
@@ -778,28 +775,30 @@ sync_arr_cell_can_wake_up(
 			return(TRUE);
 		}
 
-	} else if (cell->request_type == RW_LOCK_EX
-		   || cell->request_type == RW_LOCK_WAIT_EX) {
+	} else if (cell->request_type == RW_LOCK_EX) {
 
 		lock = cell->wait_object;
 
-		if (rw_lock_get_reader_count(lock) == 0
-		    && rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) {
+                /* X_LOCK_DECR is the unlocked state */
+	    	if (lock->lock_word == X_LOCK_DECR) {
 
 			return(TRUE);
 		}
 
-		if (rw_lock_get_reader_count(lock) == 0
-		    && rw_lock_get_writer(lock) == RW_LOCK_WAIT_EX
-		    && os_thread_eq(lock->writer_thread, cell->thread)) {
+        } else if (cell->request_type == RW_LOCK_WAIT_EX) {
+
+	    	lock = cell->wait_object;
+
+                /* lock_word == 0 means all readers have left */
+	    	if (lock->lock_word == 0) {
 
 			return(TRUE);
 		}
-
 	} else if (cell->request_type == RW_LOCK_SHARED) {
 		lock = cell->wait_object;
 
-		if (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) {
+                /* lock_word > 0 means no writer or reserved writer */
+		if (lock->lock_word > 0) {
 
 			return(TRUE);
 		}
@@ -844,11 +843,15 @@ sync_array_object_signalled(
 /*========================*/
 	sync_array_t*	arr)	/* in: wait array */
 {
+#ifdef UNIV_SYNC_ATOMIC
+	(void)os_atomic_increment((volatile lint *)&(arr->sg_count), 1);
+#else
 	sync_array_enter(arr);
 
 	arr->sg_count++;
 
 	sync_array_exit(arr);
+#endif
 }
 
 /**************************************************************************
@@ -868,6 +871,7 @@ sync_arr_wake_threads_if_sema_free(void)
 	sync_cell_t*	cell;
 	ulint		count;
 	ulint		i;
+	os_event_t      event;
 
 	sync_array_enter(arr);
 
@@ -877,36 +881,25 @@ sync_arr_wake_threads_if_sema_free(void)
 	while (count < arr->n_reserved) {
 
 		cell = sync_array_get_nth_cell(arr, i);
+		i++;
 
-		if (cell->wait_object != NULL) {
-
+		if (cell->wait_object == NULL) {
+			continue;
+		}
 			count++;
 
-			if (sync_arr_cell_can_wake_up(cell)) {
-
-				if (cell->request_type == SYNC_MUTEX) {
-					mutex_t*	mutex;
+		if (!cell->waiting) {
+			continue;
+		}
 
-					mutex = cell->wait_object;
-					os_event_set(mutex->event);
-#ifdef __WIN__
-				} else if (cell->request_type
-					   == RW_LOCK_WAIT_EX) {
-					rw_lock_t*	lock;
+			if (sync_arr_cell_can_wake_up(cell)) {
 
-					lock = cell->wait_object;
-					os_event_set(lock->wait_ex_event);
-#endif
-				} else {
-					rw_lock_t*	lock;
+			event = sync_cell_get_event(cell);
 
-					lock = cell->wait_object;
-					os_event_set(lock->event);
-				}
-			}
+			os_event_set(event);
+			sync_wake_ups++;
 		}
 
-		i++;
 	}
 
 	sync_array_exit(arr);
@@ -1026,4 +1019,3 @@ sync_array_print_info(
 
 	sync_array_exit(arr);
 }
-

=== modified file 'storage/innobase/sync/sync0rw.c'
--- a/storage/innobase/sync/sync0rw.c	2008-06-12 00:08:07 +0000
+++ b/storage/innobase/sync/sync0rw.c	2009-07-02 14:23:36 +0000
@@ -15,35 +15,110 @@ Created 9/11/1995 Heikki Tuuri
 #include "mem0mem.h"
 #include "srv0srv.h"
 
-/* number of system calls made during shared latching */
-ulint	rw_s_system_call_count	= 0;
+/*
+	IMPLEMENTATION OF THE RW_LOCK
+	=============================
+The status of a rw_lock is held in lock_word. The initial value of lock_word is
+X_LOCK_DECR. lock_word is decremented by 1 for each s-lock and by X_LOCK_DECR
+for each x-lock. This describes the lock state for each value of lock_word:
+ 
+lock_word == X_LOCK_DECR:      Unlocked.
+0 < lock_word < X_LOCK_DECR:   Read locked, no waiting writers.
+ 			       (X_LOCK_DECR - lock_word) is the
+ 			       number of readers that hold the lock.
+lock_word == 0:		       Write locked
+-X_LOCK_DECR < lock_word < 0:  Read locked, with a waiting writer.
+ 			       (-lock_word) is the number of readers
+ 			       that hold the lock.
+lock_word <= -X_LOCK_DECR:     Recursively write locked. lock_word has been
+ 			       decremented by X_LOCK_DECR once for each lock,
+ 			       so the number of locks is:
+ 			       ((-lock_word) / X_LOCK_DECR) + 1
+When lock_word <= -X_LOCK_DECR, we also know that lock_word % X_LOCK_DECR == 0:
+other values of lock_word are invalid.
+ 
+The lock_word is always read and updated atomically and consistently, so that
+it always represents the state of the lock, and the state of the lock changes
+with a single atomic operation. This lock_word holds all of the information
+that a thread needs in order to determine if it is eligible to gain the lock
+or if it must spin or sleep. The one exception to this is that writer_thread
+must be verified before recursive write locks: to solve this scenario, we make
+writer_thread readable by all threads, but only writeable by the x-lock holder.
+
+The other members of the lock obey the following rules to remain consistent:
+ 
+pass:		This is only set to 1 to prevent recursive x-locks. It must
+ 		be set as specified by x_lock caller after the lock_word
+ 		indicates that the thread holds the lock, but before that
+ 		thread resumes execution. It must also be set to 1 during the
+ 		final x_unlock, but before the lock_word status is updated.
+ 		When an x_lock or move_ownership call wishes to change
+ 		pass, it must first update the writer_thread appropriately.
+writer_thread:	Must be set to the writer's thread_id after the lock_word
+ 		indicates that the thread holds the lock, but before that
+ 		thread resumes execution. writer_thread may be invalid and
+                should not be read when pass == 1. A thread trying to become
+                writer never reads its own stale writer_thread, since it sets
+                pass during its previous unlock call.
+waiters:	May be set to 1 anytime, but to avoid unnecessary wake-up
+ 		signals, it should only be set to 1 when there are threads
+ 		waiting on event. Must be 1 when a writer starts waiting to
+ 		ensure the current x-locking thread sends a wake-up signal
+ 		during unlock. May only be reset to 0 immediately before a
+ 		wake-up signal is sent to event.
+event:		Threads wait on event for read or write lock when another
+ 		thread has an x-lock or an x-lock reservation (wait_ex). A
+ 		thread may only	wait on event after performing the following
+ 		actions in order:
+ 		   (1) Record the counter value of event (with os_event_reset).
+ 		   (2) Set waiters to 1.
+ 		   (3) Verify lock_word <= 0.
+ 		(1) must come before (2) to ensure signal is not missed.
+ 		(2) must come before (3) to ensure a signal is sent.
+ 		These restrictions force the above ordering.
+ 		Immediately before sending the wake-up signal, we should:
+ 		   (1) Verify lock_word == X_LOCK_DECR (unlocked)
+ 		   (2) Reset waiters to 0.
+wait_ex_event:	A thread may only wait on the wait_ex_event after it has
+ 		performed the following actions in order:
+ 		   (1) Decrement lock_word by X_LOCK_DECR.
+ 		   (2) Record counter value of wait_ex_event (os_event_reset,
+                        called from sync_array_reserve_cell).
+ 		   (3) Verify that lock_word < 0.
+ 		(1) must come first to ensure no other threads become reader
+                 or next writer, and to notify unlocker that signal must be sent.
+                 (2) must come before (3) to ensure the signal is not missed.
+ 		These restrictions force the above ordering.
+ 		Immediately before sending the wake-up signal, we should:
+		   Verify lock_word == 0 (waiting thread holds x_lock)
+*/
+
 
 /* number of spin waits on rw-latches,
 resulted during shared (read) locks */
-ulint	rw_s_spin_wait_count	= 0;
+ib_longlong	rw_s_spin_wait_count	= 0;
+ib_longlong	rw_s_spin_round_count	= 0;
 
 /* number of OS waits on rw-latches,
 resulted during shared (read) locks */
-ulint	rw_s_os_wait_count	= 0;
+ib_longlong	rw_s_os_wait_count	= 0;
 
 /* number of unlocks (that unlock shared locks),
 set only when UNIV_SYNC_PERF_STAT is defined */
-ulint	rw_s_exit_count		= 0;
-
-/* number of system calls made during exclusive latching */
-ulint	rw_x_system_call_count	= 0;
+ib_longlong	rw_s_exit_count		= 0;
 
 /* number of spin waits on rw-latches,
 resulted during exclusive (write) locks */
-ulint	rw_x_spin_wait_count	= 0;
+ib_longlong	rw_x_spin_wait_count	= 0;
+ib_longlong	rw_x_spin_round_count	= 0;
 
 /* number of OS waits on rw-latches,
 resulted during exclusive (write) locks */
-ulint	rw_x_os_wait_count	= 0;
+ib_longlong	rw_x_os_wait_count	= 0;
 
 /* number of unlocks (that unlock exclusive locks),
 set only when UNIV_SYNC_PERF_STAT is defined */
-ulint	rw_x_exit_count		= 0;
+ib_longlong	rw_x_exit_count		= 0;
 
 /* The global list of rw-locks */
 rw_lock_list_t	rw_lock_list;
@@ -119,6 +194,7 @@ rw_lock_create_func(
 	/* If this is the very first time a synchronization object is
 	created, then the following call initializes the sync system. */
 
+#ifndef UNIV_SYNC_ATOMIC
 	mutex_create(rw_lock_get_mutex(lock), SYNC_NO_ORDER_CHECK);
 
 	lock->mutex.cfile_name = cfile_name;
@@ -129,12 +205,12 @@ rw_lock_create_func(
 	lock->mutex.mutex_type = 1;
 #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
 
-	rw_lock_set_waiters(lock, 0);
-	rw_lock_set_writer(lock, RW_LOCK_NOT_LOCKED);
-	lock->writer_count = 0;
-	rw_lock_set_reader_count(lock, 0);
+#endif /* UNIV_SYNC_ATOMIC */
 
-	lock->writer_is_wait_ex = FALSE;
+	lock->lock_word = X_LOCK_DECR;
+	lock->waiters = 0;
+ 	lock->pass = 1;
+ 	/* We do not have to initialize writer_thread until pass == 0 */
 
 #ifdef UNIV_SYNC_DEBUG
 	UT_LIST_INIT(lock->debug_list);
@@ -147,15 +223,13 @@ rw_lock_create_func(
 	lock->cfile_name = cfile_name;
 	lock->cline = (unsigned int) cline;
 
+	lock->count_os_wait = 0;
 	lock->last_s_file_name = "not yet reserved";
 	lock->last_x_file_name = "not yet reserved";
 	lock->last_s_line = 0;
 	lock->last_x_line = 0;
 	lock->event = os_event_create(NULL);
-
-#ifdef __WIN__
 	lock->wait_ex_event = os_event_create(NULL);
-#endif
 
 	mutex_enter(&rw_lock_list_mutex);
 
@@ -180,20 +254,18 @@ rw_lock_free(
 	rw_lock_t*	lock)	/* in: rw-lock */
 {
 	ut_ad(rw_lock_validate(lock));
-	ut_a(rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED);
-	ut_a(rw_lock_get_waiters(lock) == 0);
-	ut_a(rw_lock_get_reader_count(lock) == 0);
+	ut_a(lock->lock_word == X_LOCK_DECR);
 
 	lock->magic_n = 0;
 
+#ifndef UNIV_SYNC_ATOMIC
 	mutex_free(rw_lock_get_mutex(lock));
+#endif /* UNIV_SYNC_ATOMIC */
 
 	mutex_enter(&rw_lock_list_mutex);
 	os_event_free(lock->event);
 
-#ifdef __WIN__
 	os_event_free(lock->wait_ex_event);
-#endif
 
 	if (UT_LIST_GET_PREV(list, lock)) {
 		ut_a(UT_LIST_GET_PREV(list, lock)->magic_n == RW_LOCK_MAGIC_N);
@@ -219,19 +291,12 @@ rw_lock_validate(
 {
 	ut_a(lock);
 
-	mutex_enter(rw_lock_get_mutex(lock));
+	ulint waiters = rw_lock_get_waiters(lock);
+	lint lock_word = lock->lock_word;
 
 	ut_a(lock->magic_n == RW_LOCK_MAGIC_N);
-	ut_a((rw_lock_get_reader_count(lock) == 0)
-	     || (rw_lock_get_writer(lock) != RW_LOCK_EX));
-	ut_a((rw_lock_get_writer(lock) == RW_LOCK_EX)
-	     || (rw_lock_get_writer(lock) == RW_LOCK_WAIT_EX)
-	     || (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED));
-	ut_a((rw_lock_get_waiters(lock) == 0)
-	     || (rw_lock_get_waiters(lock) == 1));
-	ut_a((lock->writer != RW_LOCK_EX) || (lock->writer_count > 0));
-
-	mutex_exit(rw_lock_get_mutex(lock));
+	ut_a(waiters == 0 || waiters == 1);
+	ut_a(lock_word > -X_LOCK_DECR ||(-lock_word) % X_LOCK_DECR == 0);
 
 	return(TRUE);
 }
@@ -253,18 +318,15 @@ rw_lock_s_lock_spin(
 	ulint		line)	/* in: line where requested */
 {
 	ulint	 index;	/* index of the reserved wait cell */
-	ulint	 i;	/* spin round count */
+	ulint	 i = 0;	/* spin round count */
 
 	ut_ad(rw_lock_validate(lock));
 
+	rw_s_spin_wait_count++;	/* Count calls to this function */
 lock_loop:
-	rw_s_spin_wait_count++;
 
 	/* Spin waiting for the writer field to become free */
-	i = 0;
-
-	while (rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED
-	       && i < SYNC_SPIN_ROUNDS) {
+	while (i < SYNC_SPIN_ROUNDS && lock->lock_word <= 0) {
 		if (srv_spin_wait_delay) {
 			ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
 		}
@@ -285,28 +347,32 @@ lock_loop:
 			lock->cfile_name, (ulong) lock->cline, (ulong) i);
 	}
 
-	mutex_enter(rw_lock_get_mutex(lock));
-
 	/* We try once again to obtain the lock */
-
 	if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
-		mutex_exit(rw_lock_get_mutex(lock));
+		rw_s_spin_round_count += i;
 
 		return; /* Success */
 	} else {
-		/* If we get here, locking did not succeed, we may
-		suspend the thread to wait in the wait array */
 
-		rw_s_system_call_count++;
+		if (i < SYNC_SPIN_ROUNDS) {
+			goto lock_loop;
+		}
+
+		rw_s_spin_round_count += i;
 
 		sync_array_reserve_cell(sync_primary_wait_array,
 					lock, RW_LOCK_SHARED,
 					file_name, line,
 					&index);
 
-		rw_lock_set_waiters(lock, 1);
-
-		mutex_exit(rw_lock_get_mutex(lock));
+		/* Set waiters before checking lock_word to ensure wake-up
+                signal is sent. This may lead to some unnecessary signals. */
+		rw_lock_set_waiters(lock);
+
+		if (TRUE == rw_lock_s_lock_low(lock, pass, file_name, line)) {
+			sync_array_free_cell(sync_primary_wait_array, index);
+			return; /* Success */
+		}
 
 		if (srv_print_latch_waits) {
 			fprintf(stderr,
@@ -317,11 +383,13 @@ lock_loop:
 				(ulong) lock->cline);
 		}
 
-		rw_s_system_call_count++;
+		/* these stats may not be accurate */
+		lock->count_os_wait++;
 		rw_s_os_wait_count++;
 
 		sync_array_wait_event(sync_primary_wait_array, index);
 
+		i = 0;
 		goto lock_loop;
 	}
 }
@@ -343,113 +411,137 @@ rw_lock_x_lock_move_ownership(
 {
 	ut_ad(rw_lock_is_locked(lock, RW_LOCK_EX));
 
+#ifdef UNIV_SYNC_ATOMIC
+        lock->writer_thread = os_thread_get_curr_id();
+        os_memory_barrier_store();
+	lock->pass = 0;
+#else /* UNIV_SYNC_ATOMIC */
 	mutex_enter(&(lock->mutex));
-
 	lock->writer_thread = os_thread_get_curr_id();
-
 	lock->pass = 0;
-
 	mutex_exit(&(lock->mutex));
+#endif /* UNIV_SYNC_ATOMIC */
 }
 
 /**********************************************************************
-Low-level function for acquiring an exclusive lock. */
+Function for the next writer to call. Waits for readers to exit.
+The caller must have already decremented lock_word by X_LOCK_DECR.*/
 UNIV_INLINE
-ulint
-rw_lock_x_lock_low(
-/*===============*/
-				/* out: RW_LOCK_NOT_LOCKED if did
-				not succeed, RW_LOCK_EX if success,
-				RW_LOCK_WAIT_EX, if got wait reservation */
+void
+rw_lock_x_lock_wait(
+/*================*/
 	rw_lock_t*	lock,	/* in: pointer to rw-lock */
+#ifdef UNIV_SYNC_DEBUG
 	ulint		pass,	/* in: pass value; != 0, if the lock will
 				be passed to another thread to unlock */
+#endif
 	const char*	file_name,/* in: file name where lock requested */
 	ulint		line)	/* in: line where requested */
 {
-	ut_ad(mutex_own(rw_lock_get_mutex(lock)));
+	ulint index;
+	ulint i = 0;
 
-	if (rw_lock_get_writer(lock) == RW_LOCK_NOT_LOCKED) {
+	ut_ad(lock->lock_word <= 0);
 
-		if (rw_lock_get_reader_count(lock) == 0) {
+	while (lock->lock_word < 0) {
+		if (srv_spin_wait_delay) {
+			ut_delay(ut_rnd_interval(0, srv_spin_wait_delay));
+		}
+		if(i < SYNC_SPIN_ROUNDS) {
+			i++;
+			continue;
+		}
 
-			rw_lock_set_writer(lock, RW_LOCK_EX);
-			lock->writer_thread = os_thread_get_curr_id();
-			lock->writer_count++;
-			lock->pass = pass;
+		/* If there is still a reader, then go to sleep.*/
+		rw_x_spin_round_count += i;
+		i = 0;
+		sync_array_reserve_cell(sync_primary_wait_array,
+					lock,
+					RW_LOCK_WAIT_EX,
+					file_name, line,
+					&index);
+		/* Check lock_word to ensure wake-up isn't missed.*/
+		if(lock->lock_word < 0) {
 
+			/* these stats may not be accurate */
+			lock->count_os_wait++;
+			rw_x_os_wait_count++;
+
+                        /* Add debug info as it is needed to detect possible
+                        deadlock. We must add info for WAIT_EX thread for
+                        deadlock detection to work properly. */
 #ifdef UNIV_SYNC_DEBUG
-			rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
+			rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX,
 					       file_name, line);
 #endif
-			lock->last_x_file_name = file_name;
-			lock->last_x_line = (unsigned int) line;
-
-			/* Locking succeeded, we may return */
-			return(RW_LOCK_EX);
-		} else {
-			/* There are readers, we have to wait */
-			rw_lock_set_writer(lock, RW_LOCK_WAIT_EX);
-			lock->writer_thread = os_thread_get_curr_id();
-			lock->pass = pass;
-			lock->writer_is_wait_ex = TRUE;
 
+ 			sync_array_wait_event(sync_primary_wait_array,
+ 					      index);
 #ifdef UNIV_SYNC_DEBUG
-			rw_lock_add_debug_info(lock, pass, RW_LOCK_WAIT_EX,
-					       file_name, line);
+			rw_lock_remove_debug_info(lock, pass,
+					       RW_LOCK_WAIT_EX);
 #endif
-
-			return(RW_LOCK_WAIT_EX);
+                        /* It is possible to wake when lock_word < 0.
+                        We must pass the while-loop check to proceed.*/
+		} else {
+			sync_array_free_cell(sync_primary_wait_array,
+					     index);
 		}
+	}
+	rw_x_spin_round_count += i;
+}
 
-	} else if ((rw_lock_get_writer(lock) == RW_LOCK_WAIT_EX)
-		   && os_thread_eq(lock->writer_thread,
-				   os_thread_get_curr_id())) {
-
-		if (rw_lock_get_reader_count(lock) == 0) {
+/**********************************************************************
+Low-level function for acquiring an exclusive lock. */
+UNIV_INLINE
+ibool
+rw_lock_x_lock_low(
+/*===============*/
+				/* out: FALSE if did
+				not succeed, TRUE if success. */
+	rw_lock_t*	lock,	/* in: pointer to rw-lock */
+	ulint		pass,	/* in: pass value; != 0, if the lock will
+				be passed to another thread to unlock */
+	const char*	file_name,/* in: file name where lock requested */
+	ulint		line)	/* in: line where requested */
+{
+	os_thread_id_t	curr_thread	= os_thread_get_curr_id();
 
-			rw_lock_set_writer(lock, RW_LOCK_EX);
-			lock->writer_count++;
-			lock->pass = pass;
-			lock->writer_is_wait_ex = FALSE;
+	if(rw_lock_lock_word_decr(lock, X_LOCK_DECR)) {
+		ut_ad(lock->pass);
 
+		/* Decrement occurred: we are writer or next-writer. */
+		lock->writer_thread = curr_thread;
+		lock->pass = pass;
+		rw_lock_x_lock_wait(lock,
 #ifdef UNIV_SYNC_DEBUG
-			rw_lock_remove_debug_info(lock, pass, RW_LOCK_WAIT_EX);
-			rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
-					       file_name, line);
+				    pass,
 #endif
+                                    file_name, line);
 
-			lock->last_x_file_name = file_name;
-			lock->last_x_line = (unsigned int) line;
-
-			/* Locking succeeded, we may return */
-			return(RW_LOCK_EX);
+	} else {
+		/* Decrement failed: relock or failed lock */
+		/* Must verify pass first: otherwise another thread could
+		call move_ownership, suddenly allowing recursive locks,
+		after we have already verified that our thread_id matches
+		(though move_ownership has since changed it).*/
+		if(!pass && !(lock->pass) &&
+                   os_thread_eq(lock->writer_thread, curr_thread)) {
+			/* Relock */
+                        lock->lock_word -= X_LOCK_DECR;
+		} else {
+			/* Another thread locked before us */
+			return(FALSE);
 		}
-
-		return(RW_LOCK_WAIT_EX);
-
-	} else if ((rw_lock_get_writer(lock) == RW_LOCK_EX)
-		   && os_thread_eq(lock->writer_thread,
-				   os_thread_get_curr_id())
-		   && (lock->pass == 0)
-		   && (pass == 0)) {
-
-		lock->writer_count++;
-
+	}
 #ifdef UNIV_SYNC_DEBUG
-		rw_lock_add_debug_info(lock, pass, RW_LOCK_EX, file_name,
-				       line);
+	rw_lock_add_debug_info(lock, pass, RW_LOCK_EX,
+			       file_name, line);
 #endif
+	lock->last_x_file_name = file_name;
+	lock->last_x_line = (unsigned int) line;
 
-		lock->last_x_file_name = file_name;
-		lock->last_x_line = (unsigned int) line;
-
-		/* Locking succeeded, we may return */
-		return(RW_LOCK_EX);
-	}
-
-	/* Locking did not succeed */
-	return(RW_LOCK_NOT_LOCKED);
+	return(TRUE);
 }
 
 /**********************************************************************
@@ -472,47 +564,30 @@ rw_lock_x_lock_func(
 	ulint		line)	/* in: line where requested */
 {
 	ulint	index;	/* index of the reserved wait cell */
-	ulint	state;	/* lock state acquired */
 	ulint	i;	/* spin round count */
+	ibool   spinning = FALSE;
 
 	ut_ad(rw_lock_validate(lock));
 
-lock_loop:
-	/* Acquire the mutex protecting the rw-lock fields */
-	mutex_enter_fast(&(lock->mutex));
-
-	state = rw_lock_x_lock_low(lock, pass, file_name, line);
+	i = 0;
 
-	mutex_exit(&(lock->mutex));
+lock_loop:
 
-	if (state == RW_LOCK_EX) {
+	if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
+		rw_x_spin_round_count += i;
 
 		return;	/* Locking succeeded */
 
-	} else if (state == RW_LOCK_NOT_LOCKED) {
-
-		/* Spin waiting for the writer field to become free */
-		i = 0;
-
-		while (rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED
-		       && i < SYNC_SPIN_ROUNDS) {
-			if (srv_spin_wait_delay) {
-				ut_delay(ut_rnd_interval(0,
-							 srv_spin_wait_delay));
-			}
+	} else {
 
-			i++;
-		}
-		if (i == SYNC_SPIN_ROUNDS) {
-			os_thread_yield();
+                if (!spinning) {
+                        spinning = TRUE;
+                        rw_x_spin_wait_count++;
 		}
-	} else if (state == RW_LOCK_WAIT_EX) {
-
-		/* Spin waiting for the reader count field to become zero */
-		i = 0;
 
-		while (rw_lock_get_reader_count(lock) != 0
-		       && i < SYNC_SPIN_ROUNDS) {
+		/* Spin waiting for the lock_word to become free */
+		while (i < SYNC_SPIN_ROUNDS
+		       && lock->lock_word <= 0) {
 			if (srv_spin_wait_delay) {
 				ut_delay(ut_rnd_interval(0,
 							 srv_spin_wait_delay));
@@ -522,12 +597,13 @@ lock_loop:
 		}
 		if (i == SYNC_SPIN_ROUNDS) {
 			os_thread_yield();
+		} else {
+			goto lock_loop;
 		}
-	} else {
-		i = 0; /* Eliminate a compiler warning */
-		ut_error;
 	}
 
+	rw_x_spin_round_count += i;
+
 	if (srv_print_latch_waits) {
 		fprintf(stderr,
 			"Thread %lu spin wait rw-x-lock at %p"
@@ -536,39 +612,20 @@ lock_loop:
 			lock->cfile_name, (ulong) lock->cline, (ulong) i);
 	}
 
-	rw_x_spin_wait_count++;
-
-	/* We try once again to obtain the lock. Acquire the mutex protecting
-	the rw-lock fields */
-
-	mutex_enter(rw_lock_get_mutex(lock));
-
-	state = rw_lock_x_lock_low(lock, pass, file_name, line);
-
-	if (state == RW_LOCK_EX) {
-		mutex_exit(rw_lock_get_mutex(lock));
-
-		return;	/* Locking succeeded */
-	}
-
-	rw_x_system_call_count++;
-
 	sync_array_reserve_cell(sync_primary_wait_array,
 				lock,
-#ifdef __WIN__
-				/* On windows RW_LOCK_WAIT_EX signifies
-				that this thread should wait on the
-				special wait_ex_event. */
-				(state == RW_LOCK_WAIT_EX)
-				 ? RW_LOCK_WAIT_EX :
-#endif
 				RW_LOCK_EX,
 				file_name, line,
 				&index);
 
-	rw_lock_set_waiters(lock, 1);
-
-	mutex_exit(rw_lock_get_mutex(lock));
+	/* Waiters must be set before checking lock_word, to ensure signal
+	is sent. This could lead to a few unnecessary wake-up signals. */
+	rw_lock_set_waiters(lock);
+
+	if (rw_lock_x_lock_low(lock, pass, file_name, line)) {
+		sync_array_free_cell(sync_primary_wait_array, index);
+		return; /* Locking succeeded */
+	}
 
 	if (srv_print_latch_waits) {
 		fprintf(stderr,
@@ -578,11 +635,13 @@ lock_loop:
 			lock->cfile_name, (ulong) lock->cline);
 	}
 
-	rw_x_system_call_count++;
+	/* these stats may not be accurate */
+	lock->count_os_wait++;
 	rw_x_os_wait_count++;
 
 	sync_array_wait_event(sync_primary_wait_array, index);
 
+	i = 0;
 	goto lock_loop;
 }
 
@@ -730,7 +789,7 @@ rw_lock_own(
 	ut_ad(lock);
 	ut_ad(rw_lock_validate(lock));
 
-	mutex_enter(&(lock->mutex));
+	rw_lock_debug_mutex_enter();
 
 	info = UT_LIST_GET_FIRST(lock->debug_list);
 
@@ -740,7 +799,7 @@ rw_lock_own(
 		    && (info->pass == 0)
 		    && (info->lock_type == lock_type)) {
 
-			mutex_exit(&(lock->mutex));
+			rw_lock_debug_mutex_exit();
 			/* Found! */
 
 			return(TRUE);
@@ -748,7 +807,7 @@ rw_lock_own(
 
 		info = UT_LIST_GET_NEXT(list, info);
 	}
-	mutex_exit(&(lock->mutex));
+	rw_lock_debug_mutex_exit();
 
 	return(FALSE);
 }
@@ -770,22 +829,18 @@ rw_lock_is_locked(
 	ut_ad(lock);
 	ut_ad(rw_lock_validate(lock));
 
-	mutex_enter(&(lock->mutex));
-
 	if (lock_type == RW_LOCK_SHARED) {
-		if (lock->reader_count > 0) {
+		if (rw_lock_get_reader_count(lock) > 0) {
 			ret = TRUE;
 		}
 	} else if (lock_type == RW_LOCK_EX) {
-		if (lock->writer == RW_LOCK_EX) {
+		if (rw_lock_get_writer(lock) == RW_LOCK_EX) {
 			ret = TRUE;
 		}
 	} else {
 		ut_error;
 	}
 
-	mutex_exit(&(lock->mutex));
-
 	return(ret);
 }
 
@@ -814,11 +869,10 @@ rw_lock_list_print_info(
 
 		count++;
 
+#ifndef UNIV_SYNC_ATOMIC
 		mutex_enter(&(lock->mutex));
-
-		if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
-		    || (rw_lock_get_reader_count(lock) != 0)
-		    || (rw_lock_get_waiters(lock) != 0)) {
+#endif
+		if (lock->lock_word != X_LOCK_DECR) {
 
 			fprintf(file, "RW-LOCK: %p ", (void*) lock);
 
@@ -834,8 +888,10 @@ rw_lock_list_print_info(
 				info = UT_LIST_GET_NEXT(list, info);
 			}
 		}
-
+#ifndef UNIV_SYNC_ATOMIC
 		mutex_exit(&(lock->mutex));
+#endif
+
 		lock = UT_LIST_GET_NEXT(list, lock);
 	}
 
@@ -858,9 +914,10 @@ rw_lock_print(
 		"RW-LATCH INFO\n"
 		"RW-LATCH: %p ", (void*) lock);
 
-	if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
-	    || (rw_lock_get_reader_count(lock) != 0)
-	    || (rw_lock_get_waiters(lock) != 0)) {
+#ifndef UNIV_SYNC_ATOMIC
+	mutex_enter(&(lock->mutex));
+#endif
+	if (lock->lock_word != X_LOCK_DECR) {
 
 		if (rw_lock_get_waiters(lock)) {
 			fputs(" Waiters for the lock exist\n", stderr);
@@ -874,6 +931,9 @@ rw_lock_print(
 			info = UT_LIST_GET_NEXT(list, info);
 		}
 	}
+#ifndef UNIV_SYNC_ATOMIC
+	mutex_exit(&(lock->mutex));
+#endif
 }
 
 /*************************************************************************
@@ -922,14 +982,11 @@ rw_lock_n_locked(void)
 	lock = UT_LIST_GET_FIRST(rw_lock_list);
 
 	while (lock != NULL) {
-		mutex_enter(rw_lock_get_mutex(lock));
 
-		if ((rw_lock_get_writer(lock) != RW_LOCK_NOT_LOCKED)
-		    || (rw_lock_get_reader_count(lock) != 0)) {
+		if (lock->lock_word != X_LOCK_DECR) {
 			count++;
 		}
 
-		mutex_exit(rw_lock_get_mutex(lock));
 		lock = UT_LIST_GET_NEXT(list, lock);
 	}
 

=== modified file 'storage/innobase/sync/sync0sync.c'
--- a/storage/innobase/sync/sync0sync.c	2008-06-12 00:08:07 +0000
+++ b/storage/innobase/sync/sync0sync.c	2009-07-02 14:23:36 +0000
@@ -138,18 +138,13 @@ Therefore, this thread is guaranteed to 
 signalled unconditionally at the release of the lock.
 Q.E.D. */
 
-/* The number of system calls made in this module. Intended for performance
-monitoring. */
-
-ulint	mutex_system_call_count		= 0;
-
 /* Number of spin waits on mutexes: for performance monitoring */
 
 /* round=one iteration of a spin loop */
-ulint	mutex_spin_round_count		= 0;
-ulint	mutex_spin_wait_count		= 0;
-ulint	mutex_os_wait_count		= 0;
-ulint	mutex_exit_count		= 0;
+ib_longlong	mutex_spin_round_count		= 0;
+ib_longlong	mutex_spin_wait_count		= 0;
+ib_longlong	mutex_os_wait_count		= 0;
+ib_longlong	mutex_exit_count		= 0;
 
 /* The global array of wait cells for implementation of the database's own
 mutexes and read-write locks */
@@ -243,6 +238,8 @@ mutex_create_func(
 {
 #if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
 	mutex_reset_lock_word(mutex);
+#elif defined(MY_ATOMIC_NOLOCK)
+	mutex_reset_lock_word(mutex);
 #else
 	os_fast_mutex_init(&(mutex->os_fast_mutex));
 	mutex->lock_word = 0;
@@ -333,7 +330,9 @@ mutex_free(
 
 	os_event_free(mutex->event);
 
-#if !defined(_WIN32) || !defined(UNIV_CAN_USE_X86_ASSEMBLER)
+#if defined(_WIN32) && defined(UNIV_CAN_USE_X86_ASSEMBLER)
+#elif defined(MY_ATOMIC_NOLOCK)
+#else
 	os_fast_mutex_free(&(mutex->os_fast_mutex));
 #endif
 	/* If we free the mutex protecting the mutex list (freeing is
@@ -450,6 +449,12 @@ mutex_spin_wait(
 #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
 	ut_ad(mutex);
 
+	/* This update is not thread safe, but we don't mind if the count
+	isn't exact. Moved out of ifdef that follows because we are willing
+	to sacrifice the cost of counting this as the data is valuable.
+	Count the number of calls to mutex_spin_wait. */
+	mutex_spin_wait_count++;
+
 mutex_loop:
 
 	i = 0;
@@ -462,7 +467,6 @@ mutex_loop:
 
 spin_loop:
 #if defined UNIV_DEBUG && !defined UNIV_HOTBACKUP
-	mutex_spin_wait_count++;
 	mutex->count_spin_loop++;
 #endif /* UNIV_DEBUG && !UNIV_HOTBACKUP */
 
@@ -527,8 +531,6 @@ spin_loop:
 	sync_array_reserve_cell(sync_primary_wait_array, mutex,
 				SYNC_MUTEX, file_name, line, &index);
 
-	mutex_system_call_count++;
-
 	/* The memory order of the array reservation and the change in the
 	waiters field is important: when we suspend a thread, we first
 	reserve the cell and then set waiters field to 1. When threads are
@@ -575,7 +577,6 @@ spin_loop:
 		mutex->cfile_name, (ulong) mutex->cline, (ulong) i);
 #endif
 
-	mutex_system_call_count++;
 	mutex_os_wait_count++;
 
 #ifndef UNIV_HOTBACKUP
@@ -1377,21 +1378,31 @@ sync_print_wait_info(
 	FILE*	file)		/* in: file where to print */
 {
 #ifdef UNIV_SYNC_DEBUG
-	fprintf(file, "Mutex exits %lu, rws exits %lu, rwx exits %lu\n",
+	fprintf(file, "Mutex exits %llu, rws exits %llu, rwx exits %llu\n",
 		mutex_exit_count, rw_s_exit_count, rw_x_exit_count);
 #endif
 
 	fprintf(file,
-		"Mutex spin waits %lu, rounds %lu, OS waits %lu\n"
-		"RW-shared spins %lu, OS waits %lu;"
-		" RW-excl spins %lu, OS waits %lu\n",
-		(ulong) mutex_spin_wait_count,
-		(ulong) mutex_spin_round_count,
-		(ulong) mutex_os_wait_count,
-		(ulong) rw_s_spin_wait_count,
-		(ulong) rw_s_os_wait_count,
-		(ulong) rw_x_spin_wait_count,
-		(ulong) rw_x_os_wait_count);
+		"Mutex spin waits %llu, rounds %llu, OS waits %llu\n"
+		"RW-shared spins %llu, OS waits %llu;"
+		" RW-excl spins %llu, OS waits %llu\n",
+		mutex_spin_wait_count,
+		mutex_spin_round_count,
+		mutex_os_wait_count,
+		rw_s_spin_wait_count,
+		rw_s_os_wait_count,
+		rw_x_spin_wait_count,
+		rw_x_os_wait_count);
+
+	fprintf(file,
+		"Spin rounds per wait: %.2f mutex, %.2f RW-shared, "
+		"%.2f RW-excl\n",
+		(double) mutex_spin_round_count /
+		(mutex_spin_wait_count ? mutex_spin_wait_count : 1),
+		(double) rw_s_spin_round_count /
+		(rw_s_spin_wait_count ? rw_s_spin_wait_count : 1),
+		(double) rw_x_spin_round_count /
+		(rw_x_spin_wait_count ? rw_x_spin_wait_count : 1));
 }
 
 /***********************************************************************

=== modified file 'storage/innobase/ut/ut0ut.c'
--- a/storage/innobase/ut/ut0ut.c	2008-12-19 00:34:15 +0000
+++ b/storage/innobase/ut/ut0ut.c	2009-07-02 14:23:36 +0000
@@ -154,6 +154,23 @@ ut_usectime(
 }
 
 /**************************************************************
+Returns diff in microseconds (end_sec,end_ms) - (start_sec,start_ms) */
+
+ib_longlong
+ut_usecdiff(
+/*========*/
+	ulint	end_sec,	/* in: end time, seconds since the Epoch */
+	ulint	end_ms,	/* in: end time, microseconds on top of end_sec (a microsecond count despite the _ms suffix) */
+	ulint	start_sec,	/* in: start time, seconds since the Epoch */
+	ulint	start_ms)	/* in: start time, microseconds on top of start_sec (a microsecond count despite the _ms suffix) */
+{
+  ib_longlong end_mics = end_sec * 1000000LL + end_ms; /* end time as one microsecond count */
+  ib_longlong start_mics = start_sec * 1000000LL + start_ms; /* start time as one microsecond count */
+
+  return end_mics - start_mics; /* out: end minus start, in microseconds */
+}
+
+/**************************************************************
 Returns the difference of two times in seconds. */
 
 double
@@ -348,6 +365,7 @@ ut_delay(
 	j = 0;
 
 	for (i = 0; i < delay * 50; i++) {
+                PAUSE_INSTRUCTION();
 		j += i;
 	}
 

=== added file 'storage/innobase/win_atomics32_test.c'
--- a/storage/innobase/win_atomics32_test.c	1970-01-01 00:00:00 +0000
+++ b/storage/innobase/win_atomics32_test.c	2009-06-16 13:16:15 +0000
@@ -0,0 +1,30 @@
+/* Copyright (C) 2009 Sun Microsystems AB
+ 
+  This program is free software; you can redistribute it and/or modify
+  it under the terms of the GNU General Public License as published by
+  the Free Software Foundation; version 2 of the License.
+ 
+  This program is distributed in the hope that it will be useful,
+  but WITHOUT ANY WARRANTY; without even the implied warranty of
+  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+  GNU General Public License for more details.
+ 
+  You should have received a copy of the GNU General Public License
+  along with this program; if not, write to the Free Software
+  Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA */
+
+#include <windows.h>
+
+int main() /* calls the 32-bit Interlocked functions and MemoryBarrier once each */
+{ /* NOTE(review): presumably a build-time probe for these APIs - confirm against the build scripts */
+  volatile long var32 = 0; /* target that both Interlocked calls operate on */
+  long add32 = 1; /* addend passed to InterlockedExchangeAdd */
+  long old32 = 0; /* comparand passed to InterlockedCompareExchange */
+  long exch32 = 1; /* exchange value passed to InterlockedCompareExchange */
+  long ret_value; /* receives each call's result; never read afterwards */
+
+  ret_value = InterlockedExchangeAdd(&var32, add32);
+  ret_value = InterlockedCompareExchange(&var32, exch32, old32);
+  MemoryBarrier();
+  return EXIT_SUCCESS;
+}

=== added file 'storage/innobase/win_atomics64_test.c'
--- a/storage/innobase/win_atomics64_test.c	1970-01-01 00:00:00 +0000