List:Commits« Previous MessageNext Message »
From:Andrei Elkin Date:May 18 2011 3:32pm
Subject:bzr commit into mysql-next-mr-wl5569 branch (andrei.elkin:3487) WL#5569
WL#5754
View as plain text  
#At file:///home/andrei/MySQL/BZR/2a-23May/MERGE/mysql-trunk-wl5569/ based on revid:dmitry.shulga@stripped

 3487 Andrei Elkin	2011-05-18 [merge]
      wl#5569 MTS
      wl#5754 Query event parallel applying
      
      This is the full patch; it also refactors the Coordinator-Worker (C-W) interfaces so that
      the execution context strictly corresponds to the executing actor, be it Coordinator or Worker.

    added:
      sql/dynamic_ids.cc
      sql/rpl_info_dummy.cc
      sql/rpl_info_dummy.h
      sql/rpl_rli_pdb.cc
      sql/rpl_rli_pdb.h
    renamed:
      sql/server_ids.h => sql/dynamic_ids.h
    modified:
      .bzr-mysql/default.conf
      client/mysqldump.c
      include/my_pthread.h
      scripts/mysql_install_db.pl.in
      scripts/mysql_install_db.sh
      scripts/mysql_system_tables.sql
      sql/CMakeLists.txt
      sql/binlog.cc
      sql/binlog.h
      sql/events.cc
      sql/field.cc
      sql/handler.cc
      sql/lock.cc
      sql/log_event.cc
      sql/log_event.h
      sql/mysqld.cc
      sql/mysqld.h
      sql/rpl_info.cc
      sql/rpl_info.h
      sql/rpl_info_factory.cc
      sql/rpl_info_factory.h
      sql/rpl_info_file.cc
      sql/rpl_info_file.h
      sql/rpl_info_handler.h
      sql/rpl_info_table.cc
      sql/rpl_info_table.h
      sql/rpl_info_table_access.cc
      sql/rpl_info_table_access.h
      sql/rpl_mi.cc
      sql/rpl_mi.h
      sql/rpl_reporting.cc
      sql/rpl_reporting.h
      sql/rpl_rli.cc
      sql/rpl_rli.h
      sql/rpl_slave.cc
      sql/rpl_slave.h
      sql/rpl_utility.h
      sql/share/errmsg-utf8.txt
      sql/sp.cc
      sql/sql_base.cc
      sql/sql_class.cc
      sql/sql_class.h
      sql/sql_db.cc
      sql/sql_parse.cc
      sql/sql_rename.cc
      sql/sql_table.cc
      sql/sql_trigger.cc
      sql/sql_view.cc
      sql/sys_vars.cc
      sql/table.cc
      sql/dynamic_ids.h
=== modified file '.bzr-mysql/default.conf'
--- a/.bzr-mysql/default.conf	2011-01-08 15:07:32 +0000
+++ b/.bzr-mysql/default.conf	2011-01-11 23:01:02 +0000
@@ -1,4 +1,4 @@
 [MYSQL]
 post_commit_to = "commits@stripped"
 post_push_to = "commits@stripped"
-tree_name = "mysql-trunk"
+tree_name = "mysql-next-mr-wl5569"

=== modified file 'client/mysqldump.c'
--- a/client/mysqldump.c	2010-12-07 12:11:26 +0000
+++ b/client/mysqldump.c	2010-12-27 18:54:41 +0000
@@ -914,7 +914,9 @@ static int get_options(int *argc, char *
       my_hash_insert(&ignore_table,
                      (uchar*) my_strdup("mysql.slave_master_info", MYF(MY_WME))) ||
       my_hash_insert(&ignore_table,
-                     (uchar*) my_strdup("mysql.slave_relay_log_info", MYF(MY_WME))))
+                     (uchar*) my_strdup("mysql.slave_relay_log_info", MYF(MY_WME))) ||
+      my_hash_insert(&ignore_table,
+                     (uchar*) my_strdup("mysql.slave_worker_info", MYF(MY_WME))))
     return(EX_EOM);
 
   if ((ho_error= handle_options(argc, argv, my_long_options, get_one_option)))

=== modified file 'include/my_pthread.h'
--- a/include/my_pthread.h	2011-01-11 09:09:21 +0000
+++ b/include/my_pthread.h	2011-01-11 23:01:02 +0000
@@ -128,6 +128,8 @@ struct timespec {
   ((TS1.tv.i64 > TS2.tv.i64) ? 1 : \
    ((TS1.tv.i64 < TS2.tv.i64) ? -1 : 0))
 
+#define diff_timespec(TS1, TS2) \
+  (TS1.tv.i64 - TS2.tv.i64)
 
 int win_pthread_mutex_trylock(pthread_mutex_t *mutex);
 int pthread_create(pthread_t *, const pthread_attr_t *, pthread_handler, void *);
@@ -457,6 +459,20 @@ int my_pthread_mutex_trylock(pthread_mut
 #endif /* !cmp_timespec */
 #endif /* HAVE_TIMESPEC_TS_SEC */
 
+#ifdef HAVE_TIMESPEC_TS_SEC
+#ifndef diff_timespec
+#define diff_timespec(TS1, TS2) \
+  (((TS1.ts_sec * 1000000000) + TS1.ts_nsec) - \
+   ((TS2.ts_sec * 1000000000) + TS2.ts_nsec))
+#endif /* !diff_timespec */
+#else
+#ifndef diff_timespec
+#define diff_timespec(TS1, TS2) \
+  (((TS1.tv_sec * 1000000000) + TS1.tv_nsec) - \
+   ((TS2.tv_sec * 1000000000) + TS2.tv_nsec))
+#endif /* !diff_timespec */
+#endif /* HAVE_TIMESPEC_TS_SEC */
+
 	/* safe_mutex adds checking to mutex for easier debugging */
 
 typedef struct st_safe_mutex_t

=== modified file 'scripts/mysql_install_db.pl.in'
--- a/scripts/mysql_install_db.pl.in	2010-10-25 10:39:01 +0000
+++ b/scripts/mysql_install_db.pl.in	2010-12-13 21:16:31 +0000
@@ -79,11 +79,6 @@ Usage: $0 [OPTIONS]
                        user.  You must be root to use this option.  By default
                        mysqld runs using your current login name and files and
                        directories that it creates will be owned by you.
-  --rpl-engine=engine  The storage engine used for the mysql.slave_master_info and
-                       mysql.slave_relay_log_info tables. By default, both tables are
-                       created using the MyISAM storage engine. However, any storage
-                       engine available to the server may be used. If a crash-safe
-                       slave is required, the storage engine must be transactional.
 
 All other options are passed to the mysqld program
 
@@ -122,7 +117,6 @@ sub parse_arguments
              "builddir=s",      # FIXME not documented
              "srcdir=s",
              "ldata|datadir=s",
-             "rpl-engine=s",
 
              # Note that the user will be passed to mysqld so that it runs
              # as 'user' (crucial e.g. if log-bin=/some_other_path/
@@ -459,29 +453,6 @@ if ( open(PIPE, "| $mysqld_install_cmd_l
   report_verbose($opt,"OK");
 
   # ----------------------------------------------------------------------
-  # Pipe ALTER TABLE mysql.slave_master_info|slave_relay_log_info to "mysqld --bootstrap"
-  # ----------------------------------------------------------------------
-
-  if ($opt->{'rpl-engine'})
-  {
-    report_verbose_wait($opt,"Setting engine for mysql.slave_master_info mysql.slave_relay_log_info tables...");
-    if ( open(PIPE, "| $mysqld_install_cmd_line") )
-    {
-      print PIPE "use mysql;\n";
-      print PIPE "ALTER TABLE mysql.slave_master_info ENGINE= " . $opt->{'rpl-engine'} . ";\n";
-      print PIPE "ALTER TABLE mysql.slave_relay_log_info ENGINE= " . $opt->{'$rpl-engine'} . ";\n";
-      close PIPE;
-
-      report_verbose($opt,"OK");
-    }
-    else
-    {
-      warning($opt,"CRASH-SAFE SLAVE IS NOT COMPLETELY CONFIGURED!",
-                   "The \"CRASH-SAFE SLAVE\" might not work properly.");
-    }
-  }
-
-  # ----------------------------------------------------------------------
   # Pipe fill_help_tables.sql to "mysqld --bootstrap"
   # ----------------------------------------------------------------------
 

=== modified file 'scripts/mysql_install_db.sh'
--- a/scripts/mysql_install_db.sh	2010-10-25 10:39:01 +0000
+++ b/scripts/mysql_install_db.sh	2010-12-13 21:16:31 +0000
@@ -63,11 +63,6 @@ Usage: $0 [OPTIONS]
                        user.  You must be root to use this option.  By default
                        mysqld runs using your current login name and files and
                        directories that it creates will be owned by you.
-  --rpl-engine=engine  The storage engine used for the mysql.slave_master_info and
-                       mysql.slave_relay_log_info tables. By default, both tables are
-                       created using the MyISAM storage engine. However, any storage
-                       engine available to the server may be used. If a crash-safe
-                       slave is required, the storage engine must be transactional.
 
 All other options are passed to the mysqld program
 
@@ -120,8 +115,6 @@ parse_arguments()
       --no-defaults|--defaults-file=*|--defaults-extra-file=*)
         defaults="$arg" ;;
 
-      --rpl-engine=*) rpl_engine=`parse_arg "$arg"` ;;
-
       --cross-bootstrap|--windows)
         # Used when building the MySQL system tables on a different host than
         # the target. The platform-independent files that are created in
@@ -434,19 +427,6 @@ else
   exit 1
 fi
 
-if test -n "$rpl_engine"
-then
-  s_echo "Setting engine for mysql.slave_master_info mysql.slave_relay_log_info tables..."
-  if { echo "use mysql;"; echo "ALTER TABLE mysql.slave_master_info ENGINE= $rpl_engine;"; echo "ALTER TABLE mysql.slave_relay_log_info ENGINE= $rpl_engine;"; } | $mysqld_install_cmd_line > /dev/null
-  then
-    s_echo "OK"
-  else
-    echo
-    echo "WARNING: CRASH-SAFE SLAVE IS NOT COMPLETELY CONFIGURED!"
-    echo "The \"CRASH-SAFE SLAVE\" might not work properly."
-  fi
-fi
-
 s_echo "Filling help tables..."
 if { echo "use mysql;"; cat $fill_help_tables; } | $mysqld_install_cmd_line > /dev/null
 then

=== modified file 'scripts/mysql_system_tables.sql'
--- a/scripts/mysql_system_tables.sql	2010-12-29 00:38:59 +0000
+++ b/scripts/mysql_system_tables.sql	2011-01-11 23:01:02 +0000
@@ -100,10 +100,12 @@ CREATE TABLE IF NOT EXISTS event ( db ch
 
 CREATE TABLE IF NOT EXISTS ndb_binlog_index (Position BIGINT UNSIGNED NOT NULL, File VARCHAR(255) NOT NULL, epoch BIGINT UNSIGNED NOT NULL, inserts BIGINT UNSIGNED NOT NULL, updates BIGINT UNSIGNED NOT NULL, deletes BIGINT UNSIGNED NOT NULL, schemaops BIGINT UNSIGNED NOT NULL, PRIMARY KEY(epoch)) ENGINE=MYISAM;
 
-CREATE TABLE IF NOT EXISTS slave_relay_log_info (Master_id INTEGER UNSIGNED NOT NULL, Number_of_lines INTEGER UNSIGNED NOT NULL, Relay_log_name TEXT CHARACTER SET utf8 COLLATE utf8_bin NOT NULL, Relay_log_pos BIGINT UNSIGNED NOT NULL, Master_log_name TEXT CHARACTER SET utf8 COLLATE utf8_bin NOT NULL, Master_log_pos BIGINT UNSIGNED NOT NULL, Sql_delay INTEGER NOT NULL, PRIMARY KEY(Master_id)) ENGINE=MYISAM DEFAULT CHARSET=utf8 COMMENT 'Relay Log Information';
+CREATE TABLE IF NOT EXISTS slave_relay_log_info (Master_id INTEGER UNSIGNED NOT NULL, Number_of_lines INTEGER UNSIGNED NOT NULL, Relay_log_name TEXT CHARACTER SET utf8 COLLATE utf8_bin NOT NULL, Relay_log_pos BIGINT UNSIGNED NOT NULL, Master_log_name TEXT CHARACTER SET utf8 COLLATE utf8_bin NOT NULL, Master_log_pos BIGINT UNSIGNED NOT NULL, Sql_delay INTEGER NOT NULL, Number_of_workers INTEGER UNSIGNED NOT NULL, PRIMARY KEY(Master_id)) ENGINE=MYISAM DEFAULT CHARSET=utf8 COMMENT 'Relay Log Information';
 
 CREATE TABLE IF NOT EXISTS slave_master_info (Master_id INTEGER UNSIGNED NOT NULL, Number_of_lines INTEGER UNSIGNED NOT NULL, Master_log_name TEXT CHARACTER SET utf8 COLLATE utf8_bin NOT NULL, Master_log_pos BIGINT UNSIGNED NOT NULL, Host TEXT CHARACTER SET utf8 COLLATE utf8_bin, User_name TEXT CHARACTER SET utf8 COLLATE utf8_bin, User_password TEXT CHARACTER SET utf8 COLLATE utf8_bin, Port INTEGER UNSIGNED NOT NULL, Connect_retry INTEGER UNSIGNED NOT NULL, Enabled_ssl BOOLEAN NOT NULL, Ssl_ca TEXT CHARACTER SET utf8 COLLATE utf8_bin, Ssl_capath TEXT CHARACTER SET utf8 COLLATE utf8_bin, Ssl_cert TEXT CHARACTER SET utf8 COLLATE utf8_bin, Ssl_cipher TEXT CHARACTER SET utf8 COLLATE utf8_bin, Ssl_key TEXT CHARACTER SET utf8 COLLATE utf8_bin, Ssl_verify_servert_cert BOOLEAN NOT NULL, Heartbeat FLOAT NOT NULL, Bind TEXT CHARACTER SET utf8 COLLATE utf8_bin, Ignored_server_ids TEXT CHARACTER SET utf8 COLLATE utf8_bin, Uuid TEXT CHARACTER SET utf8 COLLATE utf8_bin, Retry_count BIGINT UNSIGNED NOT NULL, PRIMARY KEY(Master_id)) ENGINE=MYISAM DEFAULT CHARSET=utf8 COMMENT 'Master Information';
 
+CREATE TABLE IF NOT EXISTS slave_worker_info (Master_id INTEGER UNSIGNED NOT NULL, Worker_id INTEGER UNSIGNED NOT NULL, Relay_log_name TEXT CHARACTER SET utf8 COLLATE utf8_bin NOT NULL, Relay_log_pos BIGINT UNSIGNED NOT NULL, Master_log_name TEXT CHARACTER SET utf8 COLLATE utf8_bin NOT NULL, Master_log_pos BIGINT UNSIGNED NOT NULL, Checkpoint_relay_log_name TEXT CHARACTER SET utf8 COLLATE utf8_bin NOT NULL, Checkpoint_relay_log_pos BIGINT UNSIGNED NOT NULL, Checkpoint_master_log_name TEXT CHARACTER SET utf8 COLLATE utf8_bin NOT NULL, Checkpoint_master_log_pos BIGINT UNSIGNED NOT NULL, Checkpoint_seqno INT UNSIGNED NOT NULL, Checkpoint_group_size INTEGER UNSIGNED NOT NULL, Checkpoint_group_bitmap BLOB NOT NULL, PRIMARY KEY(Master_id, Worker_id)) ENGINE=MYISAM DEFAULT CHARSET=utf8 COMMENT 'Worker Information';
+
 --
 -- PERFORMANCE SCHEMA INSTALLATION
 -- Note that this script is also reused by mysql_upgrade,

=== modified file 'sql/CMakeLists.txt'
--- a/sql/CMakeLists.txt	2011-01-03 13:12:01 +0000
+++ b/sql/CMakeLists.txt	2011-01-11 23:01:02 +0000
@@ -108,7 +108,8 @@ ADD_DEPENDENCIES(master GenError)
 SET (SLAVE_SOURCE rpl_slave.cc rpl_reporting.cc rpl_mi.cc rpl_rli.cc
 		  rpl_info_handler.cc rpl_info_file.cc rpl_info_table.cc
 		  rpl_info_values.cc rpl_info.cc rpl_info_factory.cc
-		  rpl_info_table_access.cc server_ids.h)
+		  rpl_info_table_access.cc dynamic_ids.cc rpl_rli_pdb.cc
+		  rpl_info_dummy.cc)
 ADD_LIBRARY(slave ${SLAVE_SOURCE})
 ADD_DEPENDENCIES(slave GenError)
 ADD_LIBRARY(sqlgunitlib

=== modified file 'sql/binlog.cc'
--- a/sql/binlog.cc	2010-12-17 02:01:32 +0000
+++ b/sql/binlog.cc	2011-02-27 17:35:25 +0000
@@ -3966,18 +3966,18 @@ err:
     THD::enter_cond() (see NOTES in sql_class.h).
 */
 
-void MYSQL_BIN_LOG::wait_for_update_relay_log(THD* thd)
+int MYSQL_BIN_LOG::wait_for_update_relay_log(THD* thd, const struct timespec *timeout)
 {
-  const char *old_msg;
+  int ret= 0;
   DBUG_ENTER("wait_for_update_relay_log");
 
-  old_msg= thd->enter_cond(&update_cond, &LOCK_log,
-                           "Slave has read all relay log; "
-                           "waiting for the slave I/O "
-                           "thread to update it" );
-  mysql_cond_wait(&update_cond, &LOCK_log);
-  thd->exit_cond(old_msg);
-  DBUG_VOID_RETURN;
+  if (!timeout)
+    mysql_cond_wait(&update_cond, &LOCK_log);
+  else
+    ret= mysql_cond_timedwait(&update_cond, &LOCK_log,
+                              const_cast<struct timespec *>(timeout));
+
+  DBUG_RETURN(ret);
 }
 
 /**
@@ -4535,6 +4535,59 @@ THD::binlog_set_pending_rows_event(Rows_
 }
 
 /**
+   @param db    db name c-string to be inserted into the alphabetically
+                sorted THD::binlog_updated_db_names list.
+
+                Note: both the list node data (explicitly) and the node
+                struct itself (implicitly) are allocated in
+                thd->mem_root, to be cleared at the end of query
+                processing (@c THD::cleanup_after_query()).
+*/
+void
+THD::add_to_binlog_updated_dbs(const char *db)
+{
+  char *after_db;
+  if (binlog_updated_db_names->elements >  MAX_DBS_IN_QUERY_MTS)
+  {
+    push_warning_printf(this, MYSQL_ERROR::WARN_LEVEL_WARN,
+                        ER_UPDATED_DBS_GREATER_MAX,
+                        ER(ER_UPDATED_DBS_GREATER_MAX),
+                        MAX_DBS_IN_QUERY_MTS);
+    return;
+  }
+
+  after_db= strdup_root(mem_root, db);
+  if (binlog_updated_db_names->elements != 0)
+  {
+    List_iterator<char> it(*get_binlog_updated_db_names());
+
+    while (it++)
+    {
+      char *swap= NULL;
+      char **ref_cur_db= it.ref();
+      int cmp= strcmp(after_db, *ref_cur_db);
+
+      DBUG_ASSERT(!swap || cmp < 0);
+      
+      if (cmp == 0)
+      {
+        after_db= NULL;  /* dup to ignore */
+        break;
+      }
+      else if (swap || cmp > 0)
+      {
+        swap= *ref_cur_db;
+        *ref_cur_db= after_db;
+        after_db= swap;
+      }
+    }
+  }
+  if (after_db)
+    binlog_updated_db_names->push_back(after_db);
+}
+
+
+/**
   Decide on logging format to use for the statement and issue errors
   or warnings as needed.  The decision depends on the following
   parameters:
@@ -4755,6 +4808,28 @@ int THD::decide_logging_format(TABLE_LIS
          multi_access_engine= TRUE;
 
       prev_access_table= table->table;
+
+    }
+    
+    /*
+      Master side of the parallelization of STMT-format DML events.
+      The dbs of all involved tables are stored in an alphabetically ordered
+      name list. If the number of databases exceeds the MAX_DBS_IN_QUERY_MTS
+      maximum, gathering of the list stops since it won't be sent to the slave.
+    */
+    if (is_write && variables.binlog_format != BINLOG_FORMAT_ROW &&
+        lex->sql_command != SQLCOM_END /* rows-event applying by slave */)
+    {
+      if (!binlog_updated_db_names)
+      {
+        binlog_updated_db_names= new List<char>; /* thd->mem_root is used */
+      }
+      for (TABLE_LIST *table= tables; table; table= table->next_global)
+      {
+        if (table->placeholder())
+          continue;
+        add_to_binlog_updated_dbs(table->db);
+      }
     }
 
     DBUG_PRINT("info", ("flags_write_all_set: 0x%llx", flags_write_all_set));

=== modified file 'sql/binlog.h'
--- a/sql/binlog.h	2010-12-10 16:55:50 +0000
+++ b/sql/binlog.h	2010-12-27 18:54:41 +0000
@@ -179,7 +179,7 @@ public:
   }
   void set_max_size(ulong max_size_arg);
   void signal_update();
-  void wait_for_update_relay_log(THD* thd);
+  int wait_for_update_relay_log(THD* thd, const struct timespec * timeout);
   int  wait_for_update_bin_log(THD* thd, const struct timespec * timeout);
   void set_need_start_event() { need_start_event = 1; }
   void init(bool no_auto_events_arg, ulong max_size);

=== added file 'sql/dynamic_ids.cc'
--- a/sql/dynamic_ids.cc	1970-01-01 00:00:00 +0000
+++ b/sql/dynamic_ids.cc	2010-12-10 12:10:20 +0000
@@ -0,0 +1,144 @@
+#include "dynamic_ids.h"
+
+int cmp_string(const void *id1, const void *id2)
+{
+  return strcmp((char *) id1, (char *) id2);
+}
+
+int cmp_ulong(const void *id1, const void *id2)
+{
+  return ((*(ulong *) id1) - (* (ulong *)id2));
+}
+
+Dynamic_ids::Dynamic_ids(size_t param_size): size(param_size)
+{
+  my_init_dynamic_array(&dynamic_ids, size, 16, 16);
+}
+
+Dynamic_ids::~Dynamic_ids()
+{
+  delete_dynamic(&dynamic_ids);
+}
+
+bool Server_ids::do_unpack_dynamic_ids(char *param_dynamic_ids)
+{
+  char *token= NULL, *last= NULL;
+  uint num_items= 0;
+ 
+  DBUG_ENTER("Server_ids::unpack_dynamic_ids");
+
+  token= strtok_r((char *)const_cast<const char*>(param_dynamic_ids),
+                  " ", &last);
+
+  if (token == NULL)
+    DBUG_RETURN(TRUE);
+
+  num_items= atoi(token);
+  for (uint i=0; i < num_items; i++)
+  {
+    token= strtok_r(NULL, " ", &last);
+    if (token == NULL)
+      DBUG_RETURN(TRUE);
+    else
+    {
+      ulong val= atol(token);
+      insert_dynamic(&dynamic_ids, (uchar *) &val);
+    }
+  }
+  DBUG_RETURN(FALSE);
+}
+
+bool Server_ids::do_pack_dynamic_ids(String *buffer)
+{
+  DBUG_ENTER("Server_ids::pack_dynamic_ids");
+
+  if (buffer->set_int(dynamic_ids.elements, FALSE, &my_charset_bin))
+    DBUG_RETURN(TRUE);
+
+  for (ulong i= 0;
+       i < dynamic_ids.elements; i++)
+  {
+    ulong s_id;
+    get_dynamic(&dynamic_ids, (uchar*) &s_id, i);
+    if (buffer->append(" ") ||
+        buffer->append_ulonglong(s_id))
+      DBUG_RETURN(TRUE);
+  }
+
+  DBUG_RETURN(FALSE);
+}
+
+bool Server_ids::do_search_id(const void *id)
+{
+  return (bsearch((ulong *) id, dynamic_ids.buffer,
+          dynamic_ids.elements, size,
+          (int (*) (const void*, const void*))
+          cmp_ulong) != NULL);
+}
+
+
+bool Database_ids::do_unpack_dynamic_ids(char *param_dynamic_ids)
+{
+  char *token= NULL, *last= NULL;
+  uint num_items= 0;
+ 
+  DBUG_ENTER("Server_ids::unpack_dynamic_ids");
+
+  token= strtok_r((char *)const_cast<const char*>(param_dynamic_ids),
+                  " ", &last);
+
+  if (token == NULL)
+    DBUG_RETURN(TRUE);
+
+  num_items= atoi(token);
+  for (uint i=0; i < num_items; i++)
+  {
+    token= strtok_r(NULL, " ", &last);
+    if (token == NULL)
+      DBUG_RETURN(TRUE);
+    else
+    {
+      size_t size= strlen(token);
+      if (token[size - 1] == '\n')
+      {
+        /*
+          Remove \n as there may be one when reading from file.
+          After improving init_dynarray_intvar_from_file we can
+          remove this.
+        */
+        token[size -1]= '\0';
+      }
+      insert_dynamic(&dynamic_ids, (uchar *) token);
+    }
+  }
+  DBUG_RETURN(FALSE);
+}
+
+bool Database_ids::do_pack_dynamic_ids(String *buffer)
+{
+  char token[2000];
+
+  DBUG_ENTER("Server_ids::pack_dynamic_ids");
+
+  if (buffer->set_int(dynamic_ids.elements, FALSE, &my_charset_bin))
+    DBUG_RETURN(TRUE);
+
+  for (ulong i= 0;
+       i < dynamic_ids.elements; i++)
+  {
+    get_dynamic(&dynamic_ids, (uchar*) token, i);
+    if (buffer->append(" ") ||
+        buffer->append(token))
+      DBUG_RETURN(TRUE);
+  }
+
+  DBUG_RETURN(FALSE);
+}
+
+bool Database_ids::do_search_id(const void *id)
+{
+  return (bsearch((const char *) id, dynamic_ids.buffer,
+          dynamic_ids.elements, size,
+          (int (*) (const void*, const void*))
+          cmp_string) != NULL);
+}

=== renamed file 'sql/server_ids.h' => 'sql/dynamic_ids.h'
--- a/sql/server_ids.h	2010-10-25 10:39:01 +0000
+++ b/sql/dynamic_ids.h	2010-12-10 12:10:20 +0000
@@ -1,20 +1,78 @@
-#ifndef SERVER_ID_H
+/* Copyright (c) 2000, 2010 Oracle and/or its affiliates. All rights reserved.
 
-#define SERVER_ID_H
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+#ifndef DYNAMIC_ID_H
+
+#define DYNAMIC_ID_H
 
 #include <my_sys.h>
 #include <sql_string.h>
 
-class Server_ids
+class Dynamic_ids
 {
-  public:
-    DYNAMIC_ARRAY server_ids;
+public:
+    DYNAMIC_ARRAY dynamic_ids;
 
-    Server_ids();
-    ~Server_ids();
+    Dynamic_ids(size_t param_size);
+    virtual ~Dynamic_ids();
 
-    bool pack_server_ids(String *buffer);
-    bool unpack_server_ids(char *param_server_ids);
+    bool pack_dynamic_ids(String *buffer)
+    {
+      return(do_pack_dynamic_ids(buffer));
+    }
+
+    bool unpack_dynamic_ids(char *param_dynamic_ids)
+    {
+      return(do_unpack_dynamic_ids(param_dynamic_ids));
+    }
+
+    bool search_id(const void *id)
+    {
+      return (do_search_id(id));
+    }
+
+protected:
+    size_t size;
+
+private:
+    virtual bool do_pack_dynamic_ids(String *buffer)= 0;
+    virtual bool do_unpack_dynamic_ids(char *param_dynamic_ids)= 0;
+    virtual bool do_search_id(const void *id)= 0;
 };
 
+class Server_ids : public Dynamic_ids
+{
+public:
+    Server_ids(size_t size): Dynamic_ids(size) { };
+    virtual ~Server_ids() { };
+
+private:
+    bool do_pack_dynamic_ids(String *buffer);
+    bool do_unpack_dynamic_ids(char *param_dynamic_ids);
+    bool do_search_id(const void *id);
+};
+
+class Database_ids : public Dynamic_ids
+{
+public:
+    Database_ids(size_t size): Dynamic_ids(size) { };
+    virtual ~Database_ids() { };
+
+private:
+    bool do_pack_dynamic_ids(String *buffer);
+    bool do_unpack_dynamic_ids(char *param_dynamic_ids);
+    bool do_search_id(const void *id);
+};
 #endif

=== modified file 'sql/events.cc'
--- a/sql/events.cc	2010-11-18 16:34:56 +0000
+++ b/sql/events.cc	2011-02-27 17:35:25 +0000
@@ -384,6 +384,7 @@ Events::create_event(THD *thd, Event_par
       }
       else
       {
+        thd->add_one_db_to_binlog_updated_dbs(parse_data->dbname.str);
         /* If the definer is not set or set to CURRENT_USER, the value of CURRENT_USER
            will be written into the binary log as the definer for the SQL thread. */
         ret= write_bin_log(thd, TRUE, log_query.c_ptr(), log_query.length());
@@ -502,6 +503,12 @@ Events::update_event(THD *thd, Event_par
                                   new_element);
       /* Binlog the alter event. */
       DBUG_ASSERT(thd->query() && thd->query_length());
+
+      thd->set_binlog_updated_db_names(new List<char>);
+      thd->add_to_binlog_updated_dbs(parse_data->dbname.str);
+      if (new_dbname)
+        thd->add_to_binlog_updated_dbs(new_dbname->str);
+
       ret= write_bin_log(thd, TRUE, thd->query(), thd->query_length());
     }
   }
@@ -568,6 +575,8 @@ Events::drop_event(THD *thd, LEX_STRING 
       event_queue->drop_event(thd, dbname, name);
     /* Binlog the drop event. */
     DBUG_ASSERT(thd->query() && thd->query_length());
+
+    thd->add_one_db_to_binlog_updated_dbs(dbname.str);
     ret= write_bin_log(thd, TRUE, thd->query(), thd->query_length());
   }
   /* Restore the state of binlog format */

=== modified file 'sql/field.cc'
--- a/sql/field.cc	2010-12-29 00:38:59 +0000
+++ b/sql/field.cc	2011-02-27 17:35:25 +0000
@@ -3736,7 +3736,12 @@ longlong Field_long::val_int(void)
   ASSERT_COLUMN_MARKED_FOR_READ;
   int32 j;
   /* See the comment in Field_long::store(long long) */
-  DBUG_ASSERT(table->in_use == current_thd);
+  /* 
+     If the method is executed by a thread other than the table's owner,
+     that thread must be a Slave worker thread.
+  */
+  DBUG_ASSERT(table->in_use == current_thd || (current_thd)->slave_thread);
+
 #ifdef WORDS_BIGENDIAN
   if (table->s->db_low_byte_first)
     j=sint4korr(ptr);
@@ -6308,8 +6313,8 @@ int Field_string::store(const char *from
   const char *cannot_convert_error_pos;
   const char *from_end_pos;
 
-  /* See the comment for Field_long::store(long long) */
-  DBUG_ASSERT(table->in_use == current_thd);
+  /* See the comment for Field_long::store(long long) and Field_long::val_int */
+  DBUG_ASSERT(table->in_use == current_thd || (current_thd)->slave_thread);
 
   copy_length= well_formed_copy_nchars(field_charset,
                                        (char*) ptr, field_length,
@@ -6458,8 +6463,8 @@ String *Field_string::val_str(String *va
 			      String *val_ptr)
 {
   ASSERT_COLUMN_MARKED_FOR_READ;
-  /* See the comment for Field_long::store(long long) */
-  DBUG_ASSERT(table->in_use == current_thd);
+  /* See the comment for Field_long::store(long long) and Field_long::val_int */
+  DBUG_ASSERT(table->in_use == current_thd || (current_thd)->slave_thread);
   uint length;
   if (table->in_use->variables.sql_mode &
       MODE_PAD_CHAR_TO_FULL_LENGTH)

=== modified file 'sql/handler.cc'
--- a/sql/handler.cc	2011-01-11 11:45:02 +0000
+++ b/sql/handler.cc	2011-02-27 17:35:25 +0000
@@ -2127,7 +2127,13 @@ void **handler::ha_data(THD *thd) const
 
 THD *handler::ha_thd(void) const
 {
-  DBUG_ASSERT(!table || !table->in_use || table->in_use == current_thd);
+  /* 
+     Regarding the current_thd->slave_thread alternative:
+     the MTS Coordinator opens/closes a temp table while the rest of the
+     operation is done by Workers.
+  */
+  DBUG_ASSERT(!table || !table->in_use || table->in_use == current_thd ||
+              current_thd->slave_thread);
   return (table && table->in_use) ? table->in_use : current_thd;
 }
 

=== modified file 'sql/lock.cc'
--- a/sql/lock.cc	2010-11-23 22:37:59 +0000
+++ b/sql/lock.cc	2010-12-13 21:16:31 +0000
@@ -154,8 +154,10 @@ lock_tables_check(THD *thd, TABLE **tabl
         or hold any type of lock in a session,
         since this would be a DOS attack.
       */
-      if (t->reginfo.lock_type >= TL_READ_NO_INSERT ||
-          thd->lex->sql_command == SQLCOM_LOCK_TABLES)
+      if ((t->reginfo.lock_type >= TL_READ_NO_INSERT ||
+          thd->lex->sql_command == SQLCOM_LOCK_TABLES) && 
+          thd->lex->sql_command != SQLCOM_ALTER_TABLE &&
+          !thd->slave_thread)
       {
           my_error(ER_CANT_LOCK_RPL_INFO_TABLE, MYF(0));
           DBUG_RETURN(1);

=== modified file 'sql/log_event.cc'
--- a/sql/log_event.cc	2011-01-11 05:13:23 +0000
+++ b/sql/log_event.cc	2011-05-16 19:43:58 +0000
@@ -44,6 +44,7 @@
 #include "rpl_record.h"
 #include "transaction.h"
 #include <my_dir.h>
+#include "rpl_rli_pdb.h"
 
 #endif /* MYSQL_CLIENT */
 
@@ -175,7 +176,8 @@ void handle_rows_query_log_event(Log_eve
        ev_type == UPDATE_ROWS_EVENT) && rli->rows_query_ev != NULL &&
       ((Rows_log_event*) ev)->get_flags(Rows_log_event::STMT_END_F))
   {
-    delete rli->rows_query_ev;
+    if (rli->rows_query_ev)
+      delete rli->rows_query_ev;
     rli->rows_query_ev= NULL;
     rli->info_thd->set_query(NULL, 0);
   }
@@ -793,6 +795,10 @@ Log_event::Log_event(const char* buf,
 
 #ifndef MYSQL_CLIENT
 #ifdef HAVE_REPLICATION
+inline int Log_event::do_apply_event_worker(Slave_worker *w)
+{ 
+  return do_apply_event(w);
+}
 
 int Log_event::do_update_pos(Relay_log_info *rli)
 {
@@ -2378,6 +2384,745 @@ Log_event::continue_group(Relay_log_info
     return Log_event::EVENT_SKIP_IGNORE;
   return Log_event::do_shall_skip(rli);
 }
+
+
+/**
+   Tells whether the event carries the partition info.
+   That is the case for a Table_map event and for a Query event
+   that does not terminate a group.
+
+   @todo Query event is supported in a limited way:
+         ev->get_db() yields the session db, not the actual db.
+
+   @return true if the event contains the partition info, false otherwise
+*/
+bool Log_event::contains_partition_info()
+{
+  if (get_type_code() == TABLE_MAP_EVENT)
+    return true;
+
+  if (get_type_code() != QUERY_EVENT)
+    return false;
+
+  return !ends_group();
+}
+
+/**
+   General hashing function to compute the id of an applier for
+   the current event.
+   When computing the id a few rules apply depending on the partitioning
+   properties that the event instance can feature.
+
+   Let's name the properties.
+
+   B - beginning of a group of events (BEGIN query_log_event)
+   g - mini-group representative event containing the partition info
+      (any Table_map, a Query_log_event)
+   p - a mini-group internal event that *p*recedes its g-parent
+      (int_, rand_, user_ var:s)
+   r - a mini-group internal "regular" event that follows its g-parent
+      (Write, Update, Delete -rows)
+   S - sequentially applied event (may not be a part of any group).
+       Events of this type are determined via @c mts_sequential_exec()
+       earlier and don't cause calling this method.
+   T - terminator of the group (XID, COMMIT, ROLLBACK)
+
+   Only the `g' case requires computing the assigned Worker id.
+   In the `T, r' cases it's @c last_assigned_worker, that is the one that was
+   assigned at the last `g' processing.
+   In the `B' case it's NULL to indicate the Coordinator will skip doing
+   anything more with the event. Its scheduling gets deferred until the
+   following `g' event names a Worker.
+
+   At the beginning of a group (or for an event met while no BEGIN has been
+   seen) a new Slave_job_group is en-queued into GAQ.  At the end of a group
+   the group's GAQ slot is filled in with the relay-log and checkpoint names
+   when the Worker has not been notified of them yet, CGAP is emptied, and
+   the last_assigned_worker and curr_group_seen_begin markers are reset.
+
+   @note `p' and g-Query-log-event is not supported yet.
+
+   @note The function can update APH, CGAP objects.
+
+   @note NOTE(review): in the group-terminating branch `worker' is
+         dereferenced (worker->relay_log_change_notified) before the later
+         `if (!worker)' check is reached; confirm that the empty {B, T}
+         group can never get there with worker == NULL.
+
+   @note NOTE(review): the results of my_malloc() are handed to strcpy()
+         without being checked for NULL.
+
+   @param rli  the Coordinator's Relay_log_info. It is declared const but
+               its members are modified through const_cast.
+
+   @return a pointer to the Worker struct or NULL.
+*/
+
+Slave_worker *Log_event::get_slave_worker_id(Relay_log_info const *rli)
+{
+  Slave_worker *worker= NULL;
+  Slave_job_group g;
+  bool is_b_event;
+
+  /* checking properties and perform corresponding actions */
+
+  // Beginning of a group or a DDL
+  if ((is_b_event= starts_group()) || !rli->curr_group_seen_begin)
+  {
+    ulong gaq_idx;
+    const_cast<Relay_log_info*>(rli)->mts_total_groups++;
+
+    g.master_log_pos= log_pos;
+    g.group_master_log_pos= g.group_relay_log_pos= 0;
+    g.group_master_log_name= NULL; // todo: remove
+    g.group_relay_log_name= NULL;
+    g.worker_id= (ulong) -1;
+    g.total_seqno= const_cast<Relay_log_info*>(rli)->mts_total_groups;
+    g.checkpoint_log_name= NULL;
+    g.checkpoint_log_pos= 0;
+    g.checkpoint_relay_log_name= NULL;
+    g.checkpoint_relay_log_pos= 0;
+    g.checkpoint_seqno= (uint) -1;
+    g.done= 0;
+
+    // the last occupied GAQ's array index
+    gaq_idx= rli->gaq->assigned_group_index= rli->gaq->en_queue((void *) &g);
+    // serves as a mark for Coord to delete events otherwise
+    const_cast<Relay_log_info*>(rli)->curr_group_is_parallel= TRUE;
+    
+    DBUG_ASSERT(gaq_idx != (ulong) -1 && gaq_idx < rli->gaq->s);
+    DBUG_ASSERT(((Slave_job_group *) 
+                 dynamic_array_ptr(&rli->gaq->Q, rli->gaq->assigned_group_index))->
+                group_relay_log_name == NULL);
+    DBUG_ASSERT(rli->gaq->assigned_group_index != (ulong) -1); // gaq must have room
+    DBUG_ASSERT(rli->last_assigned_worker == NULL);
+    if (is_b_event)
+    {
+      Log_event *ptr_curr_ev= this;
+      // B-event is appended to the Deferred Array associated with GCAP
+      insert_dynamic(&const_cast<Relay_log_info*>(rli)->curr_group_da,
+                     (uchar*) &ptr_curr_ev);
+
+      DBUG_ASSERT(rli->curr_group_da.elements == 1);
+
+      // mark the current group as started with B-event
+      const_cast<Relay_log_info*>(rli)->curr_group_seen_begin= TRUE;
+      return NULL;
+    } 
+  }
+
+  // mini-group representative
+
+  if (contains_partition_info())
+  {
+    List_iterator<char> it(*mts_get_dbs(rli->info_thd->mem_root));
+
+    it++;
+    do
+    {
+      char **ref_cur_db= it.ref();
+      // a lot of things happen inside `get_slave_worker'
+      const_cast<Relay_log_info *>(rli)->last_assigned_worker=
+        worker= get_slave_worker(*ref_cur_db, const_cast<Relay_log_info *>(rli));
+      get_dynamic(&rli->gaq->Q, (uchar*) &g, rli->gaq->assigned_group_index);
+      if (g.worker_id == (ulong) -1)  // assign "officially" the current group
+      {
+        g.worker_id= worker->id;       // todo/fixme: think of Slave_worker* here
+        set_dynamic(&rli->gaq->Q, (uchar*) &g, rli->gaq->assigned_group_index);
+        
+        DBUG_ASSERT(g.group_relay_log_name == NULL);
+      }
+    } while (mts_number_dbs() != OVER_MAX_DBS_IN_QUERY_MTS && it++);
+
+    // TODO: convert to C's private mem_root.
+
+    // Releasing the Coord's mem-root from the updated dbs. It's safe to do at this
+    // point because the root is no longer needed along the remaining part of
+    // Coordinator's execution flow.
+    free_root(rli->info_thd->mem_root, MYF(MY_KEEP_PREALLOC));
+  }
+  else // a mini-group internal "regular" event
+    if (rli->last_assigned_worker)
+    {
+      worker= rli->last_assigned_worker;
+      
+      DBUG_ASSERT(rli->curr_group_assigned_parts.elements > 0); // g must've done
+    }
+    else // int_, rand_, user_ var:s
+    {
+      Log_event *ptr_curr_ev= this;
+
+      DBUG_ASSERT(get_type_code() == INTVAR_EVENT ||
+                  get_type_code() == RAND_EVENT ||
+                  get_type_code() == USER_VAR_EVENT ||
+
+                  // (TODO: remove) temporarily placed:
+                  get_type_code() ==  ROWS_QUERY_LOG_EVENT);
+
+      insert_dynamic(&const_cast<Relay_log_info*>(rli)->curr_group_da,
+                     (uchar*) &ptr_curr_ev);
+      
+      DBUG_ASSERT(rli->curr_group_da.elements > 0);
+    }
+
+  // the group terminal event (Commit, Xid or a DDL query)
+  if (ends_group() || !rli->curr_group_seen_begin)
+  {
+    uint i;
+    mts_group_cnt= rli->gaq->assigned_group_index;
+    Slave_job_group *ptr_g=
+      (Slave_job_group *)
+      dynamic_array_ptr(&rli->gaq->Q, rli->gaq->assigned_group_index);
+
+    DBUG_ASSERT(rli->curr_group_is_parallel);
+
+    // TODO: throw an error when relay-log reading starts from inside of a group!!
+
+    if (!worker->relay_log_change_notified)
+    {
+      /*
+        Prior to this event, C rotated the relay log and dropped each
+        Worker's notified flag.
+        Now the group terminating event initiates the new name
+        delivery through the current group relaylog slot in GAQ.
+      */
+      DBUG_ASSERT(ptr_g->group_relay_log_name == NULL);
+
+      ptr_g->group_relay_log_name= (char *)
+        my_malloc(strlen(const_cast<Relay_log_info*>(rli)->
+                         get_group_relay_log_name()) + 1, MYF(MY_WME));
+      strcpy(ptr_g->group_relay_log_name,
+             const_cast<Relay_log_info*>(rli)->get_group_relay_log_name());
+
+      DBUG_ASSERT(ptr_g->group_relay_log_name != NULL);
+
+      worker->relay_log_change_notified= TRUE;
+    }
+
+    if (!worker->checkpoint_notified)
+    {
+      // Worker to dealloc
+      // master binlog checkpoint
+      ptr_g->checkpoint_log_name= (char *)
+        my_malloc(strlen(const_cast<Relay_log_info*>(rli)->
+                         get_group_master_log_name()) + 1, MYF(MY_WME));
+      strcpy(ptr_g->checkpoint_log_name,
+             const_cast<Relay_log_info*>(rli)->get_group_master_log_name());
+      ptr_g->checkpoint_log_pos= const_cast<Relay_log_info*>(rli)->get_group_master_log_pos();
+      // relay log checkpoint
+      ptr_g->checkpoint_relay_log_name= (char *)
+        my_malloc(strlen(const_cast<Relay_log_info*>(rli)->
+                         get_group_relay_log_name()) + 1, MYF(MY_WME));
+      strcpy(ptr_g->checkpoint_relay_log_name,
+             const_cast<Relay_log_info*>(rli)->get_group_relay_log_name());
+      ptr_g->checkpoint_relay_log_pos= const_cast<Relay_log_info*>(rli)->get_group_relay_log_pos();
+      worker->checkpoint_notified= TRUE;
+    }
+    ptr_g->checkpoint_seqno= rli->checkpoint_seqno;
+    const_cast<Relay_log_info*>(rli)->checkpoint_seqno++;
+
+    DBUG_ASSERT(worker == rli->last_assigned_worker);
+
+    if (!worker)
+    {
+      DBUG_ASSERT(0); 
+
+      // a very special case of the empty group: {B, T}
+      DBUG_ASSERT(rli->curr_group_assigned_parts.elements == 0
+                  && rli->curr_group_da.elements == 1);
+      worker= get_slave_worker("", const_cast<Relay_log_info *>(rli));
+    }
+    
+    // CGAP cleanup
+    for (i= rli->curr_group_assigned_parts.elements; i > 0; i--)
+      delete_dynamic_element(&const_cast<Relay_log_info*>(rli)->
+                             curr_group_assigned_parts, i - 1);
+    const_cast<Relay_log_info*>(rli)->last_assigned_worker= NULL;
+
+    // reset the B-group marker
+    const_cast<Relay_log_info*>(rli)->curr_group_seen_begin= FALSE;
+  }
+  
+  return worker;
+}
+
+/**
+   Appends an item to the circular queue of jobs.
+
+   The item is copied into the slot indexed by @c jobs->a, after which
+   @c jobs->a advances modulo @c jobs->s.  @c jobs->a == @c jobs->s marks
+   the full queue and @c jobs->e == @c jobs->s marks the empty one.
+
+   @note the function returns the *next* available index rather than the
+         index the item has been stored at.
+         (TODO: incompatible with the circular buffer's method!)
+
+   @param jobs  the queue to append to
+   @param item  the item to copy into the queue
+
+   @return -1 when the queue is full and nothing has been stored,
+           otherwise the updated value of @c jobs->a (which equals
+           @c jobs->s when the queue has just become full).
+*/
+static int en_queue(Slave_jobs_queue *jobs, Slave_job_item *item)
+{
+  if (jobs->a == jobs->s)
+  {
+    DBUG_ASSERT(jobs->a == jobs->Q.elements);
+    return -1;
+  }
+
+  // store
+
+  set_dynamic(&jobs->Q, (uchar*) item, jobs->a);
+
+  // pre-boundary cond: the queue was empty, the new item becomes the head
+  if (jobs->e == jobs->s)
+    jobs->e= jobs->a;
+  
+  jobs->a= (jobs->a + 1) % jobs->s;
+  jobs->len++;
+
+  // post-boundary cond: the queue has become full
+  if (jobs->a == jobs->e)
+    jobs->a= jobs->s;
+
+  /*
+    The length invariant. The conditional operator binds weaker than
+    `==' and `||' so it has to be parenthesized explicitly, otherwise
+    the assert degenerates into testing an index difference for non-zero.
+    The full queue (a == s) is accounted for separately.
+  */
+  DBUG_ASSERT(jobs->a != jobs->e);
+  DBUG_ASSERT(jobs->len ==
+              ((jobs->a == jobs->s) ? jobs->s :
+               (jobs->a >= jobs->e) ? (jobs->a - jobs->e) :
+               (jobs->s + jobs->a - jobs->e)));
+  return jobs->a;
+}
+
+/**
+   Copies the head item of the queue into @c ret without removing it.
+
+   @param jobs  the queue to look into
+   @param ret   the struct to receive the copy of the head item;
+                its @c data member is set to NULL when the queue is empty
+
+   @return @c ret when the queue is not empty, NULL otherwise.
+*/
+static void * head_queue(Slave_jobs_queue *jobs, Slave_job_item *ret)
+{
+  const bool queue_is_empty= (jobs->e == jobs->s);
+
+  if (!queue_is_empty)
+  {
+    get_dynamic(&jobs->Q, (uchar*) ret, jobs->e);
+
+    DBUG_ASSERT(ret->data);         // todo: move to caller
+
+    return ret;
+  }
+
+  DBUG_ASSERT(jobs->len == 0);
+  ret->data= NULL;                  // todo: move to caller
+  return NULL;
+}
+
+
+/**
+   Removes the head item from the circular queue of jobs.
+
+   The head item is copied into @c ret, after which @c jobs->e advances
+   modulo @c jobs->s.  @c jobs->e == @c jobs->s marks the empty queue.
+
+   @param jobs  the queue to remove from
+   @param ret   the struct to receive the copy of the removed item;
+                it is left untouched when the queue is empty
+
+   @return @c ret when an item has been removed, NULL when the queue
+           is empty.
+*/
+Slave_job_item * de_queue(Slave_jobs_queue *jobs, Slave_job_item *ret)
+{
+  if (jobs->e == jobs->s)
+  {
+    DBUG_ASSERT(jobs->len == 0);
+    return NULL;
+  }
+  get_dynamic(&jobs->Q, (uchar*) ret, jobs->e);
+  jobs->len--;
+  
+  // pre boundary cond: the queue was full, the freed slot becomes available
+  if (jobs->a == jobs->s)
+    jobs->a= jobs->e;
+  jobs->e= (jobs->e + 1) % jobs->s;
+
+  // post boundary cond: the queue has become empty
+  if (jobs->a == jobs->e)
+    jobs->e= jobs->s;
+
+  /*
+    The length invariant. The conditional operator binds weaker than
+    `==' so it has to be parenthesized explicitly, otherwise the assert
+    degenerates into testing an index difference for non-zero.
+  */
+  DBUG_ASSERT(jobs->e == jobs->s ||
+              jobs->len == ((jobs->a >= jobs->e) ? (jobs->a - jobs->e) :
+                            (jobs->s + jobs->a - jobs->e)));
+
+  return ret;
+}
+
+/**
+   Coordinator's routine to hand an event over to a Worker's queue.
+
+   The routine waits until the total size of the pending events leaves
+   room for the new one, accounts the event in the rli counters, naps
+   when @c mts_wqs_underrun_w_id names a Worker, and finally en-queues
+   the item into the Worker's private queue, waiting for a free slot if
+   the queue is full.
+   If the item can not be en-queued because either the Coordinator or the
+   Worker thread got killed, the pending jobs counter and size are
+   rolled back.
+
+   @note the function returns silently, with nothing en-queued, when the
+         Coordinator is found killed after waiting for the pending size.
+
+   @param job_item  the item whose @c data points to the Log_event
+   @param w         the Worker to receive the item
+   @param rli       the Coordinator's Relay_log_info
+*/
+void append_item_to_jobs(slave_job_item *job_item,
+                         Slave_worker *w, Relay_log_info *rli)
+{
+  THD *thd= rli->info_thd;
+  int ret;
+  ulong ev_size= ((Log_event*) (job_item->data))->data_written;
+  ulonglong new_pend_size;
+
+  DBUG_ASSERT(thd == current_thd);
+  thd_proc_info(thd, "Feeding an event to a worker thread");
+
+  mysql_mutex_lock(&rli->pending_jobs_lock);
+  new_pend_size= rli->mts_pending_jobs_size + ev_size;
+  // C waits basing on *data* sizes in the queues
+  while (new_pend_size > rli->mts_pending_jobs_size_max)
+  {
+    const char *old_msg;
+    // the conversion has to match the 64-bit argument on every platform
+    const char info_format[]=
+      "Waiting for Slave Workers to free pending events, requested size %llu";
+    char wait_info[sizeof(info_format) + 4*sizeof(new_pend_size)];
+
+    sprintf(wait_info, info_format, (unsigned long long) new_pend_size);
+    rli->mts_wqs_oversize= TRUE;
+    rli->wait_jobs++; // waiting due to the total size
+    old_msg= thd->enter_cond(&rli->pending_jobs_cond, &rli->pending_jobs_lock,
+                             wait_info);
+    mysql_cond_wait(&rli->pending_jobs_cond, &rli->pending_jobs_lock);
+    thd->exit_cond(old_msg);
+    if (thd->killed)
+      return;
+
+    // the lock is taken again after exit_cond()
+    mysql_mutex_lock(&rli->pending_jobs_lock);
+
+    new_pend_size= rli->mts_pending_jobs_size + ev_size;
+  }
+  rli->pending_jobs++;
+  rli->mts_pending_jobs_size= new_pend_size;
+  rli->stmt_jobs++;
+
+  mysql_mutex_unlock(&rli->pending_jobs_lock);
+
+  // sleep while all queue lengths are gt Underrun
+  // sleep time lasts the longer the further WQ:s shift to Overrun
+  // Workers report their U,O status
+
+  if (rli->mts_wqs_underrun_w_id != (ulong) -1)
+  {
+    // todo: experiment with weight to get a good approximation formula
+    ulong nap_weight= rli->mts_wqs_overrun + 1;
+    my_sleep(nap_weight * rli->mts_coordinator_basic_nap);
+    rli->mts_wqs_underrun_cnt++;
+  }
+
+  // stays -1 if a killed thread prevents even the first en_queue() attempt
+  ret= -1;
+
+  mysql_mutex_lock(&w->jobs_lock);
+
+  // possible WQ overfill
+  while (!w->info_thd->killed && !thd->killed &&
+         (ret= en_queue(&w->jobs, job_item)) == -1)
+  {
+    const char *old_msg;
+    // all three values are printed as unsigned long, hence the casts below
+    const char info_format[]=
+      "Waiting for Slave Worker %lu queue: max len %lu, actual len %lu";
+    char wait_info[sizeof(info_format) + 4*sizeof(w->id) +
+                   4*sizeof(w->jobs.s) + 4*sizeof(w->jobs.len)];
+    
+    sprintf(wait_info, info_format, (ulong) w->id, (ulong) w->jobs.s,
+            (ulong) w->jobs.len);
+    old_msg= thd->enter_cond(&w->jobs_cond, &w->jobs_lock, wait_info);
+    w->jobs.overfill= TRUE;
+    w->jobs.waited_overfill++;
+    rli->mts_wqs_overfill_cnt++;
+    mysql_cond_wait(&w->jobs_cond, &w->jobs_lock);
+    thd->exit_cond(old_msg);
+    
+    // the lock is taken again after exit_cond()
+    mysql_mutex_lock(&w->jobs_lock);
+  }
+  if (ret != -1)
+  {
+    w->curr_jobs++;
+    // the queue was empty: the Worker may be waiting for an item
+    if (w->jobs.len == 1)
+      mysql_cond_signal(&w->jobs_cond);
+    
+    mysql_mutex_unlock(&w->jobs_lock);
+  }
+  else
+  {
+    mysql_mutex_unlock(&w->jobs_lock);
+
+    mysql_mutex_lock(&rli->pending_jobs_lock);
+    rli->pending_jobs--;                  // roll back of the prev incr
+    rli->mts_pending_jobs_size -= ev_size;
+    mysql_mutex_unlock(&rli->pending_jobs_lock);
+  }
+}
+
+/**
+   Scheduling of the event execution either serially or in parallel.
+
+   In the MTS recovery mode the event is either skipped, when its group
+   is marked in @c recovery_groups, or applied by the caller thread.
+   Otherwise the event is applied by the caller thread through
+   do_apply_event() when the parallel execution is off, or when the event
+   is of the sequential type and is either met outside of a BEGIN-started
+   group or is to be executed asynchronously by Coordinator.
+   In the remaining cases a Worker is found with get_slave_worker_id() and
+   the event, preceded by the events of the deferred array, is appended to
+   the Worker's queue.
+
+   @note NOTE(review): @c term_event is assigned only when
+         @c curr_group_isolated is set; the later read is guarded by the
+         same flag.
+
+   @note NOTE(review): the wait for @c rows_query_ev to become NULL polls
+         with my_sleep(10) and does not check for the thread being killed.
+
+   @param rli  the Coordinator's Relay_log_info; it is declared const but
+               modified through the @c c_rli alias.
+
+   @return the result of do_apply_event() when the event is applied by
+           the caller thread; 0 when the event is skipped at recovery;
+           when no Worker is returned by get_slave_worker_id(), FALSE if
+           @c curr_group_assigned_parts is empty and TRUE otherwise;
+           FALSE after the event has been handed to a Worker.
+*/
+int Log_event::apply_event(Relay_log_info const *rli)
+{
+  uint i;
+  DBUG_ENTER("LOG_EVENT:apply_event");
+  Slave_worker *w= NULL;
+  Slave_job_item item= {NULL}, *job_item= &item;
+  Relay_log_info *c_rli= const_cast<Relay_log_info*>(rli);  // constless alias
+  bool parallel, seq_event, term_event;
+
+  if (rli->is_mts_recovery())
+  {
+    bool skip= _bitmap_is_set(&c_rli->recovery_groups, c_rli->mts_recovery_index);
+
+    if (ends_group()) // TODO: || ! seen_begin
+    {
+      c_rli->mts_recovery_index++;
+      if (--c_rli->mts_recovery_group_cnt == 0)
+      {
+        c_rli->recovery_parallel_workers= c_rli->slave_parallel_workers;
+        c_rli->mts_recovery_index= 0;
+      }
+    }
+    if (skip)
+      DBUG_RETURN(0);
+    else 
+      DBUG_RETURN(do_apply_event(rli));
+  }
+
+  if (!(parallel= rli->is_parallel_exec()) ||
+      ((seq_event= mts_sequential_exec()) &&
+       (!rli->curr_group_seen_begin ||
+        mts_async_exec_by_coordinator(::server_id))))
+  {
+    if (parallel)
+    {
+      /* 
+         There are two classes of events that Coordinator executes
+         itself. One requires all Workers to finish up their assignments.
+         The other does not need (actually can not have) this synchronization.
+      */
+
+      if (!mts_async_exec_by_coordinator(::server_id))
+      {
+        /*
+          this event does not split the current group but is indeed
+          a separator between two master's binlogs, therefore requiring
+          Workers to sync.
+        */
+
+        DBUG_ASSERT(!rli->curr_group_seen_begin);
+
+        /*
+          marking the event as not being executed in parallel, which affects
+          memory deallocation in the following execution path.
+        */
+        c_rli->curr_group_is_parallel= FALSE;
+        (void) wait_for_workers_to_finish(rli);
+      }
+      else
+      {
+        if (rli->curr_group_is_parallel)
+        {
+          /* 
+             the event is an artificial one that splits the current group into
+             separate relay-logs. Differently from the previous events of the
+             group this one is applied by Coordinator and w/o any
+             synchronization with Workers.
+          */
+          c_rli->curr_group_split= TRUE;
+          c_rli->curr_group_is_parallel= FALSE;
+        }
+      }
+    }
+    DBUG_RETURN(do_apply_event(rli));
+  }
+
+  DBUG_ASSERT(!(rli->curr_group_seen_begin && ends_group()) ||
+              rli->last_assigned_worker);
+
+  /* 
+     Todo: disassociate Rows_* events from the central rli.
+  */
+  if (seq_event)
+  {   // rli->last_assigned_worker != NULL if BTQ but not BQT
+    DBUG_ASSERT(rli->curr_group_seen_begin || ends_group());
+    if (!c_rli->curr_group_isolated)
+      (void) wait_for_workers_to_finish(rli, rli->last_assigned_worker);
+    c_rli->curr_group_isolated= TRUE;
+
+    if (get_type_code() == ROWS_QUERY_LOG_EVENT)
+    {
+      while (c_rli->rows_query_ev != NULL)
+      {
+        my_sleep(10);
+      }
+      c_rli->rows_query_ev= (Rows_query_log_event*) this;
+     }
+   }
+
+  // getting Worker's id
+  if ((!(w= get_slave_worker_id(rli)) ||
+       DBUG_EVALUATE_IF("fault_injection_get_slave_worker", 1, 0)))
+    DBUG_RETURN(rli->curr_group_assigned_parts.elements == 0 ? FALSE : TRUE);
+
+  job_item->data= this;
+
+  DBUG_PRINT("Log_event::apply_event:", ("-> job item data %p to W_%lu", job_item->data, w->id));
+
+  if (rli->curr_group_da.elements > 0)
+  {
+    /*
+      the current event sorted out which partition the current group belongs
+      to. It's time now to process the deferred array events.
+    */
+    for (i= 0; i < rli->curr_group_da.elements; i++)
+    { 
+      Slave_job_item da_item;
+      get_dynamic(&c_rli->curr_group_da, (uchar*) &da_item.data, i);
+      append_item_to_jobs(&da_item, w, c_rli);
+    }
+    if (rli->curr_group_da.elements > rli->curr_group_da.max_element)
+    {
+      // reallocate to less mem
+      
+      DBUG_ASSERT(rli->curr_group_da.max_element < rli->curr_group_da.elements);
+      
+      c_rli->curr_group_da.elements= rli->curr_group_da.max_element;
+      c_rli->curr_group_da.max_element= 0;
+      freeze_size(&c_rli->curr_group_da); // restores max_element
+    }
+    c_rli->curr_group_da.elements= 0;
+  }
+
+  if (c_rli->curr_group_isolated)
+    term_event= ends_group();
+
+  append_item_to_jobs(job_item, w, c_rli);
+
+  if (c_rli->curr_group_isolated && term_event)
+  {
+    (void) wait_for_workers_to_finish(rli);
+    c_rli->curr_group_isolated= FALSE;
+  }
+
+  DBUG_RETURN(FALSE);
+}
+
+
+/**
+   Worker's routine to wait for a new assignment in its private queue.
+
+   The routine blocks until the head of the queue holds an item or the
+   Worker's thread gets killed.  The head item is only copied out,
+   it stays in the queue.
+
+   @param w         the Worker whose queue is read
+   @param job_item  the struct to receive the copy of the head item;
+                    waiting takes place only while its @c data is NULL
+
+   @return @c job_item, whose @c data is NULL if the thread got killed
+           before an item showed up.
+*/
+struct slave_job_item* pop_jobs_item(Slave_worker *w, Slave_job_item *job_item)
+{
+  THD *worker_thd= w->info_thd;
+
+  mysql_mutex_lock(&w->jobs_lock);
+  for (;;)
+  {
+    if (job_item->data || worker_thd->killed)
+      break;
+
+    //job_item= w->jobs.pop(); // LABS-TODO de_queue()
+    head_queue(&w->jobs, job_item);
+    if (job_item->data != NULL)
+      break;
+
+    // nothing at the head yet: wait for Coordinator's signal
+    w->wait_jobs++;
+    const char *saved_msg=
+      worker_thd->enter_cond(&w->jobs_cond, &w->jobs_lock,
+                             "Waiting for an event from sql thread");
+    mysql_cond_wait(&w->jobs_cond, &w->jobs_lock);
+    worker_thd->exit_cond(saved_msg);
+    mysql_mutex_lock(&w->jobs_lock);
+  }
+
+  if (job_item->data)
+    w->curr_jobs--;
+  mysql_mutex_unlock(&w->jobs_lock);
+
+  thd_proc_info(w->info_thd, "Executing event");
+
+  return job_item;
+}
+
+
+/**
+  mts-II worker main routine.
+  The worker thread waits for an event, executes it, fixes statistics counters.
+
+  After the event is taken from the head of the Worker's queue, the names
+  of the databases of an event containing the partition info are added
+  to the Worker's @c curr_group_exec_parts unless present there already.
+  The event is then applied with do_apply_event_worker(), de-queued,
+  the pending jobs counters of @c rli are decreased, and the underrun and
+  overrun marks are updated.  Coordinator is signalled when it may be
+  waiting either for a slot in the queue or for the pending size to shrink.
+
+  @note the function maintains CGEP and modifies APH, and causes
+        modification of GAQ.
+
+  @note when the thread is found killed after waiting, the item is left
+        in the queue and no counter is changed; that is for the caller.
+
+  @note NOTE(review): @c mts_wqs_overrun is incremented for every job
+        executed while the queue is above the overrun level but is
+        decremented only once when the queue gets below it; confirm
+        that is intended.
+
+  @note NOTE(review): @c w->jobs.len is read for the underrun/overrun
+        checks after @c w->jobs_lock has been released.
+
+  @param w    the Worker that executes the job
+  @param rli  the Relay_log_info holding the pending jobs counters
+
+  @return 0 success 
+         -1 got killed,
+         otherwise the error returned by do_apply_event_worker()
+*/
+int slave_worker_exec_job(Slave_worker *w, Relay_log_info *rli)
+{
+  int error= 0;
+  struct slave_job_item item= {NULL}, *job_item= &item;
+  THD *thd= w->info_thd;
+  Log_event *ev= NULL;
+
+  DBUG_ENTER("slave_worker_exec_job");
+
+  job_item= pop_jobs_item(w, job_item);
+  if (thd->killed)
+  {
+    // de-queueing and decrementing counters is in the caller's exit branch
+    error= -1;
+    goto err;
+  }
+  ev= static_cast<Log_event*>(job_item->data);
+  thd->server_id = ev->server_id;
+  thd->set_time();
+  thd->lex->current_select= 0;
+  if (!ev->when)
+    ev->when= my_time(0);
+  ev->thd= thd; // todo: assert because up to this point, ev->thd == 0
+
+  DBUG_PRINT("slave_worker_exec_job:", ("W_%lu <- job item: %p data: %p thd: %p", w->id, job_item, ev, thd));
+
+  if (ev->starts_group())
+  {
+    w->curr_group_seen_begin= TRUE; // The current group is started with B-event
+  } 
+  else
+  {
+    if (ev->contains_partition_info())
+    {
+      List_iterator<char> it(*ev->mts_get_dbs(thd->mem_root));
+      DYNAMIC_ARRAY *ep= &(w->curr_group_exec_parts->dynamic_ids);
+      
+      while (it++)
+      {
+        bool found= FALSE;
+        char key[NAME_LEN + 2];
+        const char *dbname= *it.ref();
+        uchar dblength= (uint) strlen(dbname);
+
+        for (uint i= 0; i < ep->elements && !found; i++)
+        {
+          get_dynamic(ep, (uchar*) key, i);
+          found=
+            (key[0] == dblength) &&
+            (strncmp(key + 1, const_cast<char*>(dbname), dblength) == 0);
+        }
+        if (!found)
+        {
+          key[0]= dblength;
+          memcpy(key + 1, dbname, dblength + 1);
+          insert_dynamic(ep, (uchar*) key);
+        }
+      }
+    }
+  }
+
+  error= ev->do_apply_event_worker(w);
+  
+  if (ev->ends_group() || !w->curr_group_seen_begin)
+  {
+    DBUG_PRINT("slave_worker_exec_job:", (" commits GAQ index %lu, last committed  %lu", ev->mts_group_cnt, w->last_group_done_index));
+
+    w->slave_worker_ends_group(ev, error); /* last done sets post exec */
+  }
+
+  mysql_mutex_lock(&w->jobs_lock);
+  de_queue(&w->jobs, job_item);
+
+  /* possible overfill: Coordinator may be waiting for a slot in the queue */
+  if (w->jobs.len == w->jobs.s - 1 && w->jobs.overfill == TRUE)
+  {
+    w->jobs.overfill= FALSE;
+    // todo: w->hungry_cnt++;
+    mysql_cond_signal(&w->jobs_cond);
+  }
+  mysql_mutex_unlock(&w->jobs_lock);
+
+  /* statistics */
+
+  mysql_mutex_lock(&rli->pending_jobs_lock);
+  rli->pending_jobs--;
+  rli->mts_pending_jobs_size -= ev->data_written;
+  DBUG_ASSERT(rli->mts_pending_jobs_size < rli->mts_pending_jobs_size_max);
+  
+  // underrun
+  if ((rli->mts_worker_underrun_level * w->jobs.s) / 100 >  w->jobs.len)
+  {
+    rli-> mts_wqs_underrun_w_id= w->id;
+    // todo: w->underrun_cnt++;
+  } else if (rli->mts_wqs_underrun_w_id == w->id)
+  {
+    rli->mts_wqs_underrun_w_id= (ulong) -1;
+  }
+
+  // overrun exploits the underrun level param
+  if (((100 - rli->mts_worker_underrun_level) * w->jobs.s) / 100 < w->jobs.len)
+  {
+    rli->mts_wqs_overrun++;
+    w->wq_overrun_set= TRUE;
+    // todo: w->underrun_cnt++;
+  }
+  else if (w->wq_overrun_set == TRUE)
+  {
+    rli->mts_wqs_overrun--;
+    w->wq_overrun_set= FALSE;
+  }
+
+  DBUG_ASSERT(rli->mts_wqs_overrun >= 0);
+
+  /* coordinator can be waiting */
+  if (rli->mts_pending_jobs_size < rli->mts_pending_jobs_size_max &&
+      rli->mts_wqs_oversize)  // TODO: unit/general test wqs_oversize
+  {
+    rli->mts_wqs_oversize= FALSE;
+    mysql_cond_signal(&rli->pending_jobs_cond);
+  }
+  
+  mysql_mutex_unlock(&rli->pending_jobs_lock);
+
+  w->stmt_jobs++;
+
+err:
+
+  // TODO: fix w/a for Rows_query_log_event
+  if (ev && ev->get_type_code() != ROWS_QUERY_LOG_EVENT)
+    delete ev;  // after ev->update_pos() event is garbage
+
+  DBUG_RETURN(error);
+}
+
 #endif
 
 /**************************************************************************
@@ -2640,6 +3385,38 @@ bool Query_log_event::write(IO_CACHE* fi
       start+= host.length;
     }
   }
+
+  if (thd && thd->get_binlog_updated_db_names() != NULL)
+  {
+    uchar dbs;
+    *start++= Q_UPDATED_DB_NAMES;
+
+    compile_time_assert(MAX_DBS_IN_QUERY_MTS <= OVER_MAX_DBS_IN_QUERY_MTS);
+
+    /* 
+       In case the number of db:s exceeds MAX_DBS_IN_QUERY_MTS,
+       no db names are written and the event will require sequential applying on the slave.
+    */
+    dbs= *start++=
+      (thd->get_binlog_updated_db_names()->elements <= MAX_DBS_IN_QUERY_MTS) ?
+      thd->get_binlog_updated_db_names()->elements : OVER_MAX_DBS_IN_QUERY_MTS;
+
+    DBUG_ASSERT(dbs != 0);
+
+    if (dbs <= MAX_DBS_IN_QUERY_MTS)
+    {
+      List_iterator_fast<char> it(*thd->get_binlog_updated_db_names());
+      char *db_name;
+
+      while ((db_name= it++))
+      {
+        strcpy((char*) start, db_name);
+        start += strlen(db_name) + 1;
+      }
+    }
+    thd->clear_binlog_updated_db_names();
+  }
+
   /*
     NOTE: When adding new status vars, please don't forget to update
     the MAX_SIZE_LOG_EVENT_STATUS in log_event.h and update the function
@@ -2723,7 +3500,7 @@ Query_log_event::Query_log_event(THD* th
    lc_time_names_number(thd_arg->variables.lc_time_names->number),
    charset_database_number(0),
    table_map_for_update((ulonglong)thd_arg->table_map_for_update),
-   master_data_written(0)
+   master_data_written(0), mts_updated_dbs(0)
 {
   time_t end_time;
 
@@ -2924,6 +3701,7 @@ code_name(int code)
   case Q_CHARSET_DATABASE_CODE: return "Q_CHARSET_DATABASE_CODE";
   case Q_TABLE_MAP_FOR_UPDATE_CODE: return "Q_TABLE_MAP_FOR_UPDATE_CODE";
   case Q_MASTER_DATA_WRITTEN_CODE: return "Q_MASTER_DATA_WRITTEN_CODE";
+  case Q_UPDATED_DB_NAMES: return "Q_UPDATED_DB_NAMES";
   }
   sprintf(buf, "CODE#%d", code);
   return buf;
@@ -2961,7 +3739,8 @@ Query_log_event::Query_log_event(const c
    flags2_inited(0), sql_mode_inited(0), charset_inited(0),
    auto_increment_increment(1), auto_increment_offset(1),
    time_zone_len(0), lc_time_names_number(0), charset_database_number(0),
-   table_map_for_update(0), master_data_written(0)
+   table_map_for_update(0), master_data_written(0),
+   mts_updated_dbs(OVER_MAX_DBS_IN_QUERY_MTS)
 {
   ulong data_len;
   uint32 tmp;
@@ -3140,6 +3919,31 @@ Query_log_event::Query_log_event(const c
       CHECK_SPACE(pos, end, host.length);
       host.str= (char *)pos;
       pos+= host.length;
+      break;
+    }
+    case Q_UPDATED_DB_NAMES:
+    {
+      CHECK_SPACE(pos, end, 1);
+      mts_updated_dbs= *pos++;
+      /* 
+         Notice, the following check is positive also in case of
+         the master's MAX_DBS_IN_QUERY_MTS > the slave's one and the event 
+         contains e.g the master's MAX_DBS_IN_QUERY_MTS db:s.
+      */
+      if (mts_updated_dbs > MAX_DBS_IN_QUERY_MTS)
+      {
+        mts_updated_dbs= OVER_MAX_DBS_IN_QUERY_MTS;
+        break;
+      }
+
+      DBUG_ASSERT(mts_updated_dbs != 0);
+
+      for (uchar i= 0; i < mts_updated_dbs; i++)
+      {
+        strcpy(mts_updated_db_names[i], (char*) pos);
+        pos+= 1 + strlen((const char*) pos);
+      }
+      break;
     }
     default:
       /* That's why you must write status vars in growing order of code */
@@ -3459,8 +4263,12 @@ int Query_log_event::do_apply_event(Rela
   const_cast<Relay_log_info*>(rli)->set_future_group_master_log_pos(log_pos);
   DBUG_PRINT("info", ("log_pos: %lu", (ulong) log_pos));
 
+  /*
+    todo: such cleanup should not be specific to Query event and therefore
+          is preferable at a common with other event pre-execution point
+  */
   clear_all_errors(thd, const_cast<Relay_log_info*>(rli));
-  if (strcmp("COMMIT", query) == 0 && rli->tables_to_lock)
+  if (strcmp("COMMIT", query) == 0 && rli->tables_to_lock != NULL)
   {
     /*
       Cleaning-up the last statement context:
@@ -3504,7 +4312,8 @@ int Query_log_event::do_apply_event(Rela
   */
   if (is_trans_keyword() || rpl_filter->db_ok(thd->db))
   {
-    thd->set_time((time_t)when);
+    thd->set_time(!opt_mts_slave_local_timestamp ? (time_t)when : my_time(0));
+    //thd->set_query_and_id((char*)query_arg, q_len_arg, next_query_id());
     thd->set_query_and_id((char*)query_arg, q_len_arg,
                           thd->charset(), next_query_id());
     thd->variables.pseudo_thread_id= thread_id;		// for temp tables
@@ -3771,6 +4580,9 @@ Default database: '%s'. Query: '%s'",
       The sql thread receives the killed status and will proceed
       to shutdown trying to finish incomplete events group.
     */
+
+    // TODO: address the middle-group killing in MTS case
+
     DBUG_EXECUTE_IF("stop_slave_middle_group",
                     if (strcmp("COMMIT", query) != 0 &&
                         strcmp("BEGIN", query) != 0)
@@ -5211,7 +6023,7 @@ int Load_log_event::do_apply_event(NET* 
   */
   if (rpl_filter->db_ok(thd->db))
   {
-    thd->set_time((time_t)when);
+    thd->set_time(!opt_mts_slave_local_timestamp ? (time_t)when : my_time(0));
     thd->set_query_id(next_query_id());
     thd->warning_info->opt_clear_warning_info(thd->query_id);
 
@@ -5605,6 +6417,7 @@ int Rotate_log_event::do_update_pos(Rela
                         rli->get_group_master_log_name(),
                         (ulong) rli->get_group_master_log_pos()));
     mysql_mutex_unlock(&rli->data_lock);
+    
     rli->flush_info(TRUE);
     
     /*
@@ -5965,10 +6778,31 @@ void Xid_log_event::print(FILE* file, PR
 
 
 #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
-int Xid_log_event::do_apply_event(Relay_log_info const *rli)
+/**
+   Worker's way of applying the Xid event.
+
+   When w->is_transactional() holds, the positions are committed first
+   with w->commit_positions() for the group's GAQ slot that is indexed by
+   @c mts_group_cnt; a failure of that step skips the transaction commit.
+   Otherwise, and after the positions have been committed, the transaction
+   is committed and the transactional metadata locks are released.
+
+   @param w  the Worker executing the event
+
+   @return zero if nothing failed, otherwise the non-zero result of
+           commit_positions() or trans_commit().
+*/
+int Xid_log_event::do_apply_event_worker(Slave_worker *w)
 {
   int error= 0;
+  bool is_trans_repo= w->is_transactional();
 
+  if (is_trans_repo)
+  {
+    ulong gaq_idx= mts_group_cnt;
+    Slave_job_group *ptr_g=
+      (Slave_job_group *) dynamic_array_ptr(&w->c_rli->gaq->Q, gaq_idx);
+
+    if ((error= w->commit_positions(this, ptr_g)))
+      goto err;
+  }
+  error= trans_commit(thd); /* Automatically rolls back on error. */
+  thd->mdl_context.release_transactional_locks();
+
+err:
+  return error;
+}
+
+int Xid_log_event::do_apply_event(Relay_log_info const *rli)
+{
+  int error= 0;
   Relay_log_info *rli_ptr= const_cast<Relay_log_info *>(rli);
 
   /*
@@ -6013,7 +6847,7 @@ int Xid_log_event::do_apply_event(Relay_
     {
       rli_ptr->set_group_master_log_pos(log_pos);
     }
-  
+
     if ((error= rli_ptr->flush_info(TRUE)))
       goto err;
   }
@@ -8016,7 +8850,7 @@ int Rows_log_event::do_apply_event(Relay
       Rows_log_event, we can invalidate the query cache for the
       associated table.
      */
-    for (TABLE_LIST *ptr= rli->tables_to_lock ; ptr ; ptr= ptr->next_global)
+    for (TABLE_LIST *ptr= rli->tables_to_lock ; ptr; ptr= ptr->next_global)
     {
       const_cast<Relay_log_info*>(rli)->m_table_map.set_table(ptr->table_id, ptr->table);
     }
@@ -8025,10 +8859,9 @@ int Rows_log_event::do_apply_event(Relay
 #endif
   }
 
-  TABLE* 
-    table= 
+  TABLE* table= 
     m_table= const_cast<Relay_log_info*>(rli)->m_table_map.get_table(m_table_id);
-
+  
   DBUG_PRINT("debug", ("m_table: 0x%lx, m_table_id: %lu", (ulong) m_table, m_table_id));
 
   if (table)
@@ -8048,7 +8881,7 @@ int Rows_log_event::do_apply_event(Relay
       TIMESTAMP column to a table with one.
       So we call set_time(), like in SBR. Presently it changes nothing.
     */
-    thd->set_time((time_t)when);
+    thd->set_time(!opt_mts_slave_local_timestamp ? (time_t)when : my_time(0));
 
     /*
       Now we are in a statement and will stay in a statement until we
@@ -8777,6 +9610,7 @@ int Table_map_log_event::do_apply_event(
 
   int error= 0;
 
+  // mts-II todo: consider filtering
   if (rli->info_thd->slave_thread /* filtering is for slave only */ &&
       (!rpl_filter->db_ok(table_list->db) ||
        (rpl_filter->is_on() && !rpl_filter->tables_ok("", table_list))))
@@ -8810,6 +9644,7 @@ int Table_map_log_event::do_apply_event(
     table_list->next_global= table_list->next_local= rli->tables_to_lock;
     const_cast<Relay_log_info*>(rli)->tables_to_lock= table_list;
     const_cast<Relay_log_info*>(rli)->tables_to_lock_count++;
+
     /* 'memory' is freed in clear_tables_to_lock */
   }
 
@@ -10562,7 +11397,7 @@ Rows_query_log_event::write_data_body(IO
 int Rows_query_log_event::do_apply_event(Relay_log_info const *rli)
 {
   DBUG_ENTER("Rows_query_log_event::do_apply_event");
-  DBUG_ASSERT(rli->info_thd == thd);
+  DBUG_ASSERT(rli->info_thd == thd || rli->is_parallel_exec());
   /* Set query for writing Rows_query log event into binlog later.*/
   thd->set_query(m_rows_query, (uint32) strlen(m_rows_query));
   DBUG_RETURN(0);

=== modified file 'sql/log_event.h'
--- a/sql/log_event.h	2011-01-11 05:13:23 +0000
+++ b/sql/log_event.h	2011-05-16 19:43:58 +0000
@@ -258,6 +258,18 @@ struct sql_ex_info
 #define INCIDENT_HEADER_LEN    2
 #define HEARTBEAT_HEADER_LEN   0
 #define IGNORABLE_HEADER_LEN   0
+
+/*
+   The maximum number of updated databases that a status of Query-log-event can carry.
+   It can be redefined, but must not be bigger than OVER_MAX_DBS_IN_QUERY_MTS.
+*/
+#define MAX_DBS_IN_QUERY_MTS 16
+/*
+   When the actual number of db:s exceeds MAX_DBS_IN_QUERY_MTS
+   the value of OVER_MAX_DBS_IN_QUERY_MTS is put into the mts_updated_dbs status.
+*/
+#define OVER_MAX_DBS_IN_QUERY_MTS 254
+
 /* 
   Max number of possible extra bytes in a replication event compared to a
   packet (i.e. a query) sent from client to master;
@@ -273,6 +285,8 @@ struct sql_ex_info
                                    1 + 2          /* type, charset_database_number */ + \
                                    1 + 8          /* type, table_map_for_update */ + \
                                    1 + 4          /* type, master_data_written */ + \
+                                                  /* type, db_1, db_2, ... */  \
+                                   1 + (MAX_DBS_IN_QUERY_MTS * (1 + NAME_LEN)) + \
                                    1 + 16 + 1 + 60/* type, user_len, user, host_len, host */)
 #define MAX_LOG_EVENT_HEADER   ( /* in order of Query_log_event::write */ \
   LOG_EVENT_HEADER_LEN + /* write_header */ \
@@ -344,6 +358,13 @@ struct sql_ex_info
 
 #define Q_INVOKER 11
 
+/*
+  Q_UPDATED_DB_NAMES status variable carries the total number of the
+  updated db:s and their names, to be propagated to the slave in order
+  to facilitate the parallel applying of the Query events.
+*/
+#define Q_UPDATED_DB_NAMES 12
+
 /* Intvar event post-header */
 
 /* Intvar event data */
@@ -661,6 +682,8 @@ class THD;
 
 class Format_description_log_event;
 class Relay_log_info;
+class Slave_worker;
+class Slave_committed_queue;
 
 #ifdef MYSQL_CLIENT
 enum enum_base64_output_mode {
@@ -755,11 +778,11 @@ typedef struct st_print_event_info
   Such identifier is not yet unique generally as the event originating master
   is resetable. Also the crashed master can be replaced with some other.
 */
-struct event_coordinates
+typedef struct event_coordinates
 {
   char * file_name; // binlog file name (directories stripped)
   my_off_t  pos;       // event's position in the binlog file
-};
+} LOG_POS_COORD;
 
 /**
   @class Log_event
@@ -1009,9 +1032,20 @@ public:
   ulong slave_exec_mode;
 
   /**
+    Index in @c rli->gaq array to indicate a group that this event is purging.
+    The index is set by C:r to a group terminator event and is checked by W at
+    the event execution. The indexed data represent the Worker progress status.
+  */
+  ulong mts_group_cnt;
+
+  /* a copy of the main rli value stored into event to pass to MTS worker rli */
+  ulonglong future_event_relay_log_pos;
+
+  /**
     Placeholder for event checksum while writing to binlog.
-   */
+  */
   ha_checksum crc;
+
 #ifdef MYSQL_SERVER
   THD* thd;
 
@@ -1056,6 +1090,24 @@ public:
   {
     return thd ? thd->db : 0;
   }
+
+  /*
+    Returns the list of databases updated by the event; the list object and
+    the name copy are allocated on mem_root. Except for Query-log-event the
+    list holds just one item, a copy of get_db(). */
+  virtual List<char>* mts_get_dbs(MEM_ROOT *mem_root)
+  {
+    List<char> *res= new (mem_root) List<char>;
+    res->push_back(strdup_root(mem_root, get_db()));
+    return res;
+  }
+
+  /*
+    Returns the number of databases updated by the event.
+    In other than Query-log-event case that's one.
+  */
+  virtual uint8 mts_number_dbs() { return 1; }
+
 #else
   Log_event() : temp_buf(0) {}
     /* avoid having to link mysqlbinlog against libpthread */
@@ -1178,6 +1230,79 @@ public:
 public:
 
   /**
+     MTS: to execute serially due to technical or conceptual limitation
+     
+     @return TRUE if despite permanent parallel execution mode an event
+                  needs applying in a real isolation that is sequentially.
+  */
+  bool mts_sequential_exec()
+  {
+    return
+      /* 
+         the 4 types below are limitly parallel-supported (the default 
+         session db not the actual db).
+         Decision on BEGIN, COMMIT, Xid is the parallel.
+      */
+      (get_type_code() == QUERY_EVENT &&
+       !starts_group() && !ends_group() &&
+       (mts_number_dbs() ==  OVER_MAX_DBS_IN_QUERY_MTS)) ||
+
+      get_type_code() == START_EVENT_V3          ||
+      get_type_code() == STOP_EVENT              ||
+      get_type_code() == ROTATE_EVENT            ||
+      get_type_code() == LOAD_EVENT              ||
+      get_type_code() == SLAVE_EVENT             ||
+      get_type_code() == CREATE_FILE_EVENT       ||
+      get_type_code() == APPEND_BLOCK_EVENT      ||
+      get_type_code() == EXEC_LOAD_EVENT         ||
+      get_type_code() == DELETE_FILE_EVENT       ||
+      get_type_code() == NEW_LOAD_EVENT          ||
+      get_type_code() == FORMAT_DESCRIPTION_EVENT||
+      get_type_code() == BEGIN_LOAD_QUERY_EVENT  ||
+      get_type_code() == EXECUTE_LOAD_QUERY_EVENT|| /* todo: make parallel */
+      get_type_code() == PRE_GA_WRITE_ROWS_EVENT ||
+      get_type_code() == PRE_GA_UPDATE_ROWS_EVENT||
+      get_type_code() == PRE_GA_DELETE_ROWS_EVENT||
+
+      get_type_code() == ROWS_QUERY_LOG_EVENT    || /* TODO: make parallel */
+
+      get_type_code() == INCIDENT_EVENT;
+  }
+
+  /**
+     MTS: some events have to be applied by Coordinator concurrently with Workers.
+
+     @return TRUE  if that's the case,
+             FALSE otherwise.
+  */
+  bool mts_async_exec_by_coordinator(ulong slave_server_id)
+  {
+    return
+      (get_type_code() == FORMAT_DESCRIPTION_EVENT ||
+       get_type_code() == ROTATE_EVENT) &&
+      ((server_id == (uint32) ::server_id) || (log_pos == 0));
+  }
+
+  /**
+     Events of a certain type carry partitioning data such as db names.
+  */
+  bool contains_partition_info();
+
+  /**
+     Events of a certain type start or end a group of events treated
+     transactionally wrt binlog.
+  */
+  virtual bool starts_group() { return FALSE; }
+  virtual bool ends_group()   { return FALSE; }
+
+  /**
+     @return index  in \in [0, M] range to indicate
+             to be assigned worker;
+             M is the max index of the worker pool.
+  */
+  Slave_worker *get_slave_worker_id(Relay_log_info const *rli);
+
+  /**
      Apply the event to the database.
 
      This function represents the public interface for applying an
@@ -1185,11 +1310,7 @@ public:
 
      @see do_apply_event
    */
-  int apply_event(Relay_log_info const *rli)
-  {
-    return do_apply_event(rli);
-  }
-
+  int apply_event(Relay_log_info const *rli);
 
   /**
      Update the relay log position.
@@ -1215,27 +1336,6 @@ public:
     return do_shall_skip(rli);
   }
 
-protected:
-
-  /**
-     Helper function to ignore an event w.r.t. the slave skip counter.
-
-     This function can be used inside do_shall_skip() for functions
-     that cannot end a group. If the slave skip counter is 1 when
-     seeing such an event, the event shall be ignored, the counter
-     left intact, and processing continue with the next event.
-
-     A typical usage is:
-     @code
-     enum_skip_reason do_shall_skip(Relay_log_info *rli) {
-       return continue_group(rli);
-     }
-     @endcode
-
-     @return Skip reason
-   */
-  enum_skip_reason continue_group(Relay_log_info *rli);
-
   /**
     Primitive to apply an event to the database.
 
@@ -1256,6 +1356,28 @@ protected:
     return 0;                /* Default implementation does nothing */
   }
 
+  virtual int do_apply_event_worker(Slave_worker *w);
+
+protected:
+
+  /**
+     Helper function to ignore an event w.r.t. the slave skip counter.
+
+     This function can be used inside do_shall_skip() for functions
+     that cannot end a group. If the slave skip counter is 1 when
+     seeing such an event, the event shall be ignored, the counter
+     left intact, and processing continue with the next event.
+
+     A typical usage is:
+     @code
+     enum_skip_reason do_shall_skip(Relay_log_info *rli) {
+       return continue_group(rli);
+     }
+     @endcode
+
+     @return Skip reason
+   */
+  enum_skip_reason continue_group(Relay_log_info *rli);
 
   /**
      Advance relay log coordinates.
@@ -1763,12 +1885,38 @@ public:
     Q_MASTER_DATA_WRITTEN_CODE to the slave's server binlog.
   */
   uint32 master_data_written;
+  /*
+    number of updated db:s by the query and their names. This info
+    is requested by both Coordinator and Worker.
+  */
+  uchar mts_updated_dbs;
+  char mts_updated_db_names[MAX_DBS_IN_QUERY_MTS][NAME_LEN];
 
 #ifdef MYSQL_SERVER
 
   Query_log_event(THD* thd_arg, const char* query_arg, ulong query_length,
                   bool using_trans, bool direct, bool suppress_use, int error);
   const char* get_db() { return db; }
+
+  /**
+     Returns a list of updated db:s or the default db single item list
+     in case of over-MAX_DBS_IN_QUERY_MTS actual db:s.
+  */
+  virtual List<char>* mts_get_dbs(MEM_ROOT *mem_root)
+  {
+    List<char> *res= new (mem_root) List<char>;
+    if (mts_updated_dbs == OVER_MAX_DBS_IN_QUERY_MTS)
+      res->push_back((char*) get_db());
+    else
+      for (uchar i= 0; i < mts_updated_dbs; i++)
+        res->push_back(mts_updated_db_names[i]);
+    return res;
+  }
+
+  virtual uchar mts_number_dbs() { return mts_updated_dbs; }
+
+  virtual uchar mts_number_of_updated_dbs() { return mts_updated_dbs; }
+
 #ifdef HAVE_REPLICATION
   void pack_info(Protocol* protocol);
 #endif /* HAVE_REPLICATION */
@@ -1832,6 +1980,20 @@ public:        /* !!! Public in this pat
       !strncasecmp(query, "SAVEPOINT", 9) ||
       !strncasecmp(query, "ROLLBACK", 8);
   }
+  /**
+     todo: Parallel support for DDL:s.
+     DDL queries are logged without BEGIN/COMMIT parentheses
+     and can be regarded as the starting and the ending events of 
+     its self-group.
+  */
+  bool starts_group() { return !strncmp(query, "BEGIN", q_len); }
+  bool ends_group()
+  {  
+    return
+      !strncmp(query, "COMMIT", q_len) ||
+      (!strncasecmp(query, STRING_WITH_LEN("ROLLBACK"))
+       && strncasecmp(query, STRING_WITH_LEN("ROLLBACK TO ")));
+  }
 };
 
 
@@ -2499,10 +2661,11 @@ class Xid_log_event: public Log_event
   bool write(IO_CACHE* file);
 #endif
   bool is_valid() const { return 1; }
-
+  bool ends_group() { return TRUE; }
 private:
 #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION)
   virtual int do_apply_event(Relay_log_info const *rli);
+  virtual int do_apply_event_worker(Slave_worker *rli);
   enum_skip_reason do_shall_skip(Relay_log_info *rli);
 #endif
 };

=== modified file 'sql/mysqld.cc'
--- a/sql/mysqld.cc	2011-01-03 15:05:36 +0000
+++ b/sql/mysqld.cc	2011-02-27 17:35:25 +0000
@@ -339,6 +339,9 @@ static char *lc_time_names_name;
 char *my_bind_addr_str;
 static char *default_collation_name;
 char *default_storage_engine;
+char *master_info_engine= 0;
+char *relay_log_info_engine= 0;
+char *worker_info_engine= 0;
 static char compiled_default_collation_name[]= MYSQL_DEFAULT_COLLATION_NAME;
 static I_List<THD> thread_cache;
 static bool binlog_format_used= false;
@@ -462,6 +465,13 @@ ulong slave_trans_retries;
 uint  slave_net_timeout;
 ulong slave_exec_mode_options;
 ulonglong slave_type_conversions_options;
+ulong opt_mts_slave_parallel_workers;
+ulong opt_mts_slave_worker_queue_len_max;
+my_bool opt_mts_slave_local_timestamp;
+ulong opt_mts_partition_hash_soft_max;
+ulonglong opt_mts_pending_jobs_size_max;
+ulong opt_mts_coordinator_basic_nap;
+ulong opt_mts_worker_underrun_level;
 ulong thread_cache_size=0;
 ulong binlog_cache_size=0;
 ulonglong  max_binlog_cache_size=0;
@@ -499,7 +509,8 @@ ulong prepared_stmt_count=0;
 ulong thread_id=1L,current_pid;
 ulong slow_launch_threads = 0;
 uint sync_binlog_period= 0, sync_relaylog_period= 0,
-     sync_relayloginfo_period= 0, sync_masterinfo_period= 0;
+     sync_relayloginfo_period= 0, sync_masterinfo_period= 0,
+     mts_checkpoint_period= 0, mts_checkpoint_group;
 ulong expire_logs_days = 0;
 
 const double log_10[] = {
@@ -5963,6 +5974,15 @@ struct my_option my_long_options[]=
   {"default-storage-engine", 0, "The default storage engine for new tables",
    &default_storage_engine, 0, 0, GET_STR, REQUIRED_ARG,
    0, 0, 0, 0, 0, 0 },
+  {"master-info-engine", 0, "The storage engine for the master info repository",
+   &master_info_engine, 0, 0, GET_STR, REQUIRED_ARG,
+   0, 0, 0, 0, 0, 0 },
+  {"relay-log-info-engine", 0, "The storage engine for the relay info repository",
+   &relay_log_info_engine, 0, 0, GET_STR, REQUIRED_ARG,
+   0, 0, 0, 0, 0, 0 },
+  {"worker-info-engine", 0, "The storage engine for the worker info repository",
+   &worker_info_engine, 0, 0, GET_STR, REQUIRED_ARG,
+   0, 0, 0, 0, 0, 0 },
   {"default-time-zone", 0, "Set the default time zone.",
    &default_tz_name, &default_tz_name,
    0, GET_STR, REQUIRED_ARG, 0, 0, 0, 0, 0, 0 },
@@ -7904,9 +7924,8 @@ PSI_mutex_key key_BINLOG_LOCK_index, key
   key_LOCK_server_started, key_LOCK_status,
   key_LOCK_system_variables_hash, key_LOCK_table_share, key_LOCK_thd_data,
   key_LOCK_user_conn, key_LOCK_uuid_generator, key_LOG_LOCK_log,
-  key_master_info_data_lock, key_master_info_run_lock,
-  key_mutex_slave_reporting_capability_err_lock, key_relay_log_info_data_lock,
-  key_relay_log_info_log_space_lock, key_relay_log_info_run_lock,
+  key_mutex_slave_reporting_capability_err_lock,
+  key_relay_log_info_log_space_lock,
   key_structure_guard_mutex, key_TABLE_SHARE_LOCK_ha_data,
   key_LOCK_error_messages, key_LOG_INFO_lock, key_LOCK_thread_count,
   key_PARTITION_LOCK_auto_inc;
@@ -7947,12 +7966,8 @@ static PSI_mutex_info all_server_mutexes
   { &key_LOCK_user_conn, "LOCK_user_conn", PSI_FLAG_GLOBAL},
   { &key_LOCK_uuid_generator, "LOCK_uuid_generator", PSI_FLAG_GLOBAL},
   { &key_LOG_LOCK_log, "LOG::LOCK_log", 0},
-  { &key_master_info_data_lock, "Master_info::data_lock", 0},
-  { &key_master_info_run_lock, "Master_info::run_lock", 0},
   { &key_mutex_slave_reporting_capability_err_lock, "Slave_reporting_capability::err_lock", 0},
-  { &key_relay_log_info_data_lock, "Relay_log_info::data_lock", 0},
   { &key_relay_log_info_log_space_lock, "Relay_log_info::log_space_lock", 0},
-  { &key_relay_log_info_run_lock, "Relay_log_info::run_lock", 0},
   { &key_structure_guard_mutex, "Query_cache::structure_guard_mutex", 0},
   { &key_TABLE_SHARE_LOCK_ha_data, "TABLE_SHARE::LOCK_ha_data", 0},
   { &key_LOCK_error_messages, "LOCK_error_messages", PSI_FLAG_GLOBAL},
@@ -7986,10 +8001,8 @@ PSI_cond_key key_BINLOG_COND_prep_xids, 
   key_COND_cache_status_changed, key_COND_manager,
   key_COND_server_started,
   key_delayed_insert_cond, key_delayed_insert_cond_client,
-  key_item_func_sleep_cond, key_master_info_data_cond,
-  key_master_info_start_cond, key_master_info_stop_cond,
-  key_relay_log_info_data_cond, key_relay_log_info_log_space_cond,
-  key_relay_log_info_start_cond, key_relay_log_info_stop_cond,
+  key_item_func_sleep_cond,
+  key_relay_log_info_log_space_cond,
   key_TABLE_SHARE_cond, key_user_level_lock_cond,
   key_COND_thread_count, key_COND_thread_cache, key_COND_flush_thread_cache;
 
@@ -8011,13 +8024,7 @@ static PSI_cond_info all_server_conds[]=
   { &key_delayed_insert_cond, "Delayed_insert::cond", 0},
   { &key_delayed_insert_cond_client, "Delayed_insert::cond_client", 0},
   { &key_item_func_sleep_cond, "Item_func_sleep::cond", 0},
-  { &key_master_info_data_cond, "Master_info::data_cond", 0},
-  { &key_master_info_start_cond, "Master_info::start_cond", 0},
-  { &key_master_info_stop_cond, "Master_info::stop_cond", 0},
-  { &key_relay_log_info_data_cond, "Relay_log_info::data_cond", 0},
   { &key_relay_log_info_log_space_cond, "Relay_log_info::log_space_cond", 0},
-  { &key_relay_log_info_start_cond, "Relay_log_info::start_cond", 0},
-  { &key_relay_log_info_stop_cond, "Relay_log_info::stop_cond", 0},
   { &key_TABLE_SHARE_cond, "TABLE_SHARE::cond", 0},
   { &key_user_level_lock_cond, "User_level_lock::cond", 0},
   { &key_COND_thread_count, "COND_thread_count", PSI_FLAG_GLOBAL},

=== modified file 'sql/mysqld.h'
--- a/sql/mysqld.h	2010-12-17 11:28:59 +0000
+++ b/sql/mysqld.h	2011-02-27 17:35:25 +0000
@@ -123,6 +123,9 @@ extern my_bool opt_enable_shared_memory;
 extern char *default_tz_name;
 extern Time_zone *default_tz;
 extern char *default_storage_engine;
+extern char *master_info_engine;
+extern char *relay_log_info_engine;
+extern char *worker_info_engine;
 extern bool opt_endinfo, using_udf_functions;
 extern my_bool locked_in_memory;
 extern bool opt_using_transactions;
@@ -130,7 +133,8 @@ extern ulong current_pid;
 extern ulong expire_logs_days;
 extern my_bool relay_log_recovery;
 extern uint sync_binlog_period, sync_relaylog_period, 
-            sync_relayloginfo_period, sync_masterinfo_period;
+            sync_relayloginfo_period, sync_masterinfo_period,
+            mts_checkpoint_period, mts_checkpoint_group;
 extern ulong opt_tc_log_size, tc_log_max_pages_used, tc_log_page_size;
 extern ulong tc_log_page_waits;
 extern my_bool relay_log_purge, opt_innodb_safe_binlog, opt_innodb;
@@ -175,6 +179,14 @@ extern my_bool allow_slave_start;
 extern LEX_CSTRING reason_slave_blocked;
 extern ulong slave_trans_retries;
 extern uint  slave_net_timeout;
+extern ulong opt_mts_slave_parallel_workers;
+extern ulong opt_mts_slave_worker_queue_len_max;
+extern my_bool opt_mts_slave_local_timestamp;
+extern ulong opt_mts_partition_hash_soft_max;
+extern ulonglong opt_mts_pending_jobs_size_max;
+extern ulong opt_mts_coordinator_basic_nap;
+extern ulong opt_mts_worker_underrun_level;
+
 extern uint max_user_connections;
 extern ulong what_to_log,flush_time;
 extern ulong max_prepared_stmt_count, prepared_stmt_count;

=== modified file 'sql/rpl_info.cc'
--- a/sql/rpl_info.cc	2010-12-21 09:33:41 +0000
+++ b/sql/rpl_info.cc	2010-12-27 18:54:41 +0000
@@ -17,61 +17,142 @@
 #include <sql_priv.h>
 #include "rpl_info.h"
 
-Rpl_info::Rpl_info(const char* type
-#ifdef HAVE_PSI_INTERFACE
-                   ,PSI_mutex_key *param_key_info_run_lock,
-                   PSI_mutex_key *param_key_info_data_lock,
-                   PSI_mutex_key *param_key_info_data_cond,
-                   PSI_mutex_key *param_key_info_start_cond,
-                   PSI_mutex_key *param_key_info_stop_cond
-#endif
-                 )
-  :Slave_reporting_capability(type),
-#ifdef HAVE_PSI_INTERFACE
-  key_info_run_lock(param_key_info_run_lock),
-  key_info_data_lock(param_key_info_data_lock),
-  key_info_data_cond(param_key_info_data_cond),
-  key_info_start_cond(param_key_info_start_cond),
-  key_info_stop_cond(param_key_info_stop_cond),
-#endif
-  info_thd(0), inited(0), abort_slave(0),
+Rpl_info::Rpl_info(const char* type)
+  : Slave_reporting_capability(type),
+  info_thd(0), uidx(0), nidx(0),
+  inited(0), abort_slave(0),
   slave_running(0), slave_run_id(0),
   handler(0)
 {
-#ifdef HAVE_PSI_INTERFACE
-  mysql_mutex_init(*key_info_run_lock,
-                    &run_lock, MY_MUTEX_INIT_FAST);
-  mysql_mutex_init(*key_info_data_lock,
-                   &data_lock, MY_MUTEX_INIT_FAST);
-  mysql_cond_init(*key_info_data_cond, &data_cond, NULL);
-  mysql_cond_init(*key_info_start_cond, &start_cond, NULL);
-  mysql_cond_init(*key_info_stop_cond, &stop_cond, NULL);
-#else
-  mysql_mutex_init(NULL, &run_lock, MY_MUTEX_INIT_FAST);
-  mysql_mutex_init(NULL, &data_lock, MY_MUTEX_INIT_FAST);
-  mysql_cond_init(NULL, &data_cond, NULL);
-  mysql_cond_init(NULL, &start_cond, NULL);
-  mysql_cond_init(NULL, &stop_cond, NULL);
-#endif
+
 }
 
 Rpl_info::~Rpl_info()
 {
-  DBUG_ENTER("Rpl_info::~Rpl_info");
-
-  mysql_mutex_destroy(&run_lock);
-  mysql_mutex_destroy(&data_lock);
-  mysql_cond_destroy(&data_cond);
-  mysql_cond_destroy(&start_cond);
-  mysql_cond_destroy(&stop_cond);
+  if (uidx)
+    delete []uidx;
 
   if (handler)
     delete handler;
-
-  DBUG_VOID_RETURN;
 }
 
 void Rpl_info::set_rpl_info_handler(Rpl_info_handler * param_handler)
 {
   handler= param_handler;
 }
+
+Rpl_info_coordinator::Rpl_info_coordinator(const char* type, const char* psf)
+  : Rpl_info(type), registered_mutexes(FALSE), /* flag read by destructor */
+  mutex_info(0), cond_info(0), key_mutex_info(0), key_cond_info(0)
+{
+  register_mutexes(psf); /* sets registered_mutexes to TRUE on success only */
+}
+
+bool Rpl_info_coordinator::register_mutexes(const char* description)
+{
+  const int NUMBER_MUTEX_INFO= 2;
+  const int RUN_LOCK_IDX=   0;
+  const int DATA_LOCK_IDX=  1;
+
+  const int NUMBER_COND_INFO= 3;
+  const int DATA_COND_IDX=  0;
+  const int START_COND_IDX= 1;
+  const int STOP_COND_IDX=  2;
+
+  if (!(key_mutex_info= new PSI_mutex_key[NUMBER_MUTEX_INFO]) ||
+      !(key_cond_info= new PSI_cond_key[NUMBER_COND_INFO]) ||
+      !(mutex_info= new PSI_mutex_info[NUMBER_MUTEX_INFO]) ||
+      !(cond_info= new PSI_cond_info[NUMBER_COND_INFO]))
+    goto err;
+
+  mutex_info[RUN_LOCK_IDX].m_key= (PSI_mutex_key *) &(key_mutex_info[RUN_LOCK_IDX]);
+  mutex_info[RUN_LOCK_IDX].m_name= "Rpl_info_coordinator::run_lock";
+  mutex_info[RUN_LOCK_IDX].m_flags= 0;
+  mutex_info[DATA_LOCK_IDX].m_key= (PSI_mutex_key *) &(key_mutex_info[DATA_LOCK_IDX]);
+  mutex_info[DATA_LOCK_IDX].m_name= "Rpl_info_coordinator::data_lock";
+  mutex_info[DATA_LOCK_IDX].m_flags= 0;
+
+  cond_info[DATA_COND_IDX].m_key= (PSI_cond_key *) &(key_cond_info[DATA_COND_IDX]);
+  cond_info[DATA_COND_IDX].m_name= "Rpl_info_coordinator::data_cond";
+  cond_info[DATA_COND_IDX].m_flags= 0;
+  cond_info[START_COND_IDX].m_key= (PSI_cond_key *) &(key_cond_info[START_COND_IDX]);
+  cond_info[START_COND_IDX].m_name= "Rpl_info_coordinator::start_cond";
+  cond_info[START_COND_IDX].m_flags= 0;
+  cond_info[STOP_COND_IDX].m_key= (PSI_cond_key *) &(key_cond_info[STOP_COND_IDX]);
+  cond_info[STOP_COND_IDX].m_name= "Rpl_info_coordinator::stop_cond";
+  cond_info[STOP_COND_IDX].m_flags= 0;
+
+  if (PSI_server)
+  {
+    PSI_server->register_mutex(description, mutex_info,
+                               NUMBER_MUTEX_INFO);
+
+    PSI_server->register_cond(description, cond_info,
+                              NUMBER_COND_INFO);
+  }
+
+  mysql_mutex_init(key_mutex_info[RUN_LOCK_IDX], &run_lock,
+                   MY_MUTEX_INIT_FAST);
+  mysql_mutex_init(key_mutex_info[DATA_LOCK_IDX], &data_lock,
+                   MY_MUTEX_INIT_FAST);
+
+  mysql_cond_init(key_cond_info[DATA_COND_IDX], &data_cond, NULL);
+  mysql_cond_init(key_cond_info[START_COND_IDX], &start_cond, NULL);
+  mysql_cond_init(key_cond_info[STOP_COND_IDX], &stop_cond, NULL);
+
+  registered_mutexes= TRUE; 
+
+  return (FALSE);
+
+err:
+  if (key_mutex_info)
+    delete []key_mutex_info;
+  
+  if (key_cond_info)
+    delete []key_cond_info;
+
+  if (mutex_info)
+    delete []mutex_info;
+
+  if (cond_info)
+    delete []cond_info;
+
+  return (TRUE);
+}
+
+Rpl_info_coordinator::~Rpl_info_coordinator()
+{
+  DBUG_ENTER("Rpl_info_coordinator::~Rpl_info_coordinator");
+
+  if (registered_mutexes)
+  {
+    mysql_mutex_destroy(&run_lock);
+    mysql_mutex_destroy(&data_lock);
+    mysql_cond_destroy(&data_cond);
+    mysql_cond_destroy(&start_cond);
+    mysql_cond_destroy(&stop_cond);
+  
+    if (key_mutex_info)
+      delete []key_mutex_info;
+  
+    if (key_cond_info)
+      delete []key_cond_info;
+
+    if (mutex_info)
+      delete []mutex_info;
+
+    if (cond_info)
+      delete []cond_info;
+  }
+
+  DBUG_VOID_RETURN;
+}
+
+Rpl_info_worker::Rpl_info_worker(const char* type, const char* pfs)
+  : Rpl_info(type)
+{
+}
+
+Rpl_info_worker::~Rpl_info_worker()
+{
+}

=== modified file 'sql/rpl_info.h'
--- a/sql/rpl_info.h	2010-12-21 09:33:41 +0000
+++ b/sql/rpl_info.h	2010-12-27 18:54:41 +0000
@@ -20,58 +20,33 @@
 #include "sql_class.h"
 #include "rpl_info_handler.h"
 #include "rpl_reporting.h"
+#include "rpl_constants.h"
 
 class Rpl_info : public Slave_reporting_capability
 {
 public:
-  /*
-    standard lock acquisition order to avoid deadlocks:
-    run_lock, data_lock, relay_log.LOCK_log, relay_log.LOCK_index
-  */
-  mysql_mutex_t data_lock,run_lock;
-  /*
-    start_cond is broadcast when SQL thread is started
-    stop_cond - when stopped
-    data_cond - when data protected by data_lock changes
-  */
-  mysql_cond_t data_cond,start_cond, stop_cond;
-
-#ifdef HAVE_PSI_INTERFACE
-  PSI_mutex_key *key_info_run_lock, *key_info_data_lock;
+  THD *info_thd;
 
-  PSI_mutex_key *key_info_data_cond, *key_info_start_cond, *key_info_stop_cond;
-#endif
+  ulong *uidx;
 
-  THD *info_thd;
+  uint nidx;
 
   bool inited;
   volatile bool abort_slave;
   volatile uint slave_running;
   volatile ulong slave_run_id;
 
-#ifndef DBUG_OFF
-  int events_until_exit;
-#endif
-
-  Rpl_info(const char* type
-#ifdef HAVE_PSI_INTERFACE
-           ,PSI_mutex_key *param_key_info_run_lock,
-           PSI_mutex_key *param_key_info_data_lock,
-           PSI_mutex_key *param_key_info_data_cond,
-           PSI_mutex_key *param_key_info_start_cond,
-           PSI_mutex_key *param_key_info_stop_cond
-#endif
-          );
+  Rpl_info(const char *type);
   virtual ~Rpl_info();
 
   int check_info()
   {
-    return (handler->check_info());
+    return (handler->check_info(uidx, nidx));
   }
 
   int remove_info()
   {
-    return (handler->remove_info());
+    return (handler->remove_info(uidx, nidx));
   }
 
   bool is_transactional()
@@ -79,6 +54,12 @@ public:
     return (handler->is_transactional());
   }
 
+  void set_idx_info(ulong *param_uidx, uint param_nidx)
+  {
+    uidx= param_uidx;
+    nidx= param_nidx;
+  }
+
   char *get_description_info()
   {
     return (handler->get_description_info());
@@ -86,7 +67,7 @@ public:
 
   bool copy_info(Rpl_info_handler *from, Rpl_info_handler *to)
   {
-    if (read_info(from) || write_info(to, TRUE))
+    if (read_info(from) || write_info(to))
       return(TRUE);
 
     return(FALSE);
@@ -104,9 +85,72 @@ protected:
 
 private:
   virtual bool read_info(Rpl_info_handler *from)= 0;
-  virtual bool write_info(Rpl_info_handler *to, bool force)= 0;
+  virtual bool write_info(Rpl_info_handler *to)= 0;
 
   Rpl_info& operator=(const Rpl_info& info);
   Rpl_info(const Rpl_info& info);
 };
+
+class Rpl_info_coordinator: public Rpl_info
+{
+public:
+  /*
+    standard lock acquisition order to avoid deadlocks:
+    run_lock, data_lock, relay_log.LOCK_log, relay_log.LOCK_index
+  */
+  mysql_mutex_t data_lock,run_lock;
+
+  /*
+    start_cond is broadcast when SQL thread is started
+    stop_cond - when stopped
+    data_cond - when data protected by data_lock changes
+  */
+  mysql_cond_t data_cond, start_cond, stop_cond;
+
+#ifndef DBUG_OFF
+  int events_until_exit;
+#endif
+
+  Rpl_info_coordinator(const char* type, const char* pfs);
+  virtual ~Rpl_info_coordinator();
+
+  /**
+    Identifies if mutexes and condition variables were successfully
+    created and registered. 
+  */
+  bool registered_mutexes;
+
+protected:
+  /**
+    Registers mutexes and condition variables in the performance
+    schema.
+
+    @param description identifier to ease its localization in
+    the performance schema.
+
+    @return FALSE if success, TRUE if error.
+  */
+  bool register_mutexes(const char* description);
+
+private:
+  PSI_mutex_info *mutex_info;
+  PSI_cond_info *cond_info;
+  PSI_mutex_key *key_mutex_info;
+  PSI_cond_key *key_cond_info;
+
+  Rpl_info_coordinator& operator=(const Rpl_info_coordinator& info);
+  Rpl_info_coordinator(const Rpl_info_coordinator& info);
+};
+
+class Rpl_info_worker: public Rpl_info
+{
+public:
+
+  Rpl_info_worker(const char* type, const char* pfs);
+  virtual ~Rpl_info_worker();
+
+private:
+  Rpl_info_worker& operator=(const Rpl_info_worker& info);
+  Rpl_info_worker(const Rpl_info_worker& info);
+};
 #endif /* RPL_INFO_H */

=== added file 'sql/rpl_info_dummy.cc'
--- a/sql/rpl_info_dummy.cc	1970-01-01 00:00:00 +0000
+++ b/sql/rpl_info_dummy.cc	2010-12-15 17:46:05 +0000
@@ -0,0 +1,177 @@
+/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+#include <my_global.h>
+#include "rpl_info_dummy.h"
+
+Rpl_info_dummy::Rpl_info_dummy(const int nparam)
+  :Rpl_info_handler(nparam)
+{
+}
+
+int Rpl_info_dummy::do_init_info(const ulong *uidx __attribute__((unused)),
+                                const uint nidx __attribute__((unused)))
+{
+  return 0;
+}
+
+int Rpl_info_dummy::do_prepare_info_for_read(const uint nidx
+                                             __attribute__((unused)))
+{
+  if (abort) DBUG_ASSERT(0);
+  cursor= 0;
+  return 0;
+}
+
+int Rpl_info_dummy::do_prepare_info_for_write(const uint nidx
+                                              __attribute__((unused)))
+{
+  if (abort) DBUG_ASSERT(0);
+  cursor= 0;
+  return 0;
+}
+
+int Rpl_info_dummy::do_check_info(const ulong *uidx __attribute__((unused)),
+                                 const uint nidx __attribute__((unused)))
+{
+  if (abort) DBUG_ASSERT(0);
+  return 0;
+}
+
+int Rpl_info_dummy::do_flush_info(const ulong *uidx __attribute__((unused)),
+                                 const uint nidx __attribute__((unused)),
+                                 const bool force __attribute__((unused)))
+{
+  if (abort) DBUG_ASSERT(0);
+  return 0;
+}
+
+void Rpl_info_dummy::do_end_info(const ulong *uidx __attribute__((unused)),
+                                const uint nidx __attribute__((unused)))
+{
+  return;
+}
+
+int Rpl_info_dummy::do_remove_info(const ulong *uidx __attribute__((unused)),
+                                  const uint nidx __attribute__((unused)))
+{
+  if (abort) DBUG_ASSERT(0);
+  return 0;
+}
+
+bool Rpl_info_dummy::do_set_info(const int pos __attribute__((unused)),
+                                const char *value __attribute__((unused)))
+{
+  if (abort) DBUG_ASSERT(0);
+  return FALSE;
+}
+
+bool Rpl_info_dummy::do_set_info(const int pos __attribute__((unused)),
+                                const uchar *value __attribute__((unused)),
+                                const size_t size __attribute__((unused)))
+{
+  if (abort) DBUG_ASSERT(0);
+  return FALSE;
+}
+
+bool Rpl_info_dummy::do_set_info(const int pos __attribute__((unused)),
+                                const ulong value __attribute__((unused)))
+{
+  if (abort) DBUG_ASSERT(0);
+  return FALSE;
+}
+
+bool Rpl_info_dummy::do_set_info(const int pos __attribute__((unused)),
+                                const int value __attribute__((unused)))
+{
+  if (abort) DBUG_ASSERT(0);
+  return FALSE;
+}
+
+bool Rpl_info_dummy::do_set_info(const int pos __attribute__((unused)),
+                                const float value __attribute__((unused)))
+{
+  if (abort) DBUG_ASSERT(0);
+  return FALSE;
+}
+
+bool Rpl_info_dummy::do_set_info(const int pos __attribute__((unused)),
+                                const Dynamic_ids *value __attribute__((unused)))
+{
+  if (abort) DBUG_ASSERT(0);
+  return FALSE;
+}
+
+bool Rpl_info_dummy::do_get_info(const int pos __attribute__((unused)),
+                                char *value __attribute__((unused)),
+                                const size_t size __attribute__((unused)),
+                                const char *default_value __attribute__((unused)))
+{
+  if (abort) DBUG_ASSERT(0);
+  return FALSE;
+}
+
+bool Rpl_info_dummy::do_get_info(const int pos __attribute__((unused)),
+                                uchar *value __attribute__((unused)),
+                                const size_t size __attribute__((unused)),
+                                const uchar *default_value __attribute__((unused)))
+{
+  if (abort) DBUG_ASSERT(0);
+  return FALSE;
+}
+
+bool Rpl_info_dummy::do_get_info(const int pos __attribute__((unused)),
+                                ulong *value __attribute__((unused)),
+                                const ulong default_value __attribute__((unused)))
+{
+  if (abort) DBUG_ASSERT(0);
+  return FALSE;
+}
+
+bool Rpl_info_dummy::do_get_info(const int pos __attribute__((unused)),
+                                int *value __attribute__((unused)),
+                                const int default_value __attribute__((unused)))
+{
+  if (abort) DBUG_ASSERT(0);
+  return FALSE;
+}
+
+bool Rpl_info_dummy::do_get_info(const int pos __attribute__((unused)),
+                                float *value __attribute__((unused)),
+                                const float default_value __attribute__((unused)))
+{
+  if (abort) DBUG_ASSERT(0);
+  return FALSE;
+}
+
+bool Rpl_info_dummy::do_get_info(const int pos __attribute__((unused)),
+                                Dynamic_ids *value __attribute__((unused)),
+                                const Dynamic_ids *default_value __attribute__((unused)))
+{
+  if (abort) DBUG_ASSERT(0);
+  return FALSE;
+}
+
+char* Rpl_info_dummy::do_get_description_info()
+{
+  if (abort) DBUG_ASSERT(0);
+  return NULL;
+}
+
+bool Rpl_info_dummy::do_is_transactional()
+{
+  if (abort) DBUG_ASSERT(0);
+  return FALSE;
+}

=== added file 'sql/rpl_info_dummy.h'
--- a/sql/rpl_info_dummy.h	1970-01-01 00:00:00 +0000
+++ b/sql/rpl_info_dummy.h	2010-12-15 17:46:05 +0000
@@ -0,0 +1,73 @@
+/* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; version 2 of the License.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, write to the Free Software
+   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */
+
+#ifndef RPL_INFO_DUMMY_H
+#define RPL_INFO_DUMMY_H
+
+#include <my_global.h>
+#include <sql_priv.h>
+#include "rpl_info_handler.h"
+
+/**
+  Defines a dummy handler that should only be internally accessed.
+  This class is useful for debugging and performance tests.
+
+  The flag abort indicates if the execution should abort if some
+  methods are called. See the code for further details.
+*/
+class Rpl_info_dummy : public Rpl_info_handler
+{
+public:
+  Rpl_info_dummy(const int nparam);  /* Rpl_info_factory::create_info_dummy() passes its nfields here */
+  virtual ~Rpl_info_dummy() { };
+
+private:  /* one declaration per do_*() hook that Rpl_info_handler declares as pure virtual */
+  int do_init_info(const ulong *uidx, const uint nidx);
+  int do_check_info(const ulong *uidx, const uint nidx);
+  void do_end_info(const ulong *uidx, const uint nidx);
+  int do_flush_info(const ulong *uidx, const uint nidx,
+                    const bool force);
+  int do_remove_info(const ulong *uidx, const uint nidx);
+
+  int do_prepare_info_for_read(const uint nidx);
+  int do_prepare_info_for_write(const uint nidx);
+  bool do_set_info(const int pos, const char *value);
+  bool do_set_info(const int pos, const uchar *value,
+                   const size_t size);
+  bool do_set_info(const int pos, const int value);
+  bool do_set_info(const int pos, const ulong value);
+  bool do_set_info(const int pos, const float value);
+  bool do_set_info(const int pos, const Dynamic_ids *value);
+  bool do_get_info(const int pos, char *value, const size_t size,
+                   const char *default_value);
+  bool do_get_info(const int pos, uchar *value, const size_t size,
+                   const uchar *default_value);
+  bool do_get_info(const int pos, int *value,
+                   const int default_value);
+  bool do_get_info(const int pos, ulong *value,
+                   const ulong default_value);
+  bool do_get_info(const int pos, float *value,
+                   const float default_value);
+  bool do_get_info(const int pos, Dynamic_ids *value,
+                   const Dynamic_ids *default_value);
+  char* do_get_description_info();
+  bool do_is_transactional();
+
+  static const bool abort= FALSE;  /* tested by the stubs as "if (abort) DBUG_ASSERT(0)"; FALSE keeps that check inert */
+
+  Rpl_info_dummy& operator=(const Rpl_info_dummy& info);  /* private copy operations; presumably declared only to forbid copying -- confirm no definition exists */
+  Rpl_info_dummy(const Rpl_info_dummy& info);
+};
+#endif /* RPL_INFO_DUMMY_H */

=== modified file 'sql/rpl_info_factory.cc'
--- a/sql/rpl_info_factory.cc	2010-12-21 09:33:41 +0000
+++ b/sql/rpl_info_factory.cc	2010-12-27 18:54:41 +0000
@@ -18,12 +18,8 @@
 #include "rpl_slave.h"
 #include "rpl_info_factory.h"
 
-/*
-  We need to replace these definitions by an option that states the
-  engine one wants to use in the master info repository.
-*/
-#define master_info_engine NULL
-#define relay_log_info_engine NULL
+#define NUMBER_OF_FIELDS_TO_IDENTIFY_COORDINATOR 1
+#define NUMBER_OF_FIELDS_TO_IDENTIFY_WORKER 2
 
 /**
   Creates both a Master info and a Relay log info repository whose types are
@@ -39,10 +35,10 @@
   @retval FALSE No error
   @retval TRUE  Failure
 */ 
-bool Rpl_info_factory::create(uint mi_option, Master_info **mi,
-                              uint rli_option, Relay_log_info **rli)
+bool Rpl_info_factory::create_coordinators(uint mi_option, Master_info **mi,
+                                           uint rli_option, Relay_log_info **rli)
 {
-  DBUG_ENTER("Rpl_info_factory::Rpl_info_factory");
+  DBUG_ENTER("Rpl_info_factory::create_coordinators");
 
   if (!((*mi)= Rpl_info_factory::create_mi(mi_option)))
     DBUG_RETURN(TRUE);
@@ -82,45 +78,61 @@ Master_info *Rpl_info_factory::create_mi
   Master_info* mi= NULL;
   Rpl_info_file*  mi_file= NULL;
   Rpl_info_table*  mi_table= NULL;
+  ulong *key_info_idx= NULL;
   const char *msg= "Failed to allocate memory for the master info "
                    "structure";
 
-  DBUG_ENTER("Rpl_info_factory::Rpl_info_factory");
+  DBUG_ENTER("Rpl_info_factory::create_mi");
 
-  if (!(mi= new Master_info(
-#ifdef HAVE_PSI_INTERFACE
-                            &key_master_info_run_lock,
-                            &key_master_info_data_lock,
-                            &key_master_info_data_cond,
-                            &key_master_info_start_cond,
-                            &key_master_info_stop_cond
-#endif
-                           )))
+  if (!(mi= new Master_info()) ||
+      !(mi->registered_mutexes))
     goto err;
 
-  /*
-    Now we instantiate all info repos and later decide which one to take,
-    but not without first checking if there is already existing data for
-    a repo different from the one that is being requested.
-  */
-  if (!(mi_file= new Rpl_info_file(mi->get_number_info_mi_fields(),
-                                   master_info_file)))
-    goto err;
-
-  if (!(mi_table= new Rpl_info_table(mi->get_number_info_mi_fields() + 1,
-                                     MI_FIELD_ID, MI_SCHEMA, MI_TABLE)))
-    goto err;
+  if (!(key_info_idx= new ulong[NUMBER_OF_FIELDS_TO_IDENTIFY_COORDINATOR]))
+     goto err;
+  key_info_idx[0]= server_id;
+  mi->set_idx_info(key_info_idx, NUMBER_OF_FIELDS_TO_IDENTIFY_COORDINATOR);
 
   DBUG_ASSERT(mi_option == MI_REPOSITORY_FILE ||
-              mi_option == MI_REPOSITORY_TABLE);
+              mi_option == MI_REPOSITORY_TABLE ||
+              mi_option == MI_REPOSITORY_DUMMY);
 
-  if (decide_repository(mi, &mi_table, &mi_file,
+  if (mi_option == MI_REPOSITORY_FILE ||
+      mi_option == MI_REPOSITORY_TABLE)
+  {
+    /*
+      Now we instantiate all info repos and later decide which one to take,
+      but not without first checking if there is already existing data for
+      a repo different from the one that is being requested.
+    */
+    if (!(mi_file= new Rpl_info_file(mi->get_number_info_mi_fields(),
+                                   master_info_file)))
+      goto err;
+
+    if (!(mi_table= new Rpl_info_table(mi->get_number_info_mi_fields() + 1,
+                                     MI_SCHEMA, MI_TABLE)))
+      goto err;
+
+    /*
+      In a multi-master environment, we need to make sure that both master
+      info and relay log info are prepared to handle events from all
+      masters. In such case, we need to execute the code below for each
+      master and correctly set the key_info_idx. /Alfranio
+    */
+    if (decide_repository(mi, &mi_table, &mi_file,
                         mi_option == MI_REPOSITORY_TABLE, &msg))
-    goto err;
+      goto err;
 
-  if ((mi_option == MI_REPOSITORY_TABLE) &&
-       change_engine(static_cast<Rpl_info_table *>(mi_table),
-                     master_info_engine, &msg))
+    if ((mi_option == MI_REPOSITORY_TABLE) &&
+         change_engine(static_cast<Rpl_info_table *>(mi_table),
+                       master_info_engine, &msg))
+      goto err;
+
+    DBUG_RETURN(mi);
+  }
+
+  if (Rpl_info_factory::create_info_dummy(mi,
+                                          mi->get_number_info_mi_fields()))
     goto err;
 
   DBUG_RETURN(mi);
@@ -128,6 +140,11 @@ Master_info *Rpl_info_factory::create_mi
 err:
   if (mi_file) delete mi_file;
   if (mi_table) delete mi_table;
+  if (key_info_idx)
+  {
+    delete []key_info_idx;
+    mi->set_idx_info(NULL, 0);
+  }
   if (mi)
   {
     /*
@@ -162,45 +179,61 @@ Relay_log_info *Rpl_info_factory::create
   Relay_log_info *rli= NULL;
   Rpl_info_file* rli_file= NULL;
   Rpl_info_table* rli_table= NULL;
+  ulong *key_info_idx= NULL;
   const char *msg= "Failed to allocate memory for the relay log info "
                    "structure";
 
   DBUG_ENTER("Rpl_info_factory::create_rli");
 
-  if (!(rli= new Relay_log_info(is_slave_recovery
-#ifdef HAVE_PSI_INTERFACE
-                                ,&key_relay_log_info_run_lock,
-                                &key_relay_log_info_data_lock,
-                                &key_relay_log_info_data_cond,
-                                &key_relay_log_info_start_cond,
-                                &key_relay_log_info_stop_cond
-#endif
-                               )))
+  if (!(rli= new Relay_log_info(is_slave_recovery)) ||
+      !(rli->registered_mutexes))
     goto err;
 
-  /*
-    Now we instantiate all info repos and later decide which one to take,
-    but not without first checking if there is already existing data for
-    a repo different from the one that is being requested.
-  */
-  if (!(rli_file= new Rpl_info_file(rli->get_number_info_rli_fields(),
-                                    relay_log_info_file)))
-    goto err;
-
-  if (!(rli_table= new Rpl_info_table(rli->get_number_info_rli_fields() + 1,
-                                      RLI_FIELD_ID, RLI_SCHEMA, RLI_TABLE)))
-    goto err;
+  if (!(key_info_idx= new ulong[NUMBER_OF_FIELDS_TO_IDENTIFY_COORDINATOR]))
+     goto err;
+  key_info_idx[0]= server_id;
+  rli->set_idx_info(key_info_idx, NUMBER_OF_FIELDS_TO_IDENTIFY_COORDINATOR);
 
   DBUG_ASSERT(rli_option == RLI_REPOSITORY_FILE ||
-              rli_option == RLI_REPOSITORY_TABLE);
+              rli_option == RLI_REPOSITORY_TABLE ||
+              rli_option == RLI_REPOSITORY_DUMMY);
 
-  if (decide_repository(rli, &rli_table, &rli_file,
-                        rli_option == RLI_REPOSITORY_TABLE, &msg))
-    goto err;
+  if (rli_option == RLI_REPOSITORY_FILE ||
+      rli_option == RLI_REPOSITORY_TABLE)
+  {
+    /*
+      Now we instantiate all info repos and later decide which one to take,
+      but not without first checking if there is already existing data for
+      a repo different from the one that is being requested.
+    */
+    if (!(rli_file= new Rpl_info_file(rli->get_number_info_rli_fields(),
+                                      relay_log_info_file)))
+      goto err;
+
+    if (!(rli_table= new Rpl_info_table(rli->get_number_info_rli_fields() + 1,
+                                        RLI_SCHEMA, RLI_TABLE)))
+      goto err;
 
-  if ((rli_option == RLI_REPOSITORY_TABLE) &&
-      change_engine(static_cast<Rpl_info_table *>(rli_table),
-                    relay_log_info_engine, &msg))
+    /*
+      In a multi-master environment, we need to make sure that both master
+      info and relay log info are prepared to handle events from all
+      masters. In such case, we need to execute the code below for each
+      master and correctly set the key_info_idx. /Alfranio
+    */
+    if (decide_repository(rli, &rli_table, &rli_file,
+                          rli_option == RLI_REPOSITORY_TABLE, &msg))
+      goto err;
+
+    if ((rli_option == RLI_REPOSITORY_TABLE) &&
+        change_engine(static_cast<Rpl_info_table *>(rli_table),
+                      relay_log_info_engine, &msg))
+      goto err;
+
+    DBUG_RETURN(rli);
+  }
+
+  if (Rpl_info_factory::create_info_dummy(rli,
+                                          rli->get_number_info_rli_fields()))
     goto err;
 
   DBUG_RETURN(rli);
@@ -208,6 +241,11 @@ Relay_log_info *Rpl_info_factory::create
 err:
   if (rli_file) delete rli_file;
   if (rli_table) delete rli_table;
+  if (key_info_idx)
+  {
+    delete []key_info_idx;
+    rli->set_idx_info(NULL, 0);
+  }
   if (rli) 
   {
     /*
@@ -258,7 +296,8 @@ err:
   @retval FALSE No error
   @retval TRUE  Failure
 */
-bool Rpl_info_factory::decide_repository(Rpl_info *info, Rpl_info_table **table,
+bool Rpl_info_factory::decide_repository(Rpl_info *info,
+                                         Rpl_info_table **table,
                                          Rpl_info_file **file, bool is_table,
                                          const char **msg)
 {
@@ -266,8 +305,8 @@ bool Rpl_info_factory::decide_repository
   DBUG_ENTER("Rpl_info_factory::decide_repository");
  
   bool error= TRUE;
-  bool is_t= !((*table)->check_info());
-  bool is_f= !((*file)->check_info());
+  bool is_t= !((*table)->check_info(info->uidx, info->nidx));
+  bool is_f= !((*file)->check_info(info->uidx, info->nidx));
 
   if (is_t && is_f)
   {
@@ -281,7 +320,8 @@ bool Rpl_info_factory::decide_repository
   {
     if (!is_t && is_f)
     {
-      if ((*table)->init_info() || (*file)->init_info())
+      if ((*table)->init_info(info->uidx, info->nidx) ||
+          (*file)->init_info(info->uidx, info->nidx))
       {
         *msg= "Error transfering information from a file to a table.";
         goto err;
@@ -289,8 +329,13 @@ bool Rpl_info_factory::decide_repository
       /*
         Transfer the information from the file to the table and delete the
         file, i.e. Update the table (T) and delete the file (F).
+    
+        This process is not atomic and if the server crashes before removing
+        the file, the user needs to manually remove it.
       */
-      if (info->copy_info(*file, *table) || (*file)->remove_info())
+      if (info->copy_info(*file, *table) ||
+          (*table)->flush_info(info->uidx, info->nidx, TRUE)
+          || (*file)->remove_info(info->uidx, info->nidx))
       {
         *msg= "Error transfering information from a file to a table.";
         goto err;
@@ -305,7 +350,8 @@ bool Rpl_info_factory::decide_repository
   {
     if (is_t && !is_f)
     {
-      if ((*table)->init_info() || (*file)->init_info())
+      if ((*table)->init_info(info->uidx, info->nidx) ||
+          (*file)->init_info(info->uidx, info->nidx))
       {
         *msg= "Error transfering information from a file to a table.";
         goto err;
@@ -314,8 +360,15 @@ bool Rpl_info_factory::decide_repository
         Transfer the information from the table to the file and delete 
         entries in the table, i.e. Update the file (F) and delete the
         table (T).
+
+        This process is not atomic and if the server crashes before deleting
+        the entry in the table, the user needs to manually remove the file and
+        repeat the process. Note that we assume single-master replication here.
+        This process needs to be changed if there are multi-masters.
       */
-      if (info->copy_info(*table, *file) || (*table)->remove_info())
+      if (info->copy_info(*table, *file) ||
+          (*file)->flush_info(info->uidx, info->nidx, TRUE)
+          || (*table)->remove_info(info->uidx, info->nidx))
       {
         *msg= "Error transfering information from a table to a file.";
         goto err;
@@ -344,7 +397,7 @@ err:
 bool Rpl_info_factory::change_engine(Rpl_info_table *table, const char *engine,
                                      const char **msg)
 {
-  DBUG_ENTER("Rpl_info_factory::decide_engine");
+  DBUG_ENTER("Rpl_info_factory::change_engine");
 
   if (engine && table->change_engine(engine))
   {
@@ -354,3 +407,113 @@ bool Rpl_info_factory::change_engine(Rpl
 
   DBUG_RETURN(FALSE);
 }
+
+/**
+  Creates a Slave_worker tied to a FILE, TABLE or DUMMY info repository.
+
+  @param[in] worker_option One of the WORKER_REPOSITORY_* values
+  @param[in] worker_id     Worker number; names the worker and keys its info
+  @param[in] rli           Passed to the Slave_worker constructor
+
+  @return The new worker, or NULL (after logging msg) on failure
+*/
+Slave_worker *Rpl_info_factory::create_worker(uint worker_option, uint worker_id,
+                                              Relay_log_info *rli)
+{
+  DBUG_ENTER("Rpl_info_factory::create_worker");
+
+  char info_fname[FN_REFLEN];
+  char info_name[FN_REFLEN];
+  Rpl_info_file*  w_file= NULL;
+  Rpl_info_table* w_table= NULL;
+  ulong *key_info_idx= NULL;
+  Slave_worker *worker= NULL;
+  const char *msg= "Failed to allocate memory for the Slave worker info "
+                   "structure";
+  /* File is "<relay_log_info_file>.<id>"; worker name is "worker.<id>". */
+  char *pos= strmov(info_fname, relay_log_info_file);
+  sprintf(pos, ".%u", worker_id);
+  pos= strmov(info_name, "worker");
+  sprintf(pos, ".%u", worker_id);
+
+  if (!(worker= new Slave_worker(info_name, "worker", rli)))
+    goto err;
+
+  /* The worker is identified by the pair (server_id, worker_id). */
+  if (!(key_info_idx= new ulong[NUMBER_OF_FIELDS_TO_IDENTIFY_WORKER]))
+     goto err;
+  key_info_idx[0]= server_id;
+  key_info_idx[1]= worker_id;
+  worker->set_idx_info(key_info_idx, NUMBER_OF_FIELDS_TO_IDENTIFY_WORKER);
+  worker->id= worker_id;
+
+  DBUG_ASSERT(worker_option == WORKER_REPOSITORY_FILE ||
+              worker_option == WORKER_REPOSITORY_TABLE ||
+              worker_option == WORKER_REPOSITORY_DUMMY);
+
+  if (worker_option == WORKER_REPOSITORY_FILE ||
+      worker_option == WORKER_REPOSITORY_TABLE)
+  {
+    /* Instantiate both repos; decide_repository() below checks whether the
+       other kind already holds data before settling on the requested one. */
+    if (!(w_file= new Rpl_info_file(worker->get_number_worker_fields(),
+                                    info_fname)))
+      goto err;
+
+    if (!(w_table= new Rpl_info_table(worker->get_number_worker_fields() + 2,
+                                      WORKER_SCHEMA, WORKER_TABLE)))
+      goto err;
+
+    /* TODO (/Alfranio): check if this is conceptually right and what happens
+       in case of partial failures. Problems will not happen if we decide to
+       use only TABLES as repositories. */
+    if (decide_repository(worker, &w_table, &w_file,
+                          worker_option == WORKER_REPOSITORY_TABLE, &msg))
+      goto err;
+
+    if ((worker_option == WORKER_REPOSITORY_TABLE) &&
+         change_engine(static_cast<Rpl_info_table *>(w_table),
+                       worker_info_engine, &msg))
+      goto err;
+
+    DBUG_RETURN(worker);
+  }
+
+  if (Rpl_info_factory::create_info_dummy(worker,
+                                          worker->get_number_worker_fields()))
+    goto err;
+
+  DBUG_RETURN(worker);
+
+err:
+  if (w_file) delete w_file;
+  if (w_table) delete w_table;
+  if (key_info_idx)
+  {
+    delete []key_info_idx;
+    worker->set_idx_info(NULL, 0);
+  }
+  if (worker)
+  {
+    /* The handler was previously deleted, so remove any reference to it
+       before the worker itself is destroyed. */
+    worker->set_rpl_info_handler(NULL);
+    delete worker;
+  }
+  sql_print_error("%s", msg);
+  DBUG_RETURN(NULL);
+}
+
+/** Installs a new Rpl_info_dummy built from nfields as the handler of rpl.
+    @retval FALSE handler installed   @retval TRUE allocation failed */
+bool Rpl_info_factory::create_info_dummy(Rpl_info *rpl, uint nfields)
+{
+  DBUG_ENTER("Rpl_info_factory::create_info_dummy");
+
+  Rpl_info_dummy* dummy_handler= new Rpl_info_dummy(nfields);
+  if (dummy_handler == NULL)
+    DBUG_RETURN(TRUE);
+
+  rpl->set_rpl_info_handler(dummy_handler);
+  DBUG_RETURN(FALSE);
+}

=== modified file 'sql/rpl_info_factory.h'
--- a/sql/rpl_info_factory.h	2010-10-25 10:39:01 +0000
+++ b/sql/rpl_info_factory.h	2010-12-18 00:33:02 +0000
@@ -19,46 +19,61 @@
 #include "rpl_info.h"
 #include "rpl_mi.h"
 #include "rpl_rli.h"
+#include "rpl_rli_pdb.h"
 #include "rpl_info_file.h"
 #include "rpl_info_table.h"
+#include "rpl_info_dummy.h"
 #include "rpl_info_handler.h"
 
 enum enum_mi_repository
 {
   MI_REPOSITORY_FILE= 0,
-  MI_REPOSITORY_TABLE= 1
+  MI_REPOSITORY_TABLE= 1,
+  MI_REPOSITORY_DUMMY= 2
 };
 extern ulong opt_mi_repository_id;
 
 enum enum_rli_repository
 {
   RLI_REPOSITORY_FILE= 0,
-  RLI_REPOSITORY_TABLE= 1
+  RLI_REPOSITORY_TABLE= 1,
+  RLI_REPOSITORY_DUMMY= 2
 };
 extern ulong opt_rli_repository_id;
 
-#define MI_FIELD_ID 0
+enum enum_worker_repository  /* values accepted as worker_option by Rpl_info_factory::create_worker() */
+{
+  WORKER_REPOSITORY_FILE= 0,   /* file and table repos are both built; decide_repository() settles on one */
+  WORKER_REPOSITORY_TABLE= 1,  /* same path as FILE, and change_engine() is additionally invoked */
+  WORKER_REPOSITORY_DUMMY= 2   /* an Rpl_info_dummy handler is installed via create_info_dummy() */
+};
+extern ulong opt_worker_repository_id;
 
 #define MI_SCHEMA "mysql"
 #define MI_TABLE  "slave_master_info"
 
-#define RLI_FIELD_ID 0
-
 #define RLI_SCHEMA "mysql"
 #define RLI_TABLE  "slave_relay_log_info"
 
+#define WORKER_SCHEMA "mysql"
+#define WORKER_TABLE  "slave_worker_info"
+
 class Rpl_info_factory
 {
   public:
 
-  static bool create(uint mi_option, Master_info **mi,
-                     uint rli_option, Relay_log_info **rli);
+  static bool create_coordinators(uint mi_option, Master_info **mi,
+                                  uint rli_option, Relay_log_info **rli);
+  static Slave_worker *create_worker(uint rli_option, uint worker_id,
+                                     Relay_log_info *rli);
   static Master_info *create_mi(uint rli_option);
   static Relay_log_info *create_rli(uint rli_option, bool is_slave_recovery);
-  static bool decide_repository(Rpl_info *info, Rpl_info_table **table,
+  static bool decide_repository(Rpl_info *info,
+                                Rpl_info_table **table,
                                 Rpl_info_file **file, bool is_table,
                                 const char **msg);
   static bool change_engine(Rpl_info_table *table, const char *engine,
                             const char **msg);
+ static bool create_info_dummy(Rpl_info *rpl, uint nfields);
 };
 #endif

=== modified file 'sql/rpl_info_file.cc'
--- a/sql/rpl_info_file.cc	2010-10-29 09:07:21 +0000
+++ b/sql/rpl_info_file.cc	2010-12-15 17:46:05 +0000
@@ -39,7 +39,8 @@ Rpl_info_file::Rpl_info_file(const int n
   DBUG_VOID_RETURN;
 }
 
-int Rpl_info_file::do_init_info()
+int Rpl_info_file::do_init_info(const ulong *uidx __attribute__((unused)),
+                                const uint nidx __attribute__((unused)))
 {
   int error= 0;
 
@@ -47,7 +48,7 @@ int Rpl_info_file::do_init_info()
   DBUG_ENTER("Rpl_info_file::do_init_info");
 
   /* does info file exist ? */
-  if (do_check_info())
+  if (do_check_info(uidx, nidx))
   {
     /*
       If someone removed the file from underneath our feet, just close
@@ -106,26 +107,31 @@ file '%s')", info_fname);
   DBUG_RETURN(error);
 }
 
-int Rpl_info_file::do_prepare_info_for_read()
+int Rpl_info_file::do_prepare_info_for_read(const uint nidx
+                                            __attribute__((unused)))
 {
   cursor= 0;
   prv_error= FALSE;
   return (reinit_io_cache(&info_file, READ_CACHE, 0L, 0, 0));
 }
 
-int Rpl_info_file::do_prepare_info_for_write()
+int Rpl_info_file::do_prepare_info_for_write(const uint nidx
+                                             __attribute__((unused)))
 {
   cursor= 0;
   prv_error= FALSE;
   return (reinit_io_cache(&info_file, WRITE_CACHE, 0L, 0, 1));
 }
 
-int Rpl_info_file::do_check_info()
+int Rpl_info_file::do_check_info(const ulong *uidx __attribute__((unused)),
+                                 const uint nidx __attribute__((unused)))
 {
   return (access(info_fname,F_OK));
 }
 
-int Rpl_info_file::do_flush_info(const bool force)
+int Rpl_info_file::do_flush_info(const ulong *uidx,
+                                 const uint nidx,
+                                 const bool force)
 {
   int error= 0;
 
@@ -145,7 +151,8 @@ int Rpl_info_file::do_flush_info(const b
   DBUG_RETURN(error);
 }
 
-void Rpl_info_file::do_end_info()
+void Rpl_info_file::do_end_info(const ulong *uidx __attribute__((unused)),
+                                const uint nidx __attribute__((unused)))
 {
   DBUG_ENTER("Rpl_info_file::do_end_info");
 
@@ -159,7 +166,8 @@ void Rpl_info_file::do_end_info()
   DBUG_VOID_RETURN;
 }
 
-int Rpl_info_file::do_remove_info()
+int Rpl_info_file::do_remove_info(const ulong *uidx __attribute__((unused)),
+                                  const uint nidx __attribute__((unused)))
 {
   MY_STAT stat_area;
   int error= 0;
@@ -178,6 +186,12 @@ bool Rpl_info_file::do_set_info(const in
           FALSE : TRUE);
 }
 
+bool Rpl_info_file::do_set_info(const int pos, const uchar *value,  /* pos is not read in the body */
+                                const size_t size)                  /* size: byte count handed to my_b_write */
+{
+  return (my_b_write(&info_file, value, size));  /* result returned as-is; presumably non-zero means failure -- confirm against IO_CACHE */
+}
+
 bool Rpl_info_file::do_set_info(const int pos, const ulong value)
 {
   return (my_b_printf(&info_file, "%lu\n", value) > (size_t) 0 ?
@@ -211,7 +225,7 @@ bool Rpl_info_file::do_set_info(const in
           FALSE : TRUE);
 }
 
-bool Rpl_info_file::do_set_info(const int pos, const Server_ids *value)
+bool Rpl_info_file::do_set_info(const int pos, const Dynamic_ids *value)
 {
   bool error= TRUE;
   String buffer;
@@ -219,7 +233,7 @@ bool Rpl_info_file::do_set_info(const in
   /*
     This produces a line listing the total number and all the server_ids.
   */
-  if (const_cast<Server_ids *>(value)->pack_server_ids(&buffer))
+  if (const_cast<Dynamic_ids *>(value)->pack_dynamic_ids(&buffer))
     goto err;
 
   error= (my_b_printf(&info_file, "%s\n", buffer.c_ptr_safe()) >
@@ -235,6 +249,12 @@ bool Rpl_info_file::do_get_info(const in
                                 default_value));
 }
 
+bool Rpl_info_file::do_get_info(const int pos, uchar *value, const size_t size,  /* pos is not read in the body */
+                                const uchar *default_value)                      /* default_value is ignored: no fallback is applied */
+{
+  return(my_b_read(&info_file, value, size));  /* result returned as-is; presumably non-zero means failure -- confirm against IO_CACHE */
+}
+
 bool Rpl_info_file::do_get_info(const int pos, ulong *value,
                                 const ulong default_value)
 {
@@ -256,8 +276,8 @@ bool Rpl_info_file::do_get_info(const in
                                   default_value));
 }
 
-bool Rpl_info_file::do_get_info(const int pos, Server_ids *value,
-                                const Server_ids *default_value __attribute__((unused)))
+bool Rpl_info_file::do_get_info(const int pos, Dynamic_ids *value,
+                                const Dynamic_ids *default_value __attribute__((unused)))
 {
   /*
     Static buffer to use most of the times. However, if it is not big
@@ -271,7 +291,7 @@ bool Rpl_info_file::do_get_info(const in
                                              &buffer_act,
                                              &info_file);
   if (!error)
-    value->unpack_server_ids(buffer_act);
+    value->unpack_dynamic_ids(buffer_act);
 
   if (buffer != buffer_act)
   {

=== modified file 'sql/rpl_info_file.h'
--- a/sql/rpl_info_file.h	2010-10-25 10:39:01 +0000
+++ b/sql/rpl_info_file.h	2010-12-15 17:46:05 +0000
@@ -41,29 +41,34 @@ private:
   /* IO_CACHE of the info file - set only during init or end */
   IO_CACHE info_file;
 
-  int do_init_info();
-  int do_check_info();
-  void do_end_info();
-  int do_flush_info(const bool force);
-  int do_remove_info();
+  int do_init_info(const ulong *uidx, const uint nidx);
+  int do_check_info(const ulong *uidx, const uint nidx);
+  void do_end_info(const ulong *uidx, const uint nidx);
+  int do_flush_info(const ulong *uidx, const uint nidx,
+                    const bool force);
+  int do_remove_info(const ulong *uidx, const uint nidx);
 
-  int do_prepare_info_for_read();
-  int do_prepare_info_for_write();
+  int do_prepare_info_for_read(const uint nidx);
+  int do_prepare_info_for_write(const uint nidx);
   bool do_set_info(const int pos, const char *value);
+  bool do_set_info(const int pos, const uchar *value,
+                   const size_t size);
   bool do_set_info(const int pos, const int value);
   bool do_set_info(const int pos, const ulong value);
   bool do_set_info(const int pos, const float value);
-  bool do_set_info(const int pos, const Server_ids *value);
+  bool do_set_info(const int pos, const Dynamic_ids *value);
   bool do_get_info(const int pos, char *value, const size_t size,
                    const char *default_value);
+  bool do_get_info(const int pos, uchar *value, const size_t size,
+                   const uchar *default_value);
   bool do_get_info(const int pos, int *value,
                    const int default_value);
   bool do_get_info(const int pos, ulong *value,
                    const ulong default_value);
   bool do_get_info(const int pos, float *value,
                    const float default_value);
-  bool do_get_info(const int pos, Server_ids *value,
-                   const Server_ids *default_value);
+  bool do_get_info(const int pos, Dynamic_ids *value,
+                   const Dynamic_ids *default_value);
   char* do_get_description_info();
   bool do_is_transactional();
 

=== modified file 'sql/rpl_info_handler.h'
--- a/sql/rpl_info_handler.h	2010-10-25 10:39:01 +0000
+++ b/sql/rpl_info_handler.h	2010-12-15 17:46:05 +0000
@@ -17,7 +17,7 @@
 #define RPL_INFO_HANDLER_H
 
 #include <my_global.h>
-#include <server_ids.h>
+#include <dynamic_ids.h>
 #include "rpl_info_values.h"
 
 class Rpl_info_handler
@@ -30,26 +30,32 @@ public:
     After creating an object and assembling components, this method is
     used to initialize internal structures. Everything that does not
     depend on other components (e.g. mutexes) should be placed in the
-    object's constructor though. 
+    object's constructor though.
+
+    @param[in] uidx Array of fields that identifies an object
+    @param[in] nidx Number of fields in the array
 
     @retval FALSE success,
     @retval TRUE  otherwise error.
   */
-  int init_info()
+  int init_info(const ulong *uidx, const uint nidx)
   {
-    return do_init_info();
+    return do_init_info(uidx, nidx);
   }
 
   /**
     Checks if any necessary dependency is satisfied such as a
     file exists.
 
+    @param[in] uidx Array of fields that identifies an object
+    @param[in] nidx Number of fields in the array
+
     @retval FALSE success,
     @retval TRUE  otherwise error.
   */
-  int check_info()
+  int check_info(const ulong *uidx, const uint nidx)
   {
-    return do_check_info();
+    return do_check_info(uidx, nidx);
   }
 
   /**
@@ -65,36 +71,44 @@ public:
     system, it may happen that the changes will only end up in the
     operating system's cache and a crash may lead to inconsistencies.
 
+    @param[in] uidx  Array of fields that identifies an object
+    @param[in] nidx  Number of fields in the array
     @param[in] force Always sync the information.
 
     @retval FALSE No error
     @retval TRUE  Failure
   */
-  int flush_info(const bool force)
+  int flush_info(const ulong *uidx, const uint nidx, const bool force)
   {
-    return do_flush_info(force);
+    return do_flush_info(uidx, nidx, force);
   }
 
   /**
     Deletes any information in the repository.
 
+    @param[in] uidx Array of fields that identifies an object
+    @param[in] nidx Number of fields in the array
+
     @retval FALSE No error
     @retval TRUE  Failure
   */
-  int remove_info()
+  int remove_info(const ulong *uidx, const uint nidx)
   {
-    return do_remove_info();
+    return do_remove_info(uidx, nidx);
   }
 
   /**
     Closes access to the repository.
 
+    @param[in] uidx Array of fields that identifies an object
+    @param[in] nidx Number of fields in the array
+
     @retval FALSE No error
     @retval TRUE  Failure
   */
-  void end_info()
+  void end_info(const ulong *uidx, const uint nidx)
   {
-    do_end_info();
+    do_end_info(uidx, nidx);
   }
 
   /**
@@ -104,9 +118,9 @@ public:
     @retval FALSE No error
     @retval TRUE  Failure
   */
-  int prepare_info_for_read()
+  int prepare_info_for_read(const uint nidx)
   {
-    return (do_prepare_info_for_read());
+    return (do_prepare_info_for_read(nidx));
   }
 
   /**
@@ -116,9 +130,9 @@ public:
     @retval FALSE No error
     @retval TRUE  Failure
   */
-  int prepare_info_for_write()
+  int prepare_info_for_write(const uint nidx)
   {
-    return (do_prepare_info_for_write());
+    return (do_prepare_info_for_write(nidx));
   }
 
   /**
@@ -144,6 +158,18 @@ public:
     return(prv_error);
   }
 
+  template <class TypeHandler>  /* sized variant: forwards value plus size to do_set_info(pos, value, size) */
+  bool set_info(TypeHandler const value, const size_t size)
+  {
+    if (cursor >= ninfo || prv_error)  /* no field left to write, or an earlier call already failed */
+      return TRUE;
+
+    if (!(prv_error= do_set_info(cursor, value, size)))  /* failure is remembered in prv_error */
+      cursor++;                                          /* move to the next field only on success */
+
+    return(prv_error);
+  }
+
   /**
     Returns the value of a field.
     Any call must be done in the right order which
@@ -185,8 +211,9 @@ public:
     @retval FALSE No error
     @retval TRUE Failure
   */
-  bool get_info(char *value, const size_t size,
-                const char *default_value)
+  template <class TypeHandler>
+  bool get_info(TypeHandler value, const size_t size,
+                TypeHandler const default_value)
   {
     if (cursor >= ninfo || prv_error)
       return TRUE;
@@ -210,8 +237,8 @@ public:
     @retval FALSE No error
     @retval TRUE Failure
   */
-  bool get_info(Server_ids *value,
-                const Server_ids *default_value)
+  bool get_info(Dynamic_ids *value,
+                const Dynamic_ids *default_value)
   {
     if (cursor >= ninfo || prv_error)
       return TRUE;
@@ -290,29 +317,36 @@ protected:
   uint sync_period;
 
 private:
-  virtual int do_init_info()= 0;
-  virtual int do_check_info()= 0;
-  virtual int do_flush_info(const bool force)= 0;
-  virtual int do_remove_info()= 0;
-  virtual void do_end_info()= 0;
-  virtual int do_prepare_info_for_read()= 0;
-  virtual int do_prepare_info_for_write()= 0;
+  virtual int do_init_info(const ulong *uidx, const uint nidx)= 0;
+  virtual int do_check_info(const ulong *uidx, const uint nidx)= 0;
+  virtual int do_flush_info(const ulong *uidx, const uint nidx,
+                            const bool force)= 0;
+  virtual int do_remove_info(const ulong *uidx, const uint nidx)= 0;
+  virtual void do_end_info(const ulong *uidx, const uint nidx)= 0;
+  virtual int do_prepare_info_for_read(const uint nidx)= 0;
+  virtual int do_prepare_info_for_write(const uint nidx)= 0;
 
   virtual bool do_set_info(const int pos, const char *value)= 0;
+  virtual bool do_set_info(const int pos, const uchar *value,
+                           const size_t size)= 0;
   virtual bool do_set_info(const int pos, const ulong value)= 0;
   virtual bool do_set_info(const int pos, const int value)= 0;
   virtual bool do_set_info(const int pos, const float value)= 0;
-  virtual bool do_set_info(const int pos, const Server_ids *value)= 0;
-  virtual bool do_get_info(const int pos, char *value, const size_t size,
+  virtual bool do_set_info(const int pos, const Dynamic_ids *value)= 0;
+  virtual bool do_get_info(const int pos, char *value,
+                           const size_t size,
                            const char *default_value)= 0;
+  virtual bool do_get_info(const int pos, uchar *value,
+                           const size_t size,
+                           const uchar *default_value)= 0;
   virtual bool do_get_info(const int pos, ulong *value,
                            const ulong default_value)= 0;
   virtual bool do_get_info(const int pos, int *value,
                            const int default_value)= 0;
   virtual bool do_get_info(const int pos, float *value,
                            const float default_value)= 0;
-  virtual bool do_get_info(const int pos, Server_ids *value,
-                           const Server_ids *default_value)= 0;
+  virtual bool do_get_info(const int pos, Dynamic_ids *value,
+                           const Dynamic_ids *default_value)= 0;
   virtual char* do_get_description_info()= 0;
   virtual bool do_is_transactional()= 0;
 

=== modified file 'sql/rpl_info_table.cc'
--- a/sql/rpl_info_table.cc	2010-12-03 00:15:40 +0000
+++ b/sql/rpl_info_table.cc	2010-12-21 17:30:50 +0000
@@ -15,12 +15,12 @@
 
 #include "rpl_info_table.h"
 #include "rpl_utility.h"
-#include "sql_parse.h"
+#include "sql_prepare.h"
 
-Rpl_info_table::Rpl_info_table(uint nparam, uint param_field_idx,
+Rpl_info_table::Rpl_info_table(uint nparam,
                                const char* param_schema,
                                const char *param_table)
-:Rpl_info_handler(nparam), field_idx(param_field_idx)
+:Rpl_info_handler(nparam), is_transactional(FALSE)
 {
   str_schema.str= str_table.str= NULL;
   str_schema.length= str_table.length= 0;
@@ -65,7 +65,7 @@ Rpl_info_table::~Rpl_info_table()
     my_free(str_schema.str);
 }
 
-int Rpl_info_table::do_init_info()
+int Rpl_info_table::do_init_info(const ulong *uidx, const uint nidx)
 {
   int error= 1;
   enum enum_return_id res= FOUND_ID;
@@ -89,11 +89,10 @@ int Rpl_info_table::do_init_info()
     goto end;
 
   /*
-    Points the cursor at the row to be read where the master_id equals to
-    the server_id.
+    Points the cursor at the row to be read.
   */
-  if ((res= access->find_info_for_server_id(server_id, field_idx,
-                                            field_values, table)) == FOUND_ID)
+  if ((res= access->find_info(uidx, nidx, field_values,
+                              table)) == FOUND_ID)
   {
     /*
       Reads the information stored in the rpl_info table into a
@@ -116,7 +115,8 @@ end:
   DBUG_RETURN(error);
 }
 
-int Rpl_info_table::do_flush_info(const bool force)
+int Rpl_info_table::do_flush_info(const ulong *uidx, const uint nidx,
+                                  const bool force)
 {
   int error= 1;
   enum enum_return_id res= FOUND_ID;
@@ -145,12 +145,11 @@ int Rpl_info_table::do_flush_info(const 
     goto end;
 
   /*
-    Points the cursor at the row to be read where the master_id
-    equals to the server_id. If the row is not found an error is
-    reported.
+    Points the cursor at the row to be read. If the row is not found
+    an error is reported.
   */
-  if ((res= access->find_info_for_server_id(server_id, field_idx,
-                                            field_values, table)) == NOT_FOUND_ID)
+  if ((res= access->find_info(uidx, nidx, field_values,
+                              table)) == NOT_FOUND_ID)
   {
     /*
       Prepares the information to be stored before calling ha_write_row.
@@ -214,7 +213,7 @@ end:
   DBUG_RETURN(error);
 }
 
-int Rpl_info_table::do_remove_info()
+int Rpl_info_table::do_remove_info(const ulong *uidx, const uint nidx)
 {
   int error= 1;
   enum enum_return_id res= FOUND_ID;
@@ -238,12 +237,11 @@ int Rpl_info_table::do_remove_info()
     goto end;
 
   /*
-    Points the cursor at the row to be deleted where the the master_id
-    equals to the server_id. If the row is not found, the execution
-    proceeds normally.
+    Points the cursor at the row to be deleted. If the row is not
+    found, the execution proceeds normally.
   */
-  if ((res= access->find_info_for_server_id(server_id, field_idx,
-                                            field_values, table)) == FOUND_ID)
+  if ((res= access->find_info(uidx, nidx, field_values,
+                              table)) == FOUND_ID)
   {
     /*
       Deletes a row in the rpl_info table.
@@ -266,7 +264,7 @@ end:
   DBUG_RETURN(error);
 }
 
-int Rpl_info_table::do_check_info()
+int Rpl_info_table::do_check_info(const ulong *uidx, const uint nidx)
 {
   int error= 1;
   TABLE *table= NULL;
@@ -288,12 +286,11 @@ int Rpl_info_table::do_check_info()
     goto end;
 
   /*
-    Points the cursor at the row to be deleted where the the master_id
-    equals to the server_id. If the row is not found, an error is
-    reported.
+    Points the cursor at the row to be checked. If the row is not
+    found, an error is reported.
   */
-  if (access->find_info_for_server_id(server_id, field_idx,
-                                      field_values, table) != FOUND_ID)
+  if (access->find_info(uidx, nidx, field_values,
+                        table) != FOUND_ID)
   {
     /* 
        We cannot simply call my_error here because it does not
@@ -314,23 +311,23 @@ end:
   DBUG_RETURN(error);
 }
 
-void Rpl_info_table::do_end_info()
+void Rpl_info_table::do_end_info(const ulong *uidx, const uint nidx)
 {
 }
 
-int Rpl_info_table::do_prepare_info_for_read()
+int Rpl_info_table::do_prepare_info_for_read(const uint nidx)
 {
   if (!field_values)
     return TRUE;
 
-  cursor= 1;
+  cursor= nidx;
 
   return FALSE;
 }
 
-int Rpl_info_table::do_prepare_info_for_write()
+int Rpl_info_table::do_prepare_info_for_write(const uint nidx)
 {
-  return(do_prepare_info_for_read());
+  return(do_prepare_info_for_read(nidx));
 }
 
 bool Rpl_info_table::do_set_info(const int pos, const char *value)
@@ -339,6 +336,13 @@ bool Rpl_info_table::do_set_info(const i
                                         &my_charset_bin));
 }
 
+bool Rpl_info_table::do_set_info(const int pos, const uchar *value,
+                                 const size_t size)
+{
+  /* Stores size raw bytes in slot pos; value stays const-qualified. */
+  return field_values->value[pos].copy((const char *) value, size, &my_charset_bin);
+}
+
 bool Rpl_info_table::do_set_info(const int pos, const ulong value)
 {
   return (field_values->value[pos].set_int(value, TRUE,
@@ -357,9 +361,9 @@ bool Rpl_info_table::do_set_info(const i
                                             &my_charset_bin));
 }
 
-bool Rpl_info_table::do_set_info(const int pos, const Server_ids *value)
+bool Rpl_info_table::do_set_info(const int pos, const Dynamic_ids *value)
 {
-  if (const_cast<Server_ids *>(value)->pack_server_ids(&field_values->value[pos]))
+  if (const_cast<Dynamic_ids *>(value)->pack_dynamic_ids(&field_values->value[pos]))
     return TRUE;
 
   return FALSE;
@@ -379,6 +383,15 @@ bool Rpl_info_table::do_get_info(const i
   return FALSE;
 }
 
+bool Rpl_info_table::do_get_info(const int pos, uchar *value, const size_t size,
+                                 const uchar *default_value __attribute__((unused)))
+{
+  if (field_values->value[pos].length() != size)
+    return TRUE;                    /* stored length differs from size */
+  memcpy(value, field_values->value[pos].c_ptr_safe(), size);
+  return FALSE;                     /* size bytes were copied into value */
+}
+
 bool Rpl_info_table::do_get_info(const int pos, ulong *value,
                                  const ulong default_value)
 {
@@ -431,10 +444,10 @@ bool Rpl_info_table::do_get_info(const i
   return TRUE;
 }
 
-bool Rpl_info_table::do_get_info(const int pos, Server_ids *value,
-                                 const Server_ids *default_value __attribute__((unused)))
+bool Rpl_info_table::do_get_info(const int pos, Dynamic_ids *value,
+                                 const Dynamic_ids *default_value __attribute__((unused)))
 {
-  if (value->unpack_server_ids(field_values->value[pos].c_ptr_safe()))
+  if (value->unpack_dynamic_ids(field_values->value[pos].c_ptr_safe()))
     return TRUE;
 
   return FALSE;
@@ -447,16 +460,37 @@ char* Rpl_info_table::do_get_description
 
 bool Rpl_info_table::do_is_transactional()
 {
+  return is_transactional;
+}
+
+bool Rpl_info_table::change_engine(const char *engine)
+{
+  bool error= TRUE;
   ulong saved_mode;
   TABLE *table= NULL;
   Open_tables_backup backup;
-  bool is_trans= FALSE;
-
-  DBUG_ENTER("Rpl_info_table::do_is_transactional");
+  String query;
+  LEX_STRING lex;
+ 
+  DBUG_ENTER("Rpl_info_table::do_check_info");
 
   THD *thd= access->create_thd();
-
+  Ed_connection conn(thd);
   saved_mode= thd->variables.sql_mode;
+  tmp_disable_binlog(thd);
+
+  if (query.append(STRING_WITH_LEN("ALTER TABLE ")) ||
+      query.append(description) ||
+      query.append(STRING_WITH_LEN(" ENGINE= ")) ||
+      query.append(engine) ||
+      query.append(";"))
+     goto err;
+
+  lex.str= query.c_ptr_safe();
+  lex.length= query.length();
+
+  if ((error= conn.execute_direct(lex)))
+    goto err;
 
   /*
     Opens and locks the rpl_info table before accessing it.
@@ -464,30 +498,11 @@ bool Rpl_info_table::do_is_transactional
   if (!access->open_table(thd, str_schema, str_table,
                           get_number_info(), TL_READ,
                           &table, &backup))
-    is_trans= table->file->has_transactions();
+    is_transactional= table->file->has_transactions();
 
   access->close_table(thd, table, &backup, 0);
-  thd->variables.sql_mode= saved_mode;
-  access->drop_thd(thd);
-  DBUG_RETURN(is_trans);
-}
-
-bool Rpl_info_table::change_engine(const char *engine)
-{
-  bool error= TRUE;
-  ulong saved_mode;
-
-  DBUG_ENTER("Rpl_info_table::do_check_info");
-
-  THD *thd= access->create_thd();
-
-  saved_mode= thd->variables.sql_mode;
-  tmp_disable_binlog(thd);
-
-  /* TODO: Change the engine using internal functions */
-
-  error= FALSE;
 
+err:
   reenable_binlog(thd);
   thd->variables.sql_mode= saved_mode;
   access->drop_thd(thd);

=== modified file 'sql/rpl_info_table.h'
--- a/sql/rpl_info_table.h	2010-10-25 10:39:01 +0000
+++ b/sql/rpl_info_table.h	2010-12-15 17:46:05 +0000
@@ -22,7 +22,7 @@
 class Rpl_info_table : public Rpl_info_handler
 {
 public:
-  Rpl_info_table(uint nparam, uint param_field_id, const char* param_schema,
+  Rpl_info_table(uint nparam, const char* param_schema,
                  const char *param_table);
   virtual ~Rpl_info_table();
   /*
@@ -49,11 +49,6 @@ private:
   LEX_STRING str_table;
 
   /*
-    This property indentifies the id/position of the field that is
-    used as primary key.
-  */
-  uint field_idx;
-  /*
     This property represents a description of the repository.
     Speciffically, "schema"."table".
   */
@@ -65,29 +60,40 @@ private:
   */
   Rpl_info_table_access *access;
 
-  int do_init_info();
-  int do_check_info();
-  void do_end_info();
-  int do_flush_info(const bool force);
-  int do_remove_info();
+  /*
+    Identifies if a table is transactional or non-transactional.
+    This is used to provide a crash-safe behaviour.
+  */
+  bool is_transactional;       
+
+  int do_init_info(const ulong *uidx, const uint nidx);
+  int do_check_info(const ulong *uidx, const uint nidx);
+  void do_end_info(const ulong *uidx, const uint nidx);
+  int do_flush_info(const ulong *uidx, const uint nidx,
+                    const bool force);
+  int do_remove_info(const ulong *uidx, const uint nidx);
 
-  int do_prepare_info_for_read();
-  int do_prepare_info_for_write();
+  int do_prepare_info_for_read(const uint nidx);
+  int do_prepare_info_for_write(const uint nidx);
   bool do_set_info(const int pos, const char *value);
+  bool do_set_info(const int pos, const uchar *value,
+                   const size_t size);
   bool do_set_info(const int pos, const int value);
   bool do_set_info(const int pos, const ulong value);
   bool do_set_info(const int pos, const float value);
-  bool do_set_info(const int pos, const Server_ids *value);
+  bool do_set_info(const int pos, const Dynamic_ids *value);
   bool do_get_info(const int pos, char *value, const size_t size,
                    const char *default_value);
+  bool do_get_info(const int pos, uchar *value, const size_t size,
+                   const uchar *default_value);
   bool do_get_info(const int pos, int *value,
                    const int default_value);
   bool do_get_info(const int pos, ulong *value,
                    const ulong default_value);
   bool do_get_info(const int pos, float *value,
                    const float default_value);
-  bool do_get_info(const int pos, Server_ids *value,
-                   const Server_ids *default_value);
+  bool do_get_info(const int pos, Dynamic_ids *value,
+                   const Dynamic_ids *default_value);
   char* do_get_description_info();
   bool do_is_transactional();
 

=== modified file 'sql/rpl_info_table_access.cc'
--- a/sql/rpl_info_table_access.cc	2010-10-27 11:20:32 +0000
+++ b/sql/rpl_info_table_access.cc	2010-12-21 17:30:50 +0000
@@ -160,8 +160,8 @@ bool Rpl_info_table_access::close_table(
 
   In case search succeeded, the table cursor points to the found row.
 
-  @param[in]      server_id    Server id
-  @param[in]      idx          Index field
+  @param[in]      uidx         Array of fields in pk
+  @param[in]      nidx         Number of fields in pk
   @param[in,out]  field_values The sequence of values
   @param[in,out]  table        Table
 
@@ -170,34 +170,53 @@ bool Rpl_info_table_access::close_table(
     @retval NOT_FOUND The row was not found.
     @retval ERROR     There was a failure.
 */
-enum enum_return_id Rpl_info_table_access::find_info_for_server_id(ulong server_id,
-                                                                   uint idx,
-                                                                   Rpl_info_values *field_values,
-                                                                   TABLE *table)
+enum enum_return_id Rpl_info_table_access::find_info(const ulong *uidx,
+                                                     const uint nidx,
+                                                     Rpl_info_values *field_values,
+                                                     TABLE *table)
 {
   uchar key[MAX_KEY_LENGTH];
-  DBUG_ENTER("Rpl_info_table_access::find_info_for_server_id");
+  DBUG_ENTER("Rpl_info_table_access::find_info");
 
-  field_values->value[idx].set_int(server_id, TRUE, &my_charset_bin);
-
-  if (field_values->value[idx].length() > table->field[idx]->field_length)
+  /*
+     The lookup requires an enabled primary key that is the first
+     key of the table and has exactly nidx key parts.
+  */
+  if (!(table->s->primary_key == 0 &&
+      table->s->keys_in_use.is_set(table->s->primary_key) &&
+      table->key_info->key_parts == nidx))
     DBUG_RETURN(ERROR_ID);
 
-  table->field[idx]->store(field_values->value[idx].c_ptr_safe(),
-                           field_values->value[idx].length(),
-                           &my_charset_bin);
-
-  if (!(table->field[idx]->flags & PRI_KEY_FLAG) &&
-      table->s->keys_in_use.is_set(0))
-    DBUG_RETURN(ERROR_ID);
+  uint offset_idx= table->s->primary_key;
 
+  for (uint idx= 0; idx < nidx; idx++)
+  {
+    /*
+      The key fields are assumed to be contiguous in the table's
+      definition. Relaxing this would require iterating through
+      the keyinfo instead.
+    */
+    if (!(table->field[idx + offset_idx]->flags & PRI_KEY_FLAG))
+      DBUG_RETURN(ERROR_ID);
+
+    field_values->value[idx + offset_idx].set_int(uidx[idx], TRUE, &my_charset_bin);
+
+    /*
+      The converted key value must fit in the field that stores it.
+    */
+    if (field_values->value[idx + offset_idx].length() >
+        table->field[idx + offset_idx]->field_length)
+      DBUG_RETURN(ERROR_ID);
+
+    table->field[idx + offset_idx]->store(field_values->value[idx + offset_idx].c_ptr_safe(),
+                             field_values->value[idx + offset_idx].length(),
+                             &my_charset_bin);
+  }
   key_copy(key, table->record[0], table->key_info, table->key_info->key_length);
 
   if (table->file->ha_index_read_idx_map(table->record[0], 0, key, HA_WHOLE_KEY,
                                          HA_READ_KEY_EXACT))
-  {
     DBUG_RETURN(NOT_FOUND_ID);
-  }
 
   DBUG_RETURN(FOUND_ID);
 }
@@ -227,7 +246,7 @@ bool Rpl_info_table_access::load_info_va
   {
     fields[field_idx]->val_str(&str);
     field_values->value[field_idx].copy(str.c_ptr_safe(), str.length(),
-                                             &my_charset_bin);
+                                        &my_charset_bin);
     field_idx++;
   }
 
@@ -256,6 +275,7 @@ bool Rpl_info_table_access::store_info_v
   while (field_idx < max_num_field)
   {
     fields[field_idx]->set_notnull();
+
     if (fields[field_idx]->store(field_values->value[field_idx].c_ptr_safe(),
                                  field_values->value[field_idx].length(),
                                  &my_charset_bin))
@@ -288,6 +308,7 @@ THD *Rpl_info_table_access::create_thd()
     thd= new THD;
     thd->thread_stack= (char*) &thd;
     thd->store_globals();
+    thd->security_ctx->skip_grants();
   }
   else
     thd= current_thd;

=== modified file 'sql/rpl_info_table_access.h'
--- a/sql/rpl_info_table_access.h	2010-10-25 10:39:01 +0000
+++ b/sql/rpl_info_table_access.h	2010-11-11 11:53:01 +0000
@@ -36,8 +36,8 @@ public:
                   TABLE** table, Open_tables_backup* backup);
   bool close_table(THD* thd, TABLE* table, Open_tables_backup* backup,
                    bool error);
-  enum enum_return_id find_info_for_server_id(ulong server_id, uint idx, Rpl_info_values *,
-                                              TABLE *table);
+  enum enum_return_id find_info(const ulong *uidx, const uint nidx,
+                                Rpl_info_values *, TABLE *table);
   bool load_info_values(uint max_num_field, Field **fields,
                         Rpl_info_values *field_values);
   bool store_info_values(uint max_num_field, Field **fields,

=== modified file 'sql/rpl_mi.cc'
--- a/sql/rpl_mi.cc	2010-12-21 09:33:41 +0000
+++ b/sql/rpl_mi.cc	2010-12-27 18:54:41 +0000
@@ -76,22 +76,8 @@ const char *info_mi_fields []=
   "retry_count"
 };
 
-Master_info::Master_info(
-#ifdef HAVE_PSI_INTERFACE
-                         PSI_mutex_key *param_key_info_run_lock,
-                         PSI_mutex_key *param_key_info_data_lock,
-                         PSI_mutex_key *param_key_info_data_cond,
-                         PSI_mutex_key *param_key_info_start_cond,
-                         PSI_mutex_key *param_key_info_stop_cond
-#endif
-                        )
-   :Rpl_info("I/O"
-#ifdef HAVE_PSI_INTERFACE
-             ,param_key_info_run_lock, param_key_info_data_lock,
-             param_key_info_data_cond, param_key_info_start_cond,
-             param_key_info_stop_cond
-#endif
-            ),
+Master_info::Master_info()
+   :Rpl_info_coordinator("I/O", "IO-Thread"),
    ssl(0), ssl_verify_server_cert(0),
    port(MYSQL_PORT), connect_retry(DEFAULT_CONNECT_RETRY),
    clock_diff_with_master(0), heartbeat_period(0),
@@ -103,12 +89,13 @@ Master_info::Master_info(
   ssl_ca[0]= 0; ssl_capath[0]= 0; ssl_cert[0]= 0;
   ssl_cipher[0]= 0; ssl_key[0]= 0;
   master_uuid[0]= 0;
-  ignore_server_ids= new Server_ids();
+  ignore_server_ids= new Server_ids(sizeof(::server_id));
 }
 
 Master_info::~Master_info()
 {
-  delete ignore_server_ids;
+  if (ignore_server_ids)
+    delete ignore_server_ids;
 }
 
 /**
@@ -138,13 +125,13 @@ int change_master_server_id_cmp(ulong *i
  */
 bool Master_info::shall_ignore_server_id(ulong s_id)
 {
-  if (likely(ignore_server_ids->server_ids.elements == 1))
+  if (likely(ignore_server_ids->dynamic_ids.elements == 1))
     return (* (ulong*)
-      dynamic_array_ptr(&(ignore_server_ids->server_ids), 0)) == s_id;
+      dynamic_array_ptr(&(ignore_server_ids->dynamic_ids), 0)) == s_id;
   else      
     return bsearch((const ulong *) &s_id,
-                   ignore_server_ids->server_ids.buffer,
-                   ignore_server_ids->server_ids.elements, sizeof(ulong),
+                   ignore_server_ids->dynamic_ids.buffer,
+                   ignore_server_ids->dynamic_ids.elements, sizeof(ulong),
                    (int (*) (const void*, const void*)) change_master_server_id_cmp)
       != NULL;
 }
@@ -179,7 +166,7 @@ void Master_info::end_info()
   if (!inited)
     DBUG_VOID_RETURN;
 
-  handler->end_info();
+  handler->end_info(uidx, nidx);
 
   inited = 0;
 
@@ -228,7 +215,10 @@ int Master_info::flush_info(bool force)
   */
   handler->set_sync_period(sync_masterinfo_period);
 
-  if (write_info(handler, force))
+  if (write_info(handler))
+    goto err;
+
+  if (handler->flush_info(uidx, nidx, force))
     goto err;
 
   DBUG_RETURN(0);
@@ -257,7 +247,7 @@ int Master_info::init_info()
   mysql= 0; file_id= 1;
   int necessary_to_configure= check_info();
   
-  if (handler->init_info())
+  if (handler->init_info(uidx, nidx))
     goto err;
 
   if (necessary_to_configure)
@@ -314,8 +304,9 @@ bool Master_info::read_info(Rpl_info_han
      is this.
   */
 
-  if (from->prepare_info_for_read() || 
-      from->get_info(master_log_name, sizeof(master_log_name), ""))
+  if (from->prepare_info_for_read(nidx) || 
+      from->get_info(master_log_name, (size_t) sizeof(master_log_name),
+                     (char *) ""))
     DBUG_RETURN(TRUE);
 
   lines= strtoul(master_log_name, &first_non_digit, 10);
@@ -324,17 +315,18 @@ bool Master_info::read_info(Rpl_info_han
       *first_non_digit=='\0' && lines >= LINES_IN_MASTER_INFO_WITH_SSL)
   {
     /* Seems to be new format => read master log name */
-    if (from->get_info(master_log_name,  sizeof(master_log_name), ""))
+    if (from->get_info(master_log_name, (size_t) sizeof(master_log_name),
+                       (char *) ""))
       DBUG_RETURN(TRUE);
   }
   else 
     lines= 7;
 
   if (from->get_info(&temp_master_log_pos,
-                        (ulong) BIN_LOG_HEADER_SIZE) ||
-      from->get_info(host, sizeof(host), 0) ||
-      from->get_info(user, sizeof(user), "test") ||
-      from->get_info(password, sizeof(password), 0) ||
+                     (ulong) BIN_LOG_HEADER_SIZE) ||
+      from->get_info(host, (size_t) sizeof(host), (char *) 0) ||
+      from->get_info(user, (size_t) sizeof(user), (char *) "test") ||
+      from->get_info(password, (size_t) sizeof(password), (char *) 0) ||
       from->get_info((int *) &port, (int) MYSQL_PORT) ||
       from->get_info((int *) &connect_retry,
                         (int) DEFAULT_CONNECT_RETRY))
@@ -349,11 +341,11 @@ bool Master_info::read_info(Rpl_info_han
   if (lines >= LINES_IN_MASTER_INFO_WITH_SSL)
   {
     if (from->get_info(&temp_ssl, 0) ||
-        from->get_info(ssl_ca, sizeof(ssl_ca), 0) ||
-        from->get_info(ssl_capath, sizeof(ssl_capath), 0) ||
-        from->get_info(ssl_cert, sizeof(ssl_cert), 0) ||
-        from->get_info(ssl_cipher, sizeof(ssl_cipher), 0) ||
-        from->get_info(ssl_key, sizeof(ssl_key), 0))
+        from->get_info(ssl_ca, (size_t) sizeof(ssl_ca), (char *) 0) ||
+        from->get_info(ssl_capath, (size_t) sizeof(ssl_capath), (char *) 0) ||
+        from->get_info(ssl_cert, (size_t) sizeof(ssl_cert), (char *) 0) ||
+        from->get_info(ssl_cipher, (size_t) sizeof(ssl_cipher), (char *) 0) ||
+        from->get_info(ssl_key, (size_t) sizeof(ssl_key), (char *) 0))
       DBUG_RETURN(TRUE);
   }
 
@@ -382,7 +374,7 @@ bool Master_info::read_info(Rpl_info_han
   */
   if (lines >= LINE_FOR_MASTER_BIND)
   {
-    if (from->get_info(bind_addr, sizeof(bind_addr), ""))
+    if (from->get_info(bind_addr, (size_t) sizeof(bind_addr), (char *) ""))
       DBUG_RETURN(TRUE);
   }
 
@@ -392,14 +384,15 @@ bool Master_info::read_info(Rpl_info_han
   */
   if (lines >= LINE_FOR_REPLICATE_IGNORE_SERVER_IDS)
   {
-     if (from->get_info(ignore_server_ids, (Server_ids *) NULL))
+     if (from->get_info(ignore_server_ids, (Dynamic_ids *) NULL))
       DBUG_RETURN(TRUE);
   }
 
   /* Starting from 5.5 the master_uuid may be in the repository. */
   if (lines >= LINE_FOR_MASTER_UUID)
   {
-    if (from->get_info(master_uuid, sizeof(master_uuid), 0))
+    if (from->get_info(master_uuid, (size_t) sizeof(master_uuid),
+                       (char *) 0))
       DBUG_RETURN(TRUE);
   }
 
@@ -424,7 +417,7 @@ bool Master_info::read_info(Rpl_info_han
   DBUG_RETURN(FALSE);
 }
 
-bool Master_info::write_info(Rpl_info_handler *to, bool force)
+bool Master_info::write_info(Rpl_info_handler *to)
 {
   DBUG_ENTER("Master_info::write_info");
 
@@ -436,10 +429,10 @@ bool Master_info::write_info(Rpl_info_ha
      of file we don't care about this garbage.
   */
 
-  if (to->prepare_info_for_write() ||
+  if (to->prepare_info_for_write(nidx) ||
       to->set_info((int) LINES_IN_MASTER_INFO) ||
       to->set_info(master_log_name) ||
-      to->set_info((ulong)master_log_pos) ||
+      to->set_info((ulong) master_log_pos) ||
       to->set_info(host) ||
       to->set_info(user) ||
       to->set_info(password) ||
@@ -451,7 +444,7 @@ bool Master_info::write_info(Rpl_info_ha
       to->set_info(ssl_cert) ||
       to->set_info(ssl_cipher) ||
       to->set_info(ssl_key) ||
-      to->set_info(ssl_verify_server_cert) ||
+      to->set_info((int) ssl_verify_server_cert) ||
       to->set_info(heartbeat_period) ||
       to->set_info(bind_addr) ||
       to->set_info(ignore_server_ids) ||
@@ -459,9 +452,6 @@ bool Master_info::write_info(Rpl_info_ha
       to->set_info(retry_count))
     DBUG_RETURN(TRUE);
 
-  if (to->flush_info(force))
-    DBUG_RETURN(TRUE);
-
   DBUG_RETURN(FALSE);
 }
 #endif /* HAVE_REPLICATION */

=== modified file 'sql/rpl_mi.h'
--- a/sql/rpl_mi.h	2010-12-21 09:33:41 +0000
+++ b/sql/rpl_mi.h	2010-12-27 18:54:41 +0000
@@ -60,18 +60,10 @@ typedef struct st_mysql MYSQL;
 
 *****************************************************************************/
 
-class Master_info : public Rpl_info
+class Master_info : public Rpl_info_coordinator
 {
  public:
-  Master_info(
-#ifdef HAVE_PSI_INTERFACE
-              PSI_mutex_key *param_key_info_run_lock,
-              PSI_mutex_key *param_key_info_data_lock,
-              PSI_mutex_key *param_key_info_data_cond,
-              PSI_mutex_key *param_key_info_start_cond,
-              PSI_mutex_key *param_key_info_stop_cond
-#endif
-             );
+  Master_info();
   virtual ~Master_info();
 
   /* the variables below are needed because we can change masters on the fly */
@@ -100,8 +92,11 @@ class Master_info : public Rpl_info
   long clock_diff_with_master;
   float heartbeat_period;         // interface with CHANGE MASTER or master.info
   ulonglong received_heartbeats;  // counter of received heartbeat events
+
   time_t last_heartbeat;
-  Server_ids *ignore_server_ids;
+
+  Dynamic_ids *ignore_server_ids;
+
   ulong master_id;
   /*
     to hold checksum alg in use until IO thread has received FD.
@@ -144,7 +139,7 @@ public:
 
 private:
   bool read_info(Rpl_info_handler *from);
-  bool write_info(Rpl_info_handler *to, bool force);
+  bool write_info(Rpl_info_handler *to);
 
   Master_info& operator=(const Master_info& info);
   Master_info(const Master_info& info);

=== modified file 'sql/rpl_reporting.cc'
--- a/sql/rpl_reporting.cc	2010-08-05 17:45:25 +0000
+++ b/sql/rpl_reporting.cc	2011-05-16 19:43:58 +0000
@@ -29,12 +29,20 @@ void
 Slave_reporting_capability::report(loglevel level, int err_code,
                                    const char *msg, ...) const
 {
+  va_list args;
+  va_start(args, msg);
+  do_report(level,  err_code, msg, args);
+  va_end(args);
+}
+
+void
+Slave_reporting_capability::do_report(loglevel level, int err_code,
+                                   const char *msg, va_list args) const
+{
   void (*report_function)(const char *, ...);
   char buff[MAX_SLAVE_ERRMSG];
   char *pbuff= buff;
   uint pbuffsize= sizeof(buff);
-  va_list args;
-  va_start(args, msg);
 
   mysql_mutex_lock(&err_lock);
   switch (level)
@@ -64,7 +72,6 @@ Slave_reporting_capability::report(logle
   my_vsnprintf(pbuff, pbuffsize, msg, args);
 
   mysql_mutex_unlock(&err_lock);
-  va_end(args);
 
   /* If the msg string ends with '.', do not add a ',' it would be ugly */
   report_function("Slave %s: %s%s Error_code: %d",

=== modified file 'sql/rpl_reporting.h'
--- a/sql/rpl_reporting.h	2010-08-05 17:45:25 +0000
+++ b/sql/rpl_reporting.h	2011-05-16 19:43:58 +0000
@@ -52,8 +52,10 @@ public:
                         code, but can contain more information), in
                         printf() format.
   */
-  void report(loglevel level, int err_code, const char *msg, ...) const
+  virtual void report(loglevel level, int err_code, const char *msg, ...) const
     ATTRIBUTE_FORMAT(printf, 4, 5);
+  void do_report(loglevel level, int err_code,
+                 const char *msg, va_list v_args) const;
 
   /**
      Clear errors. They will not show up under <code>SHOW SLAVE

=== modified file 'sql/rpl_rli.cc'
--- a/sql/rpl_rli.cc	2010-12-21 09:33:41 +0000
+++ b/sql/rpl_rli.cc	2011-05-16 19:43:58 +0000
@@ -1,4 +1,4 @@
-/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
+/* Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -27,6 +27,7 @@
 #include "transaction.h"
 #include "sql_parse.h"                          // end_trans, ROLLBACK
 #include "rpl_slave.h"
+#include "rpl_rli_pdb.h"
 
 /*
   Please every time you add a new field to the relay log info, update
@@ -40,27 +41,24 @@ const char* info_rli_fields[]=
   "group_relay_log_pos",
   "group_master_log_name",
   "group_master_log_pos",
-  "sql_delay"
+  "sql_delay",
+  "number_of_workers"
 };
 
 const char *const Relay_log_info::state_delaying_string = "Waiting until MASTER_DELAY seconds after master executed event";
 
-Relay_log_info::Relay_log_info(bool is_slave_recovery
-#ifdef HAVE_PSI_INTERFACE
-                               ,PSI_mutex_key *param_key_info_run_lock,
-                               PSI_mutex_key *param_key_info_data_lock,
-                               PSI_mutex_key *param_key_info_data_cond,
-                               PSI_mutex_key *param_key_info_start_cond,
-                               PSI_mutex_key *param_key_info_stop_cond
-#endif
-                              )
-   :Rpl_info("SQL"
-#ifdef HAVE_PSI_INTERFACE
-             ,param_key_info_run_lock, param_key_info_data_lock,
-             param_key_info_data_cond, param_key_info_start_cond,
-             param_key_info_stop_cond
-#endif
-            ),
+static PSI_mutex_info *worker_mutexes= NULL;
+static PSI_cond_info *worker_conds= NULL;
+
+PSI_mutex_key *key_mutex_slave_parallel_worker= NULL;
+PSI_mutex_key key_mutex_slave_parallel_pend_jobs;
+PSI_mutex_key key_mutex_mts_temp_tables_lock;
+
+PSI_cond_key *key_cond_slave_parallel_worker= NULL;
+PSI_cond_key key_cond_slave_parallel_pend_jobs;
+
+Relay_log_info::Relay_log_info(bool is_slave_recovery)
+   :Rpl_info_coordinator("SQL", "SQL-Thread"), checkpoint_thd(0), checkpoint_running(0),
    replicate_same_server_id(::replicate_same_server_id),
    cur_log_fd(-1), relay_log(&sync_relaylog_period),
    is_relay_log_recovery(is_slave_recovery),
@@ -68,31 +66,94 @@ Relay_log_info::Relay_log_info(bool is_s
    cur_log_old_open_count(0), group_relay_log_pos(0), event_relay_log_pos(0),
    group_master_log_pos(0), log_space_total(0), ignore_log_space_limit(0),
    last_master_timestamp(0), slave_skip_counter(0),
-   abort_pos_wait(0), until_condition(UNTIL_NONE),
+   abort_pos_wait(0), slave_exec_mode(0), until_condition(UNTIL_NONE),
    until_log_pos(0), retried_trans(0),
    tables_to_lock(0), tables_to_lock_count(0),
    rows_query_ev(NULL), last_event_start_time(0),
-   sql_delay(0), sql_delay_end(0),
-   m_flags(0)
+   this_worker(NULL), slave_parallel_workers(0),
+   recovery_parallel_workers(0),
+   checkpoint_group(mts_checkpoint_group), mts_recovery_group_cnt(0),
+   mts_recovery_index(0),
+   sql_delay(0), sql_delay_end(0), m_flags(0)
 {
   DBUG_ENTER("Relay_log_info::Relay_log_info");
 
   group_relay_log_name[0]= event_relay_log_name[0]=
     group_master_log_name[0]= 0;
   until_log_name[0]= ign_master_log_name_end[0]= 0;
+
+  set_timespec_nsec(last_clock, 0);
+
+  bitmap_init(&recovery_groups, NULL, checkpoint_group, FALSE);
   bzero((char*) &cache_buf, sizeof(cache_buf));
   cached_charset_invalidate();
   mysql_mutex_init(key_relay_log_info_log_space_lock,
                    &log_space_lock, MY_MUTEX_INIT_FAST);
   mysql_cond_init(key_relay_log_info_log_space_cond, &log_space_cond, NULL);
   relay_log.init_pthread_objects();
+
   DBUG_VOID_RETURN;
 }
 
+/**
+   The method to invoke at slave threads start
+*/
+void Relay_log_info::init_workers(ulong n)
+{
+  uint wi= 0;
+
+  /*
+    Parallel slave parameters are initialized regardless of
+    whether the feature is, or is going to be, active.
+  */
+  trans_jobs= stmt_jobs= pending_jobs= wait_jobs= 0;
+  mts_wqs_underrun_cnt= mts_wqs_overfill_cnt= 0;
+
+  key_mutex_slave_parallel_worker= new PSI_mutex_key[slave_parallel_workers];
+  key_cond_slave_parallel_worker= new PSI_cond_key[slave_parallel_workers];
+  worker_mutexes= new PSI_mutex_info[slave_parallel_workers];
+  worker_conds= new PSI_cond_info[slave_parallel_workers];
+  for (wi= 0; wi < slave_parallel_workers; wi++)
+  {
+     worker_mutexes[wi].m_key= (PSI_mutex_key *) &(key_mutex_slave_parallel_worker[wi]);
+     worker_mutexes[wi].m_name= "Slave_worker::jobs_lock";
+     worker_mutexes[wi].m_flags= 0;
+     worker_conds[wi].m_key= (PSI_cond_key *) &(key_cond_slave_parallel_worker[wi]);
+     worker_conds[wi].m_name= "Slave_worker::jobs_cond";
+     worker_conds[wi].m_flags= 0;
+  }
+  if (PSI_server)
+  {
+    PSI_server->register_mutex("worker", worker_mutexes,
+                               slave_parallel_workers);
+    PSI_server->register_cond("worker", worker_conds,
+                               slave_parallel_workers);
+  }
+  mysql_mutex_init(key_mutex_slave_parallel_pend_jobs, &pending_jobs_lock,
+                   MY_MUTEX_INIT_FAST);
+  mysql_cond_init(key_cond_slave_parallel_pend_jobs, &pending_jobs_cond, NULL);
+  mysql_mutex_init(key_mutex_mts_temp_tables_lock, &mts_temp_tables_lock,
+                   MY_MUTEX_INIT_FAST);
+  my_init_dynamic_array(&workers, sizeof(Slave_worker *), slave_parallel_workers, 4);
+}
+
+/**
+   The method to invoke at slave threads stop
+*/
+void Relay_log_info::deinit_workers()
+{
+  mysql_mutex_destroy(&pending_jobs_lock);
+  mysql_cond_destroy(&pending_jobs_cond);
+  mysql_mutex_destroy(&mts_temp_tables_lock);
+
+  delete_dynamic(&workers);
+}
+
 Relay_log_info::~Relay_log_info()
 {
   DBUG_ENTER("Relay_log_info::~Relay_log_info");
 
+  bitmap_free(&recovery_groups);
   mysql_mutex_destroy(&log_space_lock);
   mysql_cond_destroy(&log_space_cond);
   relay_log.cleanup();
@@ -100,6 +161,47 @@ Relay_log_info::~Relay_log_info()
   DBUG_VOID_RETURN;
 }
 
+/**
+   Method is called when MTS coordinator senses the relay-log name
+   has been changed.
+   It marks each Worker member with this fact to make an action
+   at time it will distribute a terminal event of a group to the Worker.
+
+   Worker receives the new name at the group committing phase
+   @c Slave_worker::slave_worker_ends_group().
+*/
+void Relay_log_info::reset_notified_relay_log_change()
+{
+  if (!is_parallel_exec())
+    return;
+  for (uint i= 0; i < workers.elements; i++)
+  {
+    Slave_worker *w= *(Slave_worker **) dynamic_array_ptr(&workers, i);
+    w->relay_log_change_notified= FALSE;
+  }
+}
+
+/**
+   Method is called in mts_checkpoint_routine()
+   to mark each Worker as requiring to adapt to a new checkpoint interval
+   whose coordinates are passed to it through GAQ index.
+
+   Worker notices the new checkpoint value at the group commit
+   to reset the current bitmap and set ON a bit number put by C into GAQ index
+   as the first group committed after the new checkpoint.
+*/
+void Relay_log_info::reset_notified_checkpoint()
+{
+  if (!is_parallel_exec())
+    return;
+  for (uint i= 0; i < workers.elements; i++)
+  {
+    Slave_worker *w= *(Slave_worker **) dynamic_array_ptr(&workers, i);
+    w->checkpoint_notified= FALSE;
+  }
+  checkpoint_seqno= 0;
+}
+
 static inline int add_relay_log(Relay_log_info* rli,LOG_INFO* linfo)
 {
   MY_STAT s;
@@ -913,11 +1015,21 @@ void Relay_log_info::stmt_done(my_off_t 
     while the MyISAM table has already been updated.
   */
   if ((info_thd->variables.option_bits & OPTION_BEGIN) && opt_using_transactions)
-    inc_event_relay_log_pos();
+    inc_event_relay_log_pos(); // todo: ev-> future_event_relay_log_pos
   else
   {
     inc_group_relay_log_pos(event_master_log_pos);
+    
+    DBUG_ASSERT(this_worker == NULL);
+    
     flush_info(is_transactional() ? TRUE : FALSE);
+
+    /* 
+       The central recovery commit run in sequential mode forces
+       notification on the de facto new checkpoint.
+    */
+    if (is_parallel_exec())
+      reset_notified_checkpoint();
   }
 }
 
@@ -926,7 +1038,7 @@ void Relay_log_info::cleanup_context(THD
 {
   DBUG_ENTER("Relay_log_info::cleanup_context");
 
-  DBUG_ASSERT(info_thd == thd);
+  DBUG_ASSERT((info_thd == thd));
   /*
     1) Instances of Table_map_log_event, if ::do_apply_event() was called on them,
     may have opened tables, which we cannot be sure have been closed (because
@@ -943,13 +1055,24 @@ void Relay_log_info::cleanup_context(THD
   {
     trans_rollback_stmt(thd); // if a "statement transaction"
     trans_rollback(thd);      // if a "real transaction"
+  }
+  /*
+    MTS W/a for Rows_query_log_event.
+    Cleanup of rows_query_ev at the end of the current statement.
+
+    TODO: move handle_rows_query_log_event() cleanup logic into this method
+          unconditionally.
+  */
+  if (error || is_parallel_exec())
     if (rows_query_ev)
     {
       delete rows_query_ev;
       rows_query_ev= NULL;
+      info_thd->set_query(NULL, 0);
     }
-  }
+
   m_table_map.clear_tables();
+
   slave_close_thread_tables(thd);
   if (error)
     thd->mdl_context.release_transactional_locks();
@@ -1079,7 +1202,8 @@ int Relay_log_info::init_info()
     if (hot_log)
       mysql_mutex_unlock(log_lock);
 
-    DBUG_RETURN(0);
+    DBUG_RETURN(recovery_parallel_workers ?
+                mts_recovery_groups(this, &recovery_groups) : 0);
   }
 
   cur_log_fd = -1;
@@ -1183,7 +1307,7 @@ a file name for --relay-log-index option
     the handler->init_info().
   */
   int necessary_to_configure= check_info();
-  if ((error= handler->init_info()))
+  if ((error= handler->init_info(uidx, nidx)))
   {
     msg= "Error reading relay log configuration";
     error= 1;
@@ -1273,7 +1397,7 @@ void Relay_log_info::end_info()
   if (!inited)
     DBUG_VOID_RETURN;
 
-  handler->end_info();
+  handler->end_info(uidx, nidx);
 
   if (cur_log_fd >= 0)
   {
@@ -1366,7 +1490,7 @@ void Relay_log_info::set_master_info(Mas
 
   @return 0 on success, 1 on error.
 */
-int Relay_log_info::flush_info(bool force)
+int Relay_log_info::flush_info(const bool force)
 {
   DBUG_ENTER("Relay_log_info::flush_info");
 
@@ -1378,7 +1502,10 @@ int Relay_log_info::flush_info(bool forc
   */
   handler->set_sync_period(sync_relayloginfo_period);
 
-  if (write_info(handler, force))
+  if (write_info(handler))
+    goto err;
+
+  if (handler->flush_info(uidx, nidx, force))
     goto err;
 
   DBUG_RETURN(0);
@@ -1441,8 +1568,9 @@ bool Relay_log_info::read_info(Rpl_info_
     it is line count and not binlog name (new format) it will be
     overwritten by the second row later.
   */
-  if (from->prepare_info_for_read() ||
-      from->get_info(group_relay_log_name, sizeof(group_relay_log_name), ""))
+  if (from->prepare_info_for_read(nidx) ||
+      from->get_info(group_relay_log_name, (size_t) sizeof(group_relay_log_name),
+                     (char *) ""))
     DBUG_RETURN(TRUE);
 
   lines= strtoul(group_relay_log_name, &first_non_digit, 10);
@@ -1451,18 +1579,20 @@ bool Relay_log_info::read_info(Rpl_info_
       *first_non_digit=='\0' && lines >= LINES_IN_RELAY_LOG_INFO_WITH_DELAY)
   {
     /* Seems to be new format => read group relay log name */
-    if (from->get_info(group_relay_log_name,  sizeof(group_relay_log_name), ""))
+    if (from->get_info(group_relay_log_name, (size_t) sizeof(group_relay_log_name),
+                       (char *) ""))
       DBUG_RETURN(TRUE);
   }
   else
      DBUG_PRINT("info", ("relay_log_info file is in old format."));
 
   if (from->get_info((ulong *) &temp_group_relay_log_pos,
-                        (ulong) BIN_LOG_HEADER_SIZE) ||
+                     (ulong) BIN_LOG_HEADER_SIZE) ||
       from->get_info(group_master_log_name,
-                        sizeof(group_relay_log_name), "") ||
+                     (size_t) sizeof(group_relay_log_name),
+                     (char *) "") ||
       from->get_info((ulong *) &temp_group_master_log_pos,
-                        (ulong) 0))
+                     (ulong) 0))
     DBUG_RETURN(TRUE);
 
   if (lines >= LINES_IN_RELAY_LOG_INFO_WITH_DELAY)
@@ -1471,6 +1601,12 @@ bool Relay_log_info::read_info(Rpl_info_
       DBUG_RETURN(TRUE);
   }
 
+  if (lines >= LINES_IN_RELAY_LOG_INFO_WITH_WORKERS)
+  {
+    if (from->get_info(&recovery_parallel_workers,(ulong) 0))
+      DBUG_RETURN(TRUE);
+  }
+
   group_relay_log_pos=  temp_group_relay_log_pos;
   group_master_log_pos= temp_group_master_log_pos;
   sql_delay= (int32) temp_sql_delay;
@@ -1478,7 +1614,7 @@ bool Relay_log_info::read_info(Rpl_info_
   DBUG_RETURN(FALSE);
 }
 
-bool Relay_log_info::write_info(Rpl_info_handler *to, bool force)
+bool Relay_log_info::write_info(Rpl_info_handler *to)
 {
   DBUG_ENTER("Relay_log_info::write_info");
 
@@ -1488,16 +1624,14 @@ bool Relay_log_info::write_info(Rpl_info
   */
   //DBUG_ASSERT(!belongs_to_client());
 
-  if (to->prepare_info_for_write() ||
-      to->set_info((int) LINES_IN_RELAY_LOG_INFO_WITH_DELAY) ||
+  if (to->prepare_info_for_write(nidx) ||
+      to->set_info((int) LINES_IN_RELAY_LOG_INFO_WITH_WORKERS) ||
       to->set_info(group_relay_log_name) ||
       to->set_info((ulong) group_relay_log_pos) ||
       to->set_info(group_master_log_name) ||
       to->set_info((ulong) group_master_log_pos) ||
-      to->set_info((int) sql_delay))
-    DBUG_RETURN(TRUE);
-
-  if (to->flush_info(force))
+      to->set_info((int) sql_delay) ||
+      to->set_info(recovery_parallel_workers))
     DBUG_RETURN(TRUE);
 
   DBUG_RETURN(FALSE);

=== modified file 'sql/rpl_rli.h'
--- a/sql/rpl_rli.h	2010-12-21 09:33:41 +0000
+++ b/sql/rpl_rli.h	2011-05-16 19:43:58 +0000
@@ -102,9 +102,12 @@ transactional or non-transactional is us
 To correctly recovery from failures, one should combine transactional system
 tables along with the --relay-log-recovery.
 *******************************************************************************/
-class Relay_log_info : public Rpl_info
+class Relay_log_info : public Rpl_info_coordinator
 {
 public:
+  THD* checkpoint_thd;
+  bool checkpoint_running;
+
   /**
      Flags for the state of the replication.
    */
@@ -203,7 +206,6 @@ public:
     happen when, for example, the relay log gets rotated because of
     max_binlog_size.
   */
-protected:
   char group_relay_log_name[FN_REFLEN];
   ulonglong group_relay_log_pos;
   char event_relay_log_name[FN_REFLEN];
@@ -225,6 +227,8 @@ protected:
   char group_master_log_name[FN_REFLEN];
   volatile my_off_t group_master_log_pos;
 
+// TODO: Restore!
+protected:
   /*
     When it commits, InnoDB internally stores the master log position it has
     processed so far; the position to store is the one of the end of the
@@ -276,6 +280,12 @@ public:
   mysql_cond_t log_space_cond;
 
   /*
+    A cache for the global system variable's value.
+    The value is reset at the beginning of each statement execution.
+  */
+  ulong slave_exec_mode;
+
+  /*
      Condition and its parameters from START SLAVE UNTIL clause.
      
      UNTIL condition is tested with is_until_satisfied() method that is
@@ -326,17 +336,11 @@ public:
     LOAD DATA INFILE. This is used for security reasons.
    */ 
   char slave_patternload_file[FN_REFLEN]; 
-  size_t slave_patternload_file_size;  
+  size_t slave_patternload_file_size;
 
-  Relay_log_info(bool is_slave_recovery
-#ifdef HAVE_PSI_INTERFACE
-                 ,PSI_mutex_key *param_key_info_run_lock,
-                 PSI_mutex_key *param_key_info_data_lock,
-                 PSI_mutex_key *param_key_info_data_cond,
-                 PSI_mutex_key *param_key_info_start_cond,
-                 PSI_mutex_key *param_key_info_stop_cond
-#endif
-                );
+  struct timespec last_clock;
+
+  Relay_log_info(bool is_slave_recovery);
   virtual ~Relay_log_info();
 
   /**
@@ -384,12 +388,14 @@ public:
   uint tables_to_lock_count;        /* RBR: Count of tables to lock */
   table_mapping m_table_map;      /* RBR: Mapping table-id to table */
   /* RBR: Record Rows_query log event */
-  Rows_query_log_event* rows_query_ev;
+  volatile Rows_query_log_event* rows_query_ev;  // mts w/a makes it volatile
 
   bool get_table_data(TABLE *table_arg, table_def **tabledef_var, TABLE **conv_table_var) const
   {
+    TABLE_LIST *tables= tables_to_lock;
+
     DBUG_ASSERT(tabledef_var && conv_table_var);
-    for (TABLE_LIST *ptr= tables_to_lock ; ptr != NULL ; ptr= ptr->next_global)
+    for (TABLE_LIST *ptr= tables ; ptr != NULL ; ptr= ptr->next_global)
       if (ptr->table == table_arg)
       {
         *tabledef_var= &static_cast<RPL_TABLE_LIST*>(ptr)->m_tabledef;
@@ -424,6 +430,103 @@ public:
   */
   time_t last_event_start_time;
 
+  /*
+    WL#5569 MTS-II
+  */
+  DYNAMIC_ARRAY workers; // number's is determined by global slave_parallel_workers
+  volatile ulong pending_jobs;
+  ulong trans_jobs, wait_jobs, stmt_jobs; // wait_jobs - waiting times due to the total size
+  mysql_mutex_t pending_jobs_lock;
+  mysql_cond_t pending_jobs_cond;
+  ulong       mts_slave_worker_queue_len_max;
+  ulonglong   mts_pending_jobs_size;      // actual mem usage by WQ:s
+  ulonglong   mts_pending_jobs_size_max;  // the max forcing to wait by C
+  bool    mts_wqs_oversize;           // C raises flag to wait some memory's released
+  Slave_worker  *last_assigned_worker; // a hint to partitioning func for some events
+  Slave_committed_queue *gaq;
+  DYNAMIC_ARRAY curr_group_assigned_parts; // CGAP
+  DYNAMIC_ARRAY curr_group_da;  // deferred array to hold partition-info-free events
+  bool curr_group_seen_begin;   // current group started with B-event or not
+  bool curr_group_isolated;     // Trans is exec:d by Worker but in exclusive env
+  volatile ulong mts_wqs_underrun_w_id;  // Id of a Worker whose queue is getting empty
+  volatile long mts_wqs_overrun;   // W to incr and decr
+  ulong mts_wqs_underrun_cnt;  // Coord goes to sleep when senses Workers are content
+  ulong mts_wqs_overfill_cnt;  // Coord waits if a W's queue is full
+  long  mts_worker_underrun_level; // percent of WQ size at which Worker claims hungry
+  ulong mts_coordinator_basic_nap; // C sleeps to avoid WQs overrun
+  Slave_worker* this_worker; // used by w_rli. The central rli has it as NULL.
+  ulonglong mts_total_groups; // total event groups distributed in current session
+ 
+  bool curr_group_is_parallel; // an event to process by Coordinator
+  bool curr_group_split;       // an event split the current group forcing C to exec it
+  ulong opt_slave_parallel_workers; // auxiliary cache for ::opt_slave_parallel_workers
+  ulong slave_parallel_workers;     // the one slave session time number of workers
+  ulong recovery_parallel_workers; // number of workers while recovering.
+  /* 
+     A sorted array of the Workers' current assignment counts, providing
+     an approximate view of the Workers' load.
+     The first row of the least occupied Worker is queried at assigning 
+     a new partition. Is updated at checkpoint commit to the main RLI.
+  */
+  DYNAMIC_ARRAY least_occupied_workers;
+  uint checkpoint_seqno;  // counter of groups executed after the most recent CP
+  uint checkpoint_group;  // counter of groups after which a checkpoint is called.
+  MY_BITMAP recovery_groups;  // bitmap used during recovery.
+  ulong mts_recovery_group_cnt; // number of groups to execute at recovery
+  ulong mts_recovery_index;     // running index of recoverable groups
+  /*
+    Temporary tables are held by Coordinator though they are created and dropped
+    explicitly by Workers. The following lock has to be taken by either party
+    in order to conduct any operation in the temp tables placeholder, incl.
+    find, drop, create, open.
+  */
+  mysql_mutex_t mts_temp_tables_lock;
+  /*
+    While Worker utilize its thd->mem_root, Coordinator adopts a specific mem-root:
+  */
+  MEM_ROOT mts_coor_mem_root;
+
+  /* most of allocation in the coordinator rli is there */
+  void init_workers(ulong);
+
+  /* counterpart of the init */
+  void deinit_workers();
+
+  /**
+     returns true if there is any gap-group of events to execute
+                  at slave starting phase.
+  */
+  inline bool is_mts_recovery() const
+  {
+    return mts_recovery_group_cnt != 0;
+  }
+
+  /**
+     returns true if events are to be executed in parallel
+  */
+  inline bool is_parallel_exec() const
+  {
+    bool ret= (slave_parallel_workers > 0) && !is_mts_recovery();
+
+    DBUG_ASSERT(!ret || workers.elements > 0);
+
+    return ret;
+  }
+
+  /**
+     While a group is executed by a Worker the relay log can change.
+     Coordinator notifies Workers about this event. Worker is supposed
+     to commit to the recovery table with the new info.
+  */
+  void reset_notified_relay_log_change();
+
+  /**
+     When parallel execution is active, marks every Worker as not yet notified
+     of the checkpoint and resets checkpoint_seqno. Coordinator and Workers
+     maintain a bitmap of executed groups that is reset with a new checkpoint.
+  */
+  void reset_notified_checkpoint();
+
   /**
     Helper function to do after statement completion.
 
@@ -616,8 +719,13 @@ private:
   */
   static const int LINES_IN_RELAY_LOG_INFO_WITH_DELAY= 5;
 
+  /*
+    Before the WL#5569, relay_log.info had 5 lines. Now it has 6 lines.
+  */
+  static const int LINES_IN_RELAY_LOG_INFO_WITH_WORKERS= 6;
+
   bool read_info(Rpl_info_handler *from);
-  bool write_info(Rpl_info_handler *to, bool force);
+  bool write_info(Rpl_info_handler *to);
 
   Relay_log_info& operator=(const Relay_log_info& info);
   Relay_log_info(const Relay_log_info& info);
@@ -626,4 +734,5 @@ private:
 };
 
 bool mysql_show_relaylog_events(THD* thd);
+
 #endif /* RPL_RLI_H */

=== added file 'sql/rpl_rli_pdb.cc'
--- a/sql/rpl_rli_pdb.cc	1970-01-01 00:00:00 +0000
+++ b/sql/rpl_rli_pdb.cc	2011-05-16 19:43:58 +0000
@@ -0,0 +1,923 @@
+#include "my_global.h"                          /* NO_EMBEDDED_ACCESS_CHECKS */
+#include "sql_priv.h"
+#include "unireg.h"
+#include "rpl_rli_pdb.h"
+#include "sql_string.h"
+#include <hash.h>
+
+/*
+  Please every time you add a new field to the worker slave info, update
+  what follows. For now, this is just used to get the number of fields.
+*/
+const char *info_slave_worker_fields []=
+{
+  "relay_log_name",
+  "relay_log_pos",
+  "master_log_name",
+  "master_log_pos",
+
+  // todo: remove the next four
+  "checkpoint_relay_log_name",
+  "checkpoint_relay_log_pos",
+  "checkpoint_master_log_name",
+  "checkpoint_master_log_pos",
+
+  "checkpoint_seqno", // index of the last committed group in the bitmap
+  "checkpoint_group_size",
+  "checkpoint_group_bitmap"
+};
+
+Slave_worker::Slave_worker(const char* type, const char* pfs,
+                           Relay_log_info *rli)
+  : Relay_log_info(FALSE), c_rli(rli), curr_group_exec_parts(0),
+  checkpoint_relay_log_pos(0), checkpoint_master_log_pos(0), checkpoint_seqno(0),
+  inited_group_execed(0)
+{
+  checkpoint_relay_log_name[0]= 0;
+  checkpoint_master_log_name[0]= 0;
+}
+
+Slave_worker::~Slave_worker() 
+{
+  if (curr_group_exec_parts)
+    delete curr_group_exec_parts;
+
+  if (inited_group_execed)
+    bitmap_free(&group_execed);
+}
+
+int Slave_worker::init_info()
+{
+  int necessary_to_configure= 0;
+
+  DBUG_ENTER("Slave_worker::init_info");
+
+  if (inited)
+    DBUG_RETURN(0);
+
+  if (!(curr_group_exec_parts= new Database_ids(NAME_LEN)))
+    goto err;
+
+  if (bitmap_init(&group_execed, NULL,
+                  c_rli->checkpoint_group, FALSE))
+    goto err;
+  
+  inited_group_execed= 1;
+  
+  /*
+    The init_info() is used to either create or read information
+    from the repository, in order to initialize the Slave_worker.
+  */
+  necessary_to_configure= check_info();
+
+  if (handler->init_info(uidx, nidx))
+    goto err;
+
+  if (!necessary_to_configure && read_info(handler))
+    goto err;
+
+  if (flush_info(TRUE))
+    goto err;
+
+  inited= 1;
+  DBUG_RETURN(0);
+
+err:
+  sql_print_error("Error reading slave worker configuration");
+  DBUG_RETURN(1);
+}
+
+void Slave_worker::end_info()
+{
+  DBUG_ENTER("Slave_worker::end_info");
+
+  if (!inited)
+    DBUG_VOID_RETURN;
+
+  handler->end_info(uidx, nidx);
+
+  inited = 0;
+
+  DBUG_VOID_RETURN;
+}
+
+int Slave_worker::flush_info(const bool force)
+{
+  DBUG_ENTER("Slave_worker::flush_info");
+
+  /*
+    We update the sync_period at this point because only here we
+    know that we are handling a Slave_worker. This needs to be
+    updated every time we call flush because the option may be
+    dynamically set.
+  */
+  handler->set_sync_period(sync_relayloginfo_period);
+
+  if (write_info(handler))
+    goto err;
+
+  if (handler->flush_info(uidx, nidx, force))
+    goto err;
+
+  DBUG_RETURN(0);
+
+err:
+  sql_print_error("Error writing slave worker configuration");
+  DBUG_RETURN(1);
+}
+
+bool Slave_worker::read_info(Rpl_info_handler *from)
+{
+  DBUG_ENTER("Slave_worker::read_info");
+
+  ulong temp_group_relay_log_pos= 0;
+  ulong temp_group_master_log_pos= 0;
+  ulong temp_checkpoint_relay_log_pos= 0;
+  ulong temp_checkpoint_master_log_pos= 0;
+  ulong temp_checkpoint_seqno= 0;
+  ulong nbytes= 0;
+  uchar *buffer= (uchar *) group_execed.bitmap;
+
+  if (from->prepare_info_for_read(nidx))
+    DBUG_RETURN(TRUE);
+
+  if (from->get_info(group_relay_log_name,
+                     (size_t) sizeof(group_relay_log_name),
+                     (char *) "") ||
+      from->get_info((ulong *) &temp_group_relay_log_pos,
+                     (ulong) 0) ||
+      from->get_info(group_master_log_name,
+                     (size_t) sizeof(group_master_log_name),
+                     (char *) "") ||
+      from->get_info((ulong *) &temp_group_master_log_pos,
+                     (ulong) 0) ||
+      from->get_info(checkpoint_relay_log_name,
+                     (size_t) sizeof(checkpoint_relay_log_name),
+                     (char *) "") ||
+      from->get_info((ulong *) &temp_checkpoint_relay_log_pos,
+                     (ulong) 0) ||
+      from->get_info(checkpoint_master_log_name,
+                     (size_t) sizeof(checkpoint_master_log_name),
+                     (char *) "") ||
+      from->get_info((ulong *) &temp_checkpoint_master_log_pos,
+                     (ulong) 0) ||
+      from->get_info((ulong *) &temp_checkpoint_seqno,
+                     (ulong) 0) ||
+      from->get_info(&nbytes, (ulong) 0) ||
+      from->get_info(buffer, (size_t) nbytes,
+                     (uchar *) 0))
+    DBUG_RETURN(TRUE);
+
+  group_relay_log_pos=  temp_group_relay_log_pos;
+  group_master_log_pos= temp_group_master_log_pos;
+  checkpoint_relay_log_pos=  temp_checkpoint_relay_log_pos;
+  checkpoint_master_log_pos= temp_checkpoint_master_log_pos;
+  checkpoint_seqno= temp_checkpoint_seqno;
+
+  DBUG_RETURN(FALSE);
+}
+
+bool Slave_worker::write_info(Rpl_info_handler *to)
+{
+  DBUG_ENTER("Slave_worker::write_info");
+  /* Hands every worker field to the handler; TRUE on failure, FALSE on success. */
+  ulong nbytes= (ulong) no_bytes_in_map(&group_execed);
+  uchar *buffer= (uchar*) group_execed.bitmap;
+  /* Field order here is the same as the order read back by read_info(). */
+  if (to->prepare_info_for_write(nidx) ||
+      to->set_info(group_relay_log_name) ||
+      to->set_info((ulong) group_relay_log_pos) ||
+      to->set_info(group_master_log_name) ||
+      to->set_info((ulong) group_master_log_pos) ||
+      to->set_info(checkpoint_relay_log_name) ||
+      to->set_info((ulong) checkpoint_relay_log_pos) ||
+      to->set_info(checkpoint_master_log_name) ||
+      to->set_info((ulong) checkpoint_master_log_pos) ||
+      to->set_info((ulong) checkpoint_seqno) ||
+      to->set_info(nbytes) ||
+      to->set_info(buffer, (size_t) nbytes))
+    DBUG_RETURN(TRUE);
+
+  DBUG_RETURN(FALSE);
+}
+
+size_t Slave_worker::get_number_worker_fields()
+{
+  return sizeof(info_slave_worker_fields)/sizeof(info_slave_worker_fields[0]);
+}
+
+bool Slave_worker::commit_positions(Log_event *ev, Slave_job_group* ptr_g)
+{
+  DBUG_ENTER("Slave_worker::commit_positions");
+  /* Marks ptr_g's group as executed, updates positions from ev, then flushes. */
+  bool error= FALSE;
+
+  if (ptr_g->checkpoint_log_name != NULL)
+  {
+    strmake(checkpoint_relay_log_name, ptr_g->checkpoint_relay_log_name,
+            sizeof(checkpoint_relay_log_name) - 1);
+    checkpoint_relay_log_pos= ptr_g->checkpoint_relay_log_pos;
+    strmake(checkpoint_master_log_name, ptr_g->checkpoint_log_name,
+            sizeof(checkpoint_master_log_name) - 1);
+    checkpoint_master_log_pos= ptr_g->checkpoint_log_pos;
+    /* Both names were copied above; free them and clear the pointers. */
+    my_free(ptr_g->checkpoint_log_name);
+    ptr_g->checkpoint_log_name= NULL;
+    my_free(ptr_g->checkpoint_relay_log_name);
+    ptr_g->checkpoint_relay_log_name= NULL;
+    /* New checkpoint coordinates were taken, so the executed-groups bitmap restarts. */
+    bitmap_clear_all(&group_execed);
+  }
+
+  bitmap_set_bit(&group_execed, ptr_g->checkpoint_seqno);
+  checkpoint_seqno= ptr_g->checkpoint_seqno;
+  group_relay_log_pos= ev->future_event_relay_log_pos;
+  group_master_log_pos= ev->log_pos;
+  strmake(group_master_log_name, c_rli->get_group_master_log_name(),
+          sizeof(group_master_log_name)-1);
+
+  error= flush_info(TRUE);
+
+  DBUG_RETURN(error);
+}
+
+static HASH mapping_db_to_worker;
+static bool inited_hash_workers= FALSE;
+
+PSI_mutex_key key_mutex_slave_worker_hash;
+PSI_cond_key key_cond_slave_worker_hash;
+static  mysql_mutex_t slave_worker_hash_lock;
+static  mysql_cond_t slave_worker_hash_cond;
+
+
+extern "C" uchar *get_key(const uchar *record, size_t *length,
+                          my_bool not_used __attribute__((unused)))
+{
+  DBUG_ENTER("get_key");
+
+  db_worker *entry=(db_worker *) record;
+  *length= strlen(entry->db);
+
+  DBUG_PRINT("info", ("get_key  %s, %d", entry->db, (int) *length));
+
+  DBUG_RETURN((uchar*) entry->db);
+}
+
+
+static void free_entry(db_worker *entry)
+{
+  DBUG_ENTER("free_entry");
+
+  DBUG_PRINT("info", ("free_entry %s, %d", entry->db, (int) strlen(entry->db)));
+
+  my_free((void *) entry->db);
+  my_free(entry);
+
+  DBUG_VOID_RETURN;
+}
+
+bool init_hash_workers(ulong slave_parallel_workers)
+{
+  DBUG_ENTER("init_hash_workers");
+
+  mysql_mutex_init(key_mutex_slave_worker_hash, &slave_worker_hash_lock,
+                   MY_MUTEX_INIT_FAST);
+  mysql_cond_init(key_cond_slave_worker_hash, &slave_worker_hash_cond, NULL);
+
+  inited_hash_workers=
+    (my_hash_init(&mapping_db_to_worker, &my_charset_bin,
+                 0, 0, 0, get_key,
+                 (my_hash_free_key) free_entry, 0) == 0);
+  DBUG_RETURN (!inited_hash_workers);
+}
+
+void destroy_hash_workers()
+{
+  DBUG_ENTER("destroy_hash_workers");
+  if (inited_hash_workers)
+    my_hash_free(&mapping_db_to_worker);
+  mysql_mutex_destroy(&slave_worker_hash_lock);
+  mysql_cond_destroy(&slave_worker_hash_cond);
+
+  DBUG_VOID_RETURN;
+}
+
+/**
+   The function produces a reference to the struct of a Worker
+   that has been or will be engaged to process the @c dbname -keyed  partition (D).
+   It checks a local to Coordinator CGAP list first and returns 
+   @c last_assigned_worker when found (todo: assert).
+
+   Otherwise, the partition is appended to the current group list:
+
+        CGAP .= D
+
+   and a possible  D's Worker id is searched in APH that collects tuples
+   (P, W_id, U, mutex, cond).
+   In case not found,
+
+        W_d := W_c unless W_c is NULL.
+
+   When W_c is NULL it is assigned to a least occupied as defined by
+   @c get_least_occupied_worker().
+
+        W_d := W_c := W_{least_occupied}
+
+        APH .=  a new (D, W_d, 1) 
+
+   In a case APH contains W_d == W_c, (assert U >= 1)
+
+        update APH set  U++ where  APH.P = D
+
+   The case APH contains a W_d != W_c != NULL assigned to D-partition represents
+   the hashing conflict and is handled as the following:
+
+     a. marks the record of APH with a flag requesting to signal in the
+        cond var when `U' the usage counter drops to zero by the other Worker;
+     b. waits for the other Worker to finish tasks on that partition and
+        gets the signal;
+     c. updates the APH record to point to the first Worker (naturally, U := 1),
+        scheduled the event, and goes back into the parallel mode
+
+   @note modifies  CGAP, APH
+
+   @return the pointer to a Worker struct 
+*/
+Slave_worker *get_slave_worker(const char *dbname, Relay_log_info *rli)
+{
+  uint i;
+  char key[NAME_LEN + 2];
+  DYNAMIC_ARRAY *workers= &rli->workers;
+
+  DBUG_ENTER("get_slave_worker");
+
+  // Nothing can be mapped before the db-to-worker hash (APH) is set up.
+  if (!inited_hash_workers)
+    DBUG_RETURN(NULL);
+
+  db_worker *entry= NULL;
+  my_hash_value_type hash_value;
+  uchar dblength= (uint) strlen(dbname);
+
+  DBUG_ASSERT(dblength != 0);
+
+  // Search in CGAP; a key is laid out as <1 byte length><name><'\0'>
+  for (i= 0; i < rli->curr_group_assigned_parts.elements; i++)
+  {
+    get_dynamic(&rli->curr_group_assigned_parts, (uchar*) key, i);
+    if ((uchar) key[0] != dblength)
+      continue;
+    else
+      if (strncmp(key + 1, const_cast<char*>(dbname), dblength) == 0)
+        DBUG_RETURN(rli->last_assigned_worker);
+  }
+  // The partition is new to the current group: CGAP .= D
+  key[0]= dblength;
+  memcpy(key + 1, dbname, dblength + 1);
+  insert_dynamic(&rli->curr_group_assigned_parts, (uchar*) key);
+
+  DBUG_PRINT("info", ("Searching for %s, %d", dbname, dblength));
+
+  hash_value= my_calc_hash(&mapping_db_to_worker, (uchar*) dbname,
+                           dblength);
+
+  mysql_mutex_lock(&slave_worker_hash_lock);
+
+  entry= (db_worker *)
+    my_hash_search_using_hash_value(&mapping_db_to_worker, hash_value,
+                                    (uchar*) dbname, dblength);
+  if (!entry)
+  {
+    /*
+      The database name was not found which means that a worker never
+      processed events from that database. In such case, we need to
+      map the database to a worker by inserting an entry into the
+      hash map.
+    */
+    my_bool ret;
+    char *db= NULL;
+
+    if (mapping_db_to_worker.records > opt_mts_partition_hash_soft_max)
+    {
+      /*
+        Remove zero-usage (todo: relatively rare scheduled) records.
+        A successful my_hash_delete() shrinks the record array and may
+        relocate another record into the vacated slot, so the index is
+        advanced only when the current slot has been kept; otherwise
+        the relocated record would never be examined.
+      */
+      for (uint k= 0; k < mapping_db_to_worker.records;)
+      {
+        db_worker *candidate=
+          (db_worker*) my_hash_element(&mapping_db_to_worker, k);
+        if (candidate->usage == 0 &&
+            !my_hash_delete(&mapping_db_to_worker, (uchar*) candidate))
+          continue;     // re-examine slot k, it may hold another record now
+        k++;
+      }
+    }
+
+    mysql_mutex_unlock(&slave_worker_hash_lock);
+
+    DBUG_PRINT("info", ("Inserting %s, %d", dbname, dblength));
+    /*
+      Allocate an entry to be inserted and if the operation fails
+      an error is returned.
+    */
+    if (!(db= (char *) my_malloc((size_t) dblength + 1, MYF(0))))
+      goto err;
+    if (!(entry= (db_worker *) my_malloc(sizeof(db_worker), MYF(0))))
+    {
+      my_free(db);
+      goto err;
+    }
+    strmov(db, dbname);
+    entry->db= db;
+    entry->usage= 1;
+    /*
+      Unless \exists the last assigned Worker, get a free worker based
+      on a policy described in the function get_least_occupied_worker().
+    */
+    entry->worker= !rli->last_assigned_worker ?
+      get_least_occupied_worker(workers) : rli->last_assigned_worker;
+    entry->worker->usage_partition++;
+
+    mysql_mutex_lock(&slave_worker_hash_lock);
+    ret= my_hash_insert(&mapping_db_to_worker, (uchar*) entry);
+    mysql_mutex_unlock(&slave_worker_hash_lock);
+    if (ret)
+    {
+      // The partition was never published: undo the Worker's bookkeeping.
+      entry->worker->usage_partition--;
+      my_free(db);
+      my_free(entry);
+      entry= NULL;
+      goto err;
+    }
+    DBUG_PRINT("info", ("Inserted %s, %d", entry->db, (int) strlen(entry->db)));
+  }
+  else
+  {
+    /* There is a record; slave_worker_hash_lock is held in all sub-cases. */
+    if (entry->usage == 0)
+    {
+      // The record is idle: it can be (re-)bound to any Worker.
+      entry->worker= !rli->last_assigned_worker ? 
+        get_least_occupied_worker(workers) : rli->last_assigned_worker;
+      entry->worker->usage_partition++;
+      entry->usage++;
+
+      my_hash_update(&mapping_db_to_worker, (uchar*) entry,
+                     (uchar*) dbname, dblength);
+    }
+    else if (entry->worker == rli->last_assigned_worker ||
+             !rli->last_assigned_worker)
+    {
+      // The partition's Worker is (or becomes) the group's Worker: U++
+
+      DBUG_ASSERT(entry->worker);
+
+      entry->usage++;
+      my_hash_update(&mapping_db_to_worker, (uchar*) entry,
+                     (uchar*) dbname, dblength);
+    }
+    else
+    {
+      // The case APH contains a W_d != W_c != NULL assigned to
+      // D-partition represents
+      // the hashing conflict and is handled as the following:
+
+      THD *thd= rli->info_thd;
+      const char *proc_info;
+      // the Worker id is ulong, hence %lu
+      const char info_format[]=
+        "Waiting for Slave Worker %lu to release partition `%s`";
+      char wait_info[sizeof(info_format) + 4*sizeof(entry->worker->id) +
+                     NAME_LEN + 1];
+
+      DBUG_ASSERT(rli->last_assigned_worker != NULL &&
+                  rli->curr_group_assigned_parts.elements > 1);
+
+      // future assignment and marking at the same time: the releasing
+      // Worker signals when it finds entry->worker pointing elsewhere
+      entry->worker= rli->last_assigned_worker;
+
+      // NOTE(review): the id reported here is the one of the new assignee,
+      // not of the Worker that still holds the partition - confirm intent.
+      sprintf(wait_info, info_format, entry->worker->id, entry->db);
+
+      proc_info= thd->enter_cond(&slave_worker_hash_cond, &slave_worker_hash_lock,
+                                 wait_info);
+      /*
+        The condition variable is shared by all partitions, so a wakeup
+        does not by itself mean that this partition was released:
+        re-check the usage counter until it drops to zero or the thread
+        is killed.
+      */
+      while (entry->usage != 0 && !thd->killed)
+        mysql_cond_wait(&slave_worker_hash_cond, &slave_worker_hash_lock);
+      // exit_cond() is relied upon to release the mutex; it is re-taken below
+      thd->exit_cond(proc_info);
+      mysql_mutex_lock(&slave_worker_hash_lock);
+
+      DBUG_ASSERT(entry->usage == 0 || thd->killed);
+
+      entry->usage= 1;
+      entry->worker->usage_partition++;
+
+    }
+    mysql_mutex_unlock(&slave_worker_hash_lock);
+  }
+
+err:
+  if (entry)
+    DBUG_PRINT("info", ("Updating %s with worker %lu", entry->db, entry->worker->id));
+    
+  DBUG_RETURN(entry ? entry->worker : NULL);
+}
+
+/**
+   Picks the Worker that currently serves the fewest partitions
+   (the smallest @c usage_partition); among equally loaded Workers
+   the one with the highest array index wins.
+   Partition count might be too coarse a measure and computing based
+   on assigned tasks is a possibility.
+   Todo: combine two e.g by means of 2-index vector of weights.
+
+   @param ws  dynamic array of Slave_worker pointers
+
+   @return the selected Worker; asserted to be non-NULL, i.e. the array
+           is expected to be non-empty
+*/
+Slave_worker *get_least_occupied_worker(DYNAMIC_ARRAY *ws)
+{
+  Slave_worker *best= NULL;
+  Slave_worker *candidate= NULL;
+  ulong best_usage= ULONG_MAX;
+
+  for (ulong idx= 0; idx < ws->elements; idx++)
+  {
+    get_dynamic(ws, (uchar*) &candidate, idx);
+    if (candidate->usage_partition > best_usage)
+      continue;                 // strictly busier than the best so far
+    best= candidate;
+    best_usage= candidate->usage_partition;
+  }
+
+  DBUG_ASSERT(best != NULL);
+
+  return best;
+}
+
+/**
+   Deallocative routine that makes few things in opposite to
+   @c get_slave_worker().
+
+   Affected by the being committed group APH tuples are updated.
+   @c last_group_done_index member is set to the arg value.
+
+   CGEP the Worker partition cache is cleaned up.
+
+   TODO: reclaim space if the actual size exceeds the limit.
+
+   @param ev     the group terminating event; its @c mts_group_cnt is
+                 the index of the group in GAQ
+   @param error  zero when the group was executed successfully; when
+                 non-zero only the CGEP/APH cleanup is performed
+*/
+
+void Slave_worker::slave_worker_ends_group(Log_event* ev, int error)
+{
+  int i;
+  ulong gaq_idx= ev->mts_group_cnt;
+
+  if (!error)
+  {
+    Slave_job_group *ptr_g=
+      (Slave_job_group *) dynamic_array_ptr(&c_rli->gaq->Q, gaq_idx);
+
+    // first ever group must have relay log name
+    DBUG_ASSERT(last_group_done_index != c_rli->gaq->s ||
+                ptr_g->group_relay_log_name != NULL);
+    DBUG_ASSERT(ptr_g->worker_id == id);
+
+    if (ptr_g->group_relay_log_name != NULL)
+    {
+      // memorizing a new relay-log file name
+
+      DBUG_ASSERT(strlen(ptr_g->group_relay_log_name) + 1
+                  <= sizeof(group_relay_log_name));
+
+      strcpy(group_relay_log_name, ptr_g->group_relay_log_name);
+    }
+
+    // positions are committed here unless it is an XID event of
+    // a transactional Worker
+    if (!(ev->get_type_code() == XID_EVENT && is_transactional()))
+    {
+      commit_positions(ev, ptr_g);
+
+      DBUG_EXECUTE_IF("crash_after_commit_and_update_pos",
+                       DBUG_SUICIDE(););
+    }
+
+    ptr_g->group_master_log_pos= group_master_log_pos;
+    ptr_g->group_relay_log_pos= group_relay_log_pos;
+    ptr_g->done= 1;    // GAQ index is available to C now
+
+    last_group_done_index= gaq_idx;
+  }
+
+  // cleanup relating to the last executed group regardless of error
+
+  for (i= curr_group_exec_parts->dynamic_ids.elements; i > 0; i--)
+  {
+    db_worker *entry= NULL;
+    my_hash_value_type hash_value;
+    char key[NAME_LEN + 2];
+    size_t key_len;
+
+    get_dynamic(&(curr_group_exec_parts->dynamic_ids), (uchar*) key, i - 1);
+    /*
+      The first byte of the key carries the name length. It must be read
+      as unsigned: with a signed plain char a length above 127 would turn
+      negative and be converted into a huge size.
+    */
+    key_len= (uchar) key[0];
+    hash_value= my_calc_hash(&mapping_db_to_worker, (uchar*) key + 1, key_len);
+
+    mysql_mutex_lock(&slave_worker_hash_lock);
+
+    entry= (db_worker *)
+      my_hash_search_using_hash_value(&mapping_db_to_worker, hash_value,
+                                      (uchar*) key + 1, key_len);
+
+    DBUG_ASSERT(entry && entry->usage != 0); // was used to break
+    DBUG_ASSERT(strlen(key + 1) == key_len);
+
+    entry->usage--;
+    my_hash_update(&mapping_db_to_worker, (uchar*) entry,
+                   (uchar*) key + 1, key_len);
+
+    if (entry->usage == 0)
+    {
+      usage_partition--;
+      // the record was re-pointed away from this Worker while in use
+      if (entry->worker != this) // Coordinator is waiting
+        mysql_cond_signal(&slave_worker_hash_cond);
+    }
+    else
+      DBUG_ASSERT(usage_partition != 0);
+
+    mysql_mutex_unlock(&slave_worker_hash_lock);
+
+    delete_dynamic_element(&(curr_group_exec_parts->dynamic_ids), i - 1);
+  }
+  curr_group_seen_begin= FALSE;
+}
+
+
+/**
+   Class circular_buffer_queue
+*/
+
+/**
+   Removes the head item of the queue.
+
+   @param val  [out] buffer the head item is copied into
+
+   @return the array index the item was located at,
+           or (ulong) -1 when the queue is empty
+*/
+ulong circular_buffer_queue::de_queue(uchar *val)
+{
+  ulong ret;
+  if (e == s)
+  {
+    DBUG_ASSERT(len == 0);
+    return (ulong) -1;
+  }
+
+  ret= e;
+  get_dynamic(&Q, val, e);
+  len--;
+  
+  // pre boundary cond: the queue was full, the freed slot becomes available
+  if (a == s)
+    a= e;
+  e= (e + 1) % s;
+
+  // post boundary cond: the head caught up the tail, the queue got empty
+  if (a == e)
+    e= s;
+
+  /*
+    Unless empty, len equals the distance from the head to the first
+    available slot. The conditional expression is parenthesized as a
+    whole: `==' binds tighter than `?:', without the parentheses the
+    assert would not compare len at all.
+  */
+  DBUG_ASSERT(e == s ||
+              len == ((a >= e) ? (a - e) : (s + a - e)));
+  DBUG_ASSERT(a != e);
+
+  return ret;
+}
+
+/**
+   removing an item from the tail side
+
+   @param val  [out] buffer the tail item is copied into
+
+   @return the array index the item was located at (it becomes the
+           first available index), or (ulong) -1 when the queue is empty
+*/
+ulong circular_buffer_queue::de_tail(uchar *val)
+{
+  if (e == s)
+  {
+    DBUG_ASSERT(len == 0);
+    return (ulong) -1;
+  }
+
+  // the last occupied slot is len - 1 positions after the head
+  a= (e + len - 1) % s;
+  get_dynamic(&Q, val, a);
+  len--;
+  
+  // post boundary cond: the only item was removed, the queue got empty
+  if (a == e)
+    e= s;
+
+  // the conditional is parenthesized as a whole since `==' binds
+  // tighter than `?:'
+  DBUG_ASSERT(e == s ||
+              len == ((a >= e) ? (a - e) : (s + a - e)));
+  DBUG_ASSERT(a != e);
+
+  return a;
+}
+/** 
+    Appends an item at the first available index.
+
+    @param item  pointer to the element to be copied into the queue
+
+    @return the used index at success or -1 when queue is full
+*/
+ulong circular_buffer_queue::en_queue(void *item)
+{
+  ulong ret;
+  if (a == s)
+  {
+    DBUG_ASSERT(a == Q.elements);
+    return (ulong) -1;
+  }
+
+  // store
+
+  ret= a;
+  set_dynamic(&Q, (uchar*) item, a);
+
+
+  // pre-boundary cond: the queue was empty, the new item is the head
+  if (e == s)
+    e= a;
+  
+  a= (a + 1) % s;
+  len++;
+
+  // post-boundary cond: the tail caught up the head, the queue got full
+  if (a == e)
+    a= s;
+
+  /*
+    Unless full (a == s), len equals the distance from the head to the
+    first available slot. The conditional is parenthesized as a whole
+    since `==' and `||' bind tighter than `?:'.
+  */
+  DBUG_ASSERT(a == s ||
+              len == ((a >= e) ? (a - e) : (s + a - e)));
+  DBUG_ASSERT(a != e);
+
+  return ret;
+}
+
+/**
+   Peeks at the head of the queue without removing it.
+
+   @return the leading pointer-sized value stored in the head element,
+           or NULL when the queue is empty
+*/
+void* circular_buffer_queue::head_queue()
+{
+  uchar *ret= NULL;
+  if (e == s)
+  {
+    DBUG_ASSERT(len == 0);
+  }
+  else
+  {
+    /*
+      The value has to be copied INTO ret. Passing ret itself (a NULL
+      pointer) as the destination of get_dynamic() made the copy target
+      address zero. Only sizeof(ret) bytes are taken so that a queue of
+      larger elements can not overrun the local variable.
+    */
+    DBUG_ASSERT(Q.size_of_element >= sizeof(ret));
+    memcpy(&ret, dynamic_array_ptr(&Q, e), sizeof(ret));
+  }
+  return (void*) ret;
+}
+
+/**
+   Compares two indexes to determine which of the two queue entities
+   is ordered first. An index below the head index @c e belongs to
+   the wrapped-around part of the buffer and is therefore ordered after
+   any index at or above @c e.
+
+   @note   The caller makes sure the args are within the valid
+           range, incl cases the queue is empty or full.
+
+   @return TRUE  if the first arg identifies a queue entity ordered
+                 after one defined by the 2nd arg,
+           FALSE otherwise.
+*/
+bool circular_buffer_queue::gt(ulong i, ulong k)
+{
+  DBUG_ASSERT(i < s && k < s);
+  DBUG_ASSERT(a != e);
+
+  const bool i_wrapped= i < e;
+  const bool k_wrapped= k < e;
+
+  // on the same side of the head the plain index order applies
+  if (i_wrapped == k_wrapped)
+    return i > k;
+
+  // on different sides the wrapped index is the later one
+  return i_wrapped ? TRUE : FALSE;
+}
+
+/**
+   The queue is processed from the head item by item
+   to purge items representing committed groups.
+   Progress in GAQ is assessed through the @c done flag of an item,
+   which the executing Worker raises at the end of the group.
+   Purging breaks at a first discovered gap, that is an item
+   that the assigned item->worker_id'th Worker has not yet completed.
+
+   The caller is supposed to be the checkpoint handler.
+
+   A copy of the last discarded item containing
+   the refreshed value of the committed low-water-mark is stored
+   into @c lwm container member for further caller's processing.
+   @c last_done is updated with the latest total_seqno for each Worker
+   that was met during GAQ parse.
+
+   @param ws  dynamic array of Slave_worker pointers indexed by worker id
+
+   @note dyn-allocated members of Slave_job_group such as
+         group_relay_log_name are freed here.
+
+   @return number of discarded items
+*/
+ulong Slave_committed_queue::move_queue_head(DYNAMIC_ARRAY *ws)
+{
+  ulong i, cnt= 0;
+
+  for (i= e; i != a && !empty();)
+  {
+    Slave_worker *w_i;
+    Slave_job_group *ptr_g, g;
+    char grl_name[FN_REFLEN];
+    ulong ind;
+
+    grl_name[0]= 0;
+    // ptr_g addresses the item in place, inside the array buffer
+    ptr_g= (Slave_job_group *) dynamic_array_ptr(&Q, i);
+    if (ptr_g->worker_id == (ulong) -1)
+      break; /* the head is not even assigned */
+    get_dynamic(ws, (uchar *) &w_i, ptr_g->worker_id);
+
+    // no stale last_group_done_index value
+    // DBUG_ASSERT(w_i->checkpoint_notified);
+
+    //if (in(w_i->last_group_done_index) && gt(i, w_i->last_group_done_index))
+    if (!ptr_g->done)
+      break; /* gap at i'th */
+
+    // memorize the last met group_relay_log_name; the item's own copy
+    // is released before the item is dequeued
+    if (ptr_g->group_relay_log_name)
+    {
+      strcpy(grl_name, ptr_g->group_relay_log_name);
+      my_free(ptr_g->group_relay_log_name);
+      ptr_g->group_relay_log_name= NULL;   // mark freed
+    }
+
+    // g receives a by-value copy of the head item
+    ind= de_queue((uchar*) &g);
+        
+    // stored the memorized name into result struct
+    if (grl_name[0] != 0)
+      strcpy(lwm.group_relay_log_name, grl_name);
+    else
+      lwm.group_relay_log_name[0]= 0;
+
+    DBUG_ASSERT(!ptr_g->group_relay_log_name);
+
+    // make the copy refer to lwm's own name buffer so that the struct
+    // assignment below keeps that buffer attached to lwm
+    g.group_relay_log_name= lwm.group_relay_log_name;
+    lwm= g; // the result struct is done for the current iteration
+
+    /* todo/fixme: the least occupied sorting out can be triggered here */
+    /* e.g 
+       set_dynamic(&w_id->c_rli->least_occupied_worker, &w_i->Q.len, w_i->id);
+       sort_dynamic(&w_id->c_rli->least_occupied_worker, (qsort_cmp) ulong_cmp);
+       int ulong_cmp(ulong *id1, ulong *id2)
+       {
+       return *id1 < *id2? -1 : (*id1 > *id2? 1 : 0);
+       }
+    */
+    DBUG_ASSERT(ind == i);
+    DBUG_ASSERT(ptr_g->total_seqno == lwm.total_seqno);
+#ifndef DBUG_OFF
+    {
+      ulonglong l;
+      get_dynamic(&last_done, (uchar *) &l, w_i->id);
+      DBUG_ASSERT(l < ptr_g->total_seqno); // there must be some progress
+    }
+#endif
+    // record the Worker's latest purged group
+    set_dynamic(&last_done, &ptr_g->total_seqno, w_i->id);
+
+    cnt++;
+    i= (i + 1) % s;
+  }
+
+  return cnt;
+}
+
+/**
+   Reporting of a Worker is delegated to its Coordinator's
+   Relay_log_info instance @c c_rli; the arguments are passed as is.
+*/
+void Slave_worker::do_report(loglevel level, int err_code,
+                             const char *msg, va_list vargs) const
+{
+  c_rli->do_report(level, err_code, msg, vargs);
+}
+
+/**
+   printf-style front end of @c do_report(): packs the variadic
+   arguments following @c msg into a va_list and forwards them.
+*/
+void Slave_worker::report(loglevel level, int err_code,
+                          const char *msg, ...) const
+{
+  va_list ap;
+
+  va_start(ap, msg);
+  do_report(level, err_code, msg, ap);
+  va_end(ap);
+}
+
+/**
+   Walks through all APH records and, for every partition that is still
+   in use, waits until its usage counter drops to zero.
+
+   @param rli     Coordinator's Relay_log_info, provides the waiting THD
+   @param ignore  when not NULL, partitions assigned to this Worker
+                  are skipped
+
+   @return the number of partitions that had to be waited for
+*/
+int wait_for_workers_to_finish(Relay_log_info const *rli, Slave_worker *ignore)
+{
+  uint ret= 0;
+  HASH *hash= &mapping_db_to_worker;
+
+  /*
+    Only the index is declared in the loop header. Declaring a second
+    `ret' there shadowed the counter above, so the function used to
+    return zero no matter how many waits happened.
+  */
+  for (uint i= 0; i < hash->records; i++)
+  {
+    db_worker *entry;
+    THD *thd= rli->info_thd;
+    const char *proc_info;
+    // the Worker id is ulong, hence %lu
+    const char info_format[]=
+      "Waiting for Slave Worker %lu to release partition `%s`";
+    char wait_info[sizeof(info_format) + 4*sizeof(entry->worker->id) +
+                   NAME_LEN + 1];
+   
+    mysql_mutex_lock(&slave_worker_hash_lock);
+  
+    entry= (db_worker*) my_hash_element(hash, i);
+
+    DBUG_ASSERT(entry);
+
+    if (ignore && entry->worker == ignore)
+    {
+      mysql_mutex_unlock(&slave_worker_hash_lock);
+      continue;
+    }
+
+    if (entry->usage > 0)
+    {
+      sprintf(wait_info, info_format, entry->worker->id, entry->db);
+      // detach the record from its Worker: the Worker signals the
+      // condition when it releases a partition not pointing to itself
+      entry->worker= NULL;
+
+      proc_info= thd->enter_cond(&slave_worker_hash_cond, &slave_worker_hash_lock,
+                               wait_info);
+      /*
+        The condition variable is shared by all partitions, so a wakeup
+        does not by itself mean that this partition was released:
+        re-check until the usage is zero or the slave is being stopped.
+      */
+      while (entry->usage > 0 && !thd->killed && !rli->abort_slave)
+        mysql_cond_wait(&slave_worker_hash_cond, &slave_worker_hash_lock);
+      // exit_cond() is relied upon to release slave_worker_hash_lock,
+      // there is no explicit unlock on this path
+      thd->exit_cond(proc_info);
+      ret++;
+
+      DBUG_ASSERT(entry->usage == 0 || thd->killed || rli->abort_slave);
+    }
+    else
+    {
+      mysql_mutex_unlock(&slave_worker_hash_lock);
+    }
+  }
+  return ret;
+}

=== added file 'sql/rpl_rli_pdb.h'
--- a/sql/rpl_rli_pdb.h	1970-01-01 00:00:00 +0000
+++ b/sql/rpl_rli_pdb.h	2011-05-16 19:43:58 +0000
@@ -0,0 +1,290 @@
+#ifndef RPL_RLI_PDB_H
+
+#define RPL_RLI_PDB_H
+
+#include "sql_string.h"
+#include "rpl_rli.h"
+#include <my_sys.h>
+#include <my_bitmap.h>
+
+/*
+  APH entry: binds a database partition to the Worker serving it
+  and counts how many times the partition is currently in use.
+*/
+typedef struct db_worker
+{
+  const char *db;         // partition (database) name, the hash key
+  Slave_worker *worker;   // Worker the partition is assigned to
+  ulong usage;            // the usage counter `U'
+
+  // todo: relax concurrency after making APH mutex/cond pair has worked
+  // pthread_mutex_t
+  // pthread_cond_t
+  // timestamp updated_at;
+
+} db_worker;
+
+bool init_hash_workers(ulong slave_parallel_workers);
+void destroy_hash_workers();
+Slave_worker *get_slave_worker(const char *dbname, Relay_log_info *rli);
+Slave_worker *get_least_occupied_worker(DYNAMIC_ARRAY *workers);
+int wait_for_workers_to_finish(Relay_log_info const *rli,
+                               Slave_worker *ignore= NULL);
+bool critical_worker(Relay_log_info *rli);
+
+#define SLAVE_WORKER_QUEUE_SIZE 8096
+#define SLAVE_INIT_DBS_IN_GROUP 4     // initial allocation for CGEP dynarray
+
+#define NUMBER_OF_FIELDS_TO_IDENTIFY_WORKER 2
+
+/* An element of a queue of jobs: an opaque pointer to the job's payload */
+typedef struct slave_job_item
+{
+  void *data;
+} Slave_job_item;
+
+/**
+   The class defines a type of queue with a predefined max size that is
+   implemented using the circular memory buffer.
+   That is items of the queue are accessed as indexed elements of
+   the array buffer in a way that when the index value reaches
+   a max value it wraps around to point to the first buffer element.
+
+   Two sentinel states are encoded with the size value:
+   e == s stands for the empty queue, a == s stands for the full one.
+*/
+class circular_buffer_queue
+{
+public:
+
+  DYNAMIC_ARRAY Q;
+  ulong s;              // the Size of the queue in terms of element
+  ulong a;              // first Available index to append at (next to tail)
+  ulong e;              // the head index
+  volatile ulong len;   // it is also queried to compute least occupied
+  bool inited_queue;
+
+  /**
+     @param el_size    size of a queue element in bytes
+     @param max        the max number of elements, the queue size
+     @param alloc_inc  allocation increment passed to the dynamic array
+
+     @note @c inited_queue stays FALSE when the array can not be allocated
+  */
+  circular_buffer_queue(uint el_size, ulong max, uint alloc_inc= 0) :
+    s(max), a(0), e(max), len(0), inited_queue(FALSE)
+  {
+    DBUG_ASSERT(s < ULONG_MAX);
+    if (!my_init_dynamic_array(&Q, el_size, s, alloc_inc))
+      inited_queue= TRUE;
+  }
+  /*
+    A default-constructed queue has no buffer. The indexes are zeroed
+    rather than left indeterminate, so that such a queue consistently
+    reports itself as both empty() and full() until it is set up.
+  */
+  circular_buffer_queue () : s(0), a(0), e(0), len(0), inited_queue(FALSE) {}
+  ~circular_buffer_queue ()
+  {
+    if (inited_queue)
+      delete_dynamic(&Q);
+  }
+
+  /**
+     Content of the being dequeued item is copied to the arg-pointer
+     location.
+
+     @return the queue's array index that the de-queued item
+             located at, or
+             an error encoded as a value beyond the legal index range.
+  */
+  ulong de_queue(uchar *);
+  /**
+     Similar to de_queue but extracting happens from the tail side.
+  */
+  ulong de_tail(uchar *val);
+
+  /**
+    return the index where the arg item locates
+           or an error encoded as a value beyond the legal range
+           [0, circular_buffer_max_index].
+           
+           Todo: define the range.
+  */
+  ulong en_queue(void *item);
+  /**
+     return the value of @c data member of the head of the queue.
+  */
+  void* head_queue();
+  bool   gt(ulong i, ulong k); // comparison of ordering of two entities
+  /* index is within the valid range */
+  bool in(ulong k) { return !empty() && 
+      (e > a ? (k >= e || k < a) : (k >= e && k < a)); }
+  bool empty() { return e == s; }
+  bool full() { return a == s; }
+};
+
+/*
+  Descriptor of a group of events, an element of the Group Assigned
+  Queue (GAQ). C stands for Coordinator, W for Worker.
+*/
+typedef struct st_slave_job_group
+{
+  char *group_master_log_name; // (actually redundant)
+  my_off_t master_log_pos;       // B-event log_pos
+  my_off_t group_master_log_pos; // T-event log_pos filled by W for CheckPoint
+  my_off_t group_relay_log_pos;  // filled by W
+
+  /* 
+     When RL name changes C allocates and fills in a new name of RL,
+     otherwise it fills in NULL.
+     C keeps track of whether each Worker has been notified on the
+     updating to make sure the routine runs once per change.
+
+     W checks the value at commit and memorizes a not-NULL.
+     Freeing unless NULL is left to C at CheckPoint.
+  */
+  char     *group_relay_log_name; // The value is last seen relay-log 
+  ulong worker_id;
+  Slave_worker *worker;
+  ulonglong total_seqno;
+
+  /* checkpoint coord are reset by CP and rotate:s */
+  uint  checkpoint_seqno;
+  my_off_t checkpoint_log_pos; // T-event log_pos filled by W for CheckPoint
+  char*    checkpoint_log_name;
+  my_off_t checkpoint_relay_log_pos; // T-event log_pos filled by W for CheckPoint
+  char*    checkpoint_relay_log_name;
+  volatile uchar done;  // Flag raised by W,  read and reset by C
+} Slave_job_group;
+
+/*
+  Fills the Slave_job_group `to' with the checkpoint coordinates of the
+  Worker pointed to by `from'.
+  The arguments are parenthesized to let any expression be passed;
+  note that each of them is still evaluated several times.
+*/
+#define retrieve_job(from, to) \
+  do \
+  { \
+    (to).worker_id= (from)->id; \
+    (to).checkpoint_seqno= (from)->checkpoint_seqno; \
+    (to).group_master_log_pos= (from)->checkpoint_master_log_pos; \
+    (to).group_master_log_name= (from)->checkpoint_master_log_name; \
+    (to).group_relay_log_pos= (from)->checkpoint_relay_log_pos; \
+    (to).group_relay_log_name= (from)->checkpoint_relay_log_name; \
+    (to).worker= (from); \
+  } while (0)
+
+/**
+  Group Assigned Queue whose first element identifies first gap
+  in committed sequence. The head of the queue is therefore next to 
+  the low-water-mark.
+*/
+class Slave_committed_queue : public circular_buffer_queue
+{
+public:
+
+  /* master's Rot-ev exec */
+  void update_current_binlog(const char *post_rotate);
+
+  /*
+     The last checkpoint time Low-Water-Mark
+  */
+  Slave_job_group lwm;
+  
+  /* last time processed indexes for each worker */
+  DYNAMIC_ARRAY last_done;
+
+  /* the being assigned group index in GAQ */
+  ulong assigned_group_index;
+
+  /**
+     @param log      not used by the constructor
+     @param el_size  size of a queue element in bytes
+     @param max      the max number of elements in the queue
+     @param n        number of Workers, the size of @c last_done
+     @param inc      allocation increment of the underlying array
+  */
+  Slave_committed_queue (const char *log, uint el_size, ulong max, uint n,
+                         uint inc= 0)
+    : circular_buffer_queue(el_size, max, inc), assigned_group_index(0)
+  {
+    uint k;
+    ulonglong l= 0;
+    my_init_dynamic_array(&last_done, sizeof(lwm.total_seqno), n, 0);
+    for (k= 0; k < n; k++)
+      insert_dynamic(&last_done, (uchar*) &l);  // empty for each Worker
+    lwm.group_relay_log_name= (char *) my_malloc(FN_REFLEN + 1, MYF(0));
+    /*
+      The allocation can fail, do not write through a NULL pointer.
+      NOTE(review): users of lwm.group_relay_log_name still assume the
+      buffer exists, a failed allocation should be reported to the caller.
+    */
+    if (lwm.group_relay_log_name)
+      lwm.group_relay_log_name[0]= 0;
+  }
+
+  ~Slave_committed_queue ()
+  { 
+    delete_dynamic(&last_done);
+    my_free(lwm.group_relay_log_name);
+  }
+
+  /* Checkpoint routine refreshes the queue */
+  ulong move_queue_head(DYNAMIC_ARRAY *ws);
+};
+
+/*
+  A circular queue extended with overfill bookkeeping; Slave_worker
+  holds one as its @c jobs member.
+*/
+class Slave_jobs_queue : public circular_buffer_queue
+{
+public:
+
+  /* C marks with true, W signals back when the queue is available again */
+  bool overfill;
+  /* NOTE(review): presumably counts the waits caused by overfill - confirm */
+  ulonglong waited_overfill;
+};
+
+/*
+  Execution context of a Worker thread. It extends Relay_log_info and
+  refers to the Coordinator's instance through @c c_rli.
+  C stands for Coordinator, W for Worker in the comments below.
+*/
+class Slave_worker : public Relay_log_info
+{
+public:
+  Slave_worker(const char *type, const char *pfs,
+               Relay_log_info *rli);
+  virtual ~Slave_worker();
+
+  mysql_mutex_t jobs_lock;
+  mysql_cond_t  jobs_cond;
+  Slave_jobs_queue jobs;
+
+  Relay_log_info *c_rli;   // the Coordinator's Relay_log_info
+
+  Dynamic_ids *curr_group_exec_parts; // CGEP
+  bool curr_group_seen_begin; // is set to TRUE with B-event at Worker exec
+  // @c last_group_done_index is for recovery, although can be viewed
+  //    as statistics as well.
+  // C marks a T-event with the incremented group_cnt that is
+  // an index in GAQ; W stores it at the event execution. 
+  // C polls the value periodically to maintain an array
+  // of the indexes in order to progress on GAQ's lwm, see @c next_event().
+  // see @c Log_event::group_cnt.
+  volatile ulong last_group_done_index; // its index in GAQ
+
+  List<Log_event> data_in_use; // events are still in use by SQL thread
+  ulong id;
+  TABLE *current_table;
+
+  // rbr
+  RPL_TABLE_LIST *tables_to_lock;           /* RBR: Tables to lock  */
+  uint tables_to_lock_count;        /* RBR: Count of tables to lock */
+  table_mapping m_table_map;      /* RBR: Mapping table-id to table */
+
+  // statistics
+  ulong wait_jobs;  // to gather statistics how many times got idle
+  ulong stmt_jobs;  // how many jobs per stmt
+  ulong trans_jobs;  // how many jobs per transaction
+  volatile int curr_jobs; // the current assignments
+  ulong usage_partition; // number of different partitions handled by this worker
+  volatile bool relay_log_change_notified; // Coord sets and resets, W can read
+  volatile bool checkpoint_notified; // Coord sets and resets, W can read
+  bool wq_overrun_set;  // W monitors its queue usage to incr/decr rli->mts_wqs_overrun
+  /*
+    We need to make this a dynamic field. /Alfranio
+  */
+  char partitions[FN_REFLEN];
+  // todo: remove
+  char checkpoint_relay_log_name[FN_REFLEN];
+  ulonglong checkpoint_relay_log_pos;
+
+  char checkpoint_master_log_name[FN_REFLEN];
+  ulonglong checkpoint_master_log_pos;
+  ulong checkpoint_seqno;
+
+  int init_info();
+  void end_info();
+  int flush_info(bool force= FALSE);
+
+  size_t get_number_worker_fields();
+
+  void slave_worker_ends_group(Log_event*, int);  // CGEP walk through to upd APH
+
+  bool commit_positions(Log_event *evt, Slave_job_group *ptr_g);
+
+  // both reporting methods forward to the Coordinator's do_report()
+  void report(loglevel level, int err_code, const char *msg, ...) const
+    ATTRIBUTE_FORMAT(printf, 4, 5);
+  void do_report(loglevel level, int err_code, const char *msg, va_list vargs) const;
+
+  MY_BITMAP group_execed;
+  
+  bool inited_group_execed;
+
+private:
+  bool read_info(Rpl_info_handler *from);
+  bool write_info(Rpl_info_handler *to);
+
+  // declared private: Slave_worker is not meant to be copied or assigned
+  Slave_worker& operator=(const Slave_worker& info);
+  Slave_worker(const Slave_worker& info);
+};
+
+extern PSI_mutex_key *key_mutex_slave_parallel_worker;
+extern PSI_mutex_key key_mutex_slave_parallel_pend_jobs;
+
+extern PSI_cond_key *key_cond_slave_parallel_worker;
+extern PSI_cond_key key_cond_slave_parallel_pend_jobs;
+#endif

=== modified file 'sql/rpl_slave.cc'
--- a/sql/rpl_slave.cc	2010-12-10 16:55:50 +0000
+++ b/sql/rpl_slave.cc	2011-05-16 19:43:58 +0000
@@ -41,6 +41,7 @@
 #include <mysqld_error.h>
 #include <mysys_err.h>
 #include "rpl_handler.h"
+#include "rpl_info_dummy.h"
 #include <signal.h>
 #include <mysql.h>
 #include <myisam.h>
@@ -50,7 +51,8 @@
 #include "log_event.h"                          // Rotate_log_event,
                                                 // Create_file_log_event,
                                                 // Format_description_log_event
-#include "server_ids.h"
+#include "dynamic_ids.h"
+#include "rpl_rli_pdb.h"
 
 #ifdef HAVE_REPLICATION
 
@@ -141,7 +143,7 @@ failed read"
 };
 
 
-typedef enum { SLAVE_THD_IO, SLAVE_THD_SQL} SLAVE_THD_TYPE;
+typedef enum { SLAVE_THD_IO, SLAVE_THD_SQL, SLAVE_THD_CHECKPOINT } SLAVE_THD_TYPE;
 
 static int process_io_rotate(Master_info* mi, Rotate_log_event* rev);
 static int process_io_create_file(Master_info* mi, Create_file_log_event* cev);
@@ -168,6 +170,7 @@ static int terminate_slave_thread(THD *t
                                   volatile uint *slave_running,
                                   bool skip_lock);
 static bool check_io_slave_killed(THD *thd, Master_info *mi, const char *info);
+int slave_worker_exec_job(Slave_worker * w, Relay_log_info *rli);
 
 /*
   Find out which replications threads are running
@@ -278,8 +281,8 @@ int init_slave()
   if (pthread_key_create(&RPL_MASTER_INFO, NULL))
     DBUG_RETURN(1);
 
-  if ((error= Rpl_info_factory::create(opt_mi_repository_id, &active_mi,
-                                       opt_rli_repository_id, &rli)))
+  if ((error= Rpl_info_factory::create_coordinators(opt_mi_repository_id, &active_mi,
+                                                    opt_rli_repository_id, &rli)))
   {
     error= 1;
     goto err;
@@ -363,8 +366,11 @@ int init_recovery(Master_info* mi, const
 {
   DBUG_ENTER("init_recovery");
 
+  int error= 0;
   Relay_log_info *rli= mi->rli;
-  const char *group_master_log_name=  rli->get_group_master_log_name();
+  char *group_master_log_name= NULL;
+
+  group_master_log_name= const_cast<char *>(rli->get_group_master_log_name());
   if (group_master_log_name[0])
   {
     mi->set_master_log_pos(max(BIN_LOG_HEADER_SIZE,
@@ -380,7 +386,7 @@ int init_recovery(Master_info* mi, const
     rli->set_event_relay_log_pos(BIN_LOG_HEADER_SIZE);
   }
 
-  DBUG_RETURN(0);
+  DBUG_RETURN(error);
 }
 
 int init_info(Master_info* mi, bool ignore_if_no_info, int thread_mask)
@@ -404,11 +410,17 @@ int init_info(Master_info* mi, bool igno
     thread is being started and the relay log info if either the
     SLAVE_SQL thread is being started or was not initialized as it is
     required by the SLAVE_IO thread.
+
+    In a multi-master environment, we need to make sure that both master
+    info and relay log info are prepared to handle events from all
+    masters. In such case, we need to execute the code below for each
+    master and correctly set the key_info_idx. /Alfranio
   */
   necessary_to_configure= mi->check_info();
   if (!(ignore_if_no_info && necessary_to_configure))
   {
-    if ((thread_mask & SLAVE_IO) != 0 && mi->init_info())
+    if ((thread_mask & SLAVE_IO) != 0 &&
+        mi->init_info())
       error= 1;
   }
 
@@ -432,8 +444,13 @@ void end_info(Master_info* mi)
   DBUG_ASSERT(mi != NULL && mi->rli != NULL);
 
   /*
-    The previous implementation was not acquiring locks.
-    We do the same here. However, this is quite strange.
+    The previous implementation was not acquiring locks.  We do the same here.
+    However, this is quite strange.
+
+    In a multi-master environment, we need to make sure that both master
+    info and relay log info are prepared to handle events from all
+    masters. In such case, we need to execute the code below for each
+    master and correctly set the key_info_idx. /Alfranio
   */
   mi->end_info();
   mi->rli->end_info();
@@ -443,7 +460,8 @@ void end_info(Master_info* mi)
 
 int remove_info(Master_info* mi)
 {
-  int error= 0;
+  int error= 1;
+  Slave_worker *worker= NULL;
   DBUG_ENTER("remove_info");
   DBUG_ASSERT(mi != NULL && mi->rli != NULL);
 
@@ -460,12 +478,39 @@ int remove_info(Master_info* mi)
   mi->rli->clear_until_condition();
   mi->rli->clear_sql_delay();
 
+  /*
+    In a multi-master environment, we need to make sure that both master
+    info and relay log info are prepared to handle events from all
+    masters. In such case, we need to execute the code below for each
+    master and correctly set the key_info_idx. /Alfranio
+  */
   mi->end_info();
   mi->rli->end_info();
 
-  if (mi->remove_info() || mi->rli->remove_info())
-    error= 1;
+  if (mi->remove_info())
+    goto err;
 
+  for (uint id= 0; id < mi->rli->recovery_parallel_workers; id++)
+  {
+    if (!(worker=
+          Rpl_info_factory::create_worker(opt_worker_repository_id, id, mi->rli)))
+      goto err;
+
+    if (worker->init_info() || worker->remove_info())
+    {
+      delete worker;
+      goto err;
+    }
+
+    delete worker;
+  }
+
+  if (mi->rli->remove_info())
+    goto err;
+
+  error= 0;
+
+err:
   DBUG_RETURN(error);
 }
 
@@ -497,7 +542,14 @@ int flush_master_info(Master_info* mi, b
 
   mysql_mutex_lock(log_lock);
 
-  int err=  (mi->rli->flush_current_log() ||  mi->flush_info(force));
+  /*
+    In a multi-master environment, we need to make sure that both master
+    info and relay log info are prepared to handle events from all
+    masters. In such case, we need to execute the code below for each
+    master and correctly set the key_info_idx. /Alfranio
+  */
+  int err=  (mi->rli->flush_current_log() ||
+             mi->flush_info(force));
 
   mysql_mutex_unlock(log_lock);
 
@@ -647,6 +699,11 @@ int terminate_slave_threads(Master_info*
 
     /*
       Flushes the master info regardles of the sync_master_info option.
+
+      In a multi-master environment, we need to make sure that both master
+      info and relay log info are prepared to handle events from all
+      masters. In such case, we need to execute the code below for each
+      master and correctly set the key_info_idx. /Alfranio
     */
     if (mi->flush_info(TRUE))
       DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS);
@@ -679,6 +736,11 @@ int terminate_slave_threads(Master_info*
 
     /*
       Flushes the relay log info regardles of the sync_relay_log_info option.
+
+      In a multi-master environment, we need to make sure that both master
+      info and relay log info are prepared to handle events from all
+      masters. In such case, we need to execute the code below for each
+      master and correctly set the key_info_idx. /Alfranio
     */
     if (mi->rli->flush_info(TRUE))
       DBUG_RETURN(ER_ERROR_DURING_FLUSH_LOGS);
@@ -1013,8 +1075,8 @@ static bool sql_slave_killed(THD* thd, R
   bool ret= FALSE;
   DBUG_ENTER("sql_slave_killed");
 
-  DBUG_ASSERT(rli->info_thd == thd);
-  DBUG_ASSERT(rli->slave_running == 1);// tracking buffer overrun
+  DBUG_ASSERT(rli->info_thd == thd || thd->slave_thread);
+  DBUG_ASSERT(rli->slave_running == 1 || thd->slave_thread);// tracking buffer overrun
   if (abort_loop || thd->killed || rli->abort_slave)
   {
     /*
@@ -1530,7 +1592,7 @@ maybe it is a *VERY OLD MASTER*.");
     mysql_free_result(master_res);
     master_res= NULL;
   }
-  if (mi->master_id == 0 && mi->ignore_server_ids->server_ids.elements > 0)
+  if (mi->master_id == 0 && mi->ignore_server_ids->dynamic_ids.elements > 0)
   {
     errmsg= "Slave configured with server id filtering could not detect the master server id.";
     err_code= ER_SLAVE_FATAL_ERROR;
@@ -2214,11 +2276,11 @@ bool show_master_info(THD* thd, Master_i
       char buff[FN_REFLEN];
       ulong i, cur_len;
       for (i= 0, buff[0]= 0, cur_len= 0;
-           i < mi->ignore_server_ids->server_ids.elements; i++)
+           i < mi->ignore_server_ids->dynamic_ids.elements; i++)
       {
         ulong s_id, slen;
         char sbuff[FN_REFLEN];
-        get_dynamic(&(mi->ignore_server_ids->server_ids), (uchar*) &s_id, i);
+        get_dynamic(&(mi->ignore_server_ids->dynamic_ids), (uchar*) &s_id, i);
         slen= sprintf(sbuff, (i == 0 ? "%lu" : ", %lu"), s_id);
         if (cur_len + slen + 4 > FN_REFLEN)
         {
@@ -2649,6 +2711,14 @@ static int sql_delay_event(Log_event *ev
   DBUG_RETURN(0);
 }
 
+/** Three-way comparison of two ulong values for use with sort_dynamic();
+    returns -1, 0 or 1 in the manner of a @c qsort_cmp callback. */
+int ulong_cmp(ulong *id1, ulong *id2)
+{
+  if (*id1 == *id2)
+    return 0;
+  return *id1 < *id2 ? -1 : 1;
+}
 
 /**
   Applies the given event and advances the relay log position.
@@ -2748,7 +2818,13 @@ int apply_event_and_update_pos(Log_event
   }
   if (reason == Log_event::EVENT_SKIP_NOT)
   {
+    /* 
+       MTS-todo: to test neither skipping nor delayed-exec logics
+       are affected by parallel exec mode.
+    */
+
     // Sleeps if needed, and unlocks rli->data_lock.
+
     if (sql_delay_event(ev, thd, rli))
       DBUG_RETURN(0);
     exec_res= ev->apply_event(rli);
@@ -2756,27 +2832,6 @@ int apply_event_and_update_pos(Log_event
   else
     mysql_mutex_unlock(&rli->data_lock);
 
-#ifndef DBUG_OFF
-  /*
-    This only prints information to the debug trace.
-
-    TODO: Print an informational message to the error log?
-  */
-  static const char *const explain[] = {
-    // EVENT_SKIP_NOT,
-    "not skipped",
-    // EVENT_SKIP_IGNORE,
-    "skipped because event should be ignored",
-    // EVENT_SKIP_COUNT
-    "skipped because event skip counter was non-zero"
-  };
-  DBUG_PRINT("info", ("OPTION_BEGIN: %d; IN_STMT: %d",
-                      test(thd->variables.option_bits & OPTION_BEGIN),
-                      rli->get_flag(Relay_log_info::IN_STMT)));
-  DBUG_PRINT("skip_event", ("%s event was %s",
-                            ev->get_type_str(), explain[reason]));
-#endif
-
   DBUG_PRINT("info", ("apply_event error = %d", exec_res));
   if (exec_res == 0)
   {
@@ -2792,31 +2847,66 @@ int apply_event_and_update_pos(Log_event
       See sql/rpl_rli.h for further details.
     */
     int error= 0;
-    if (!(ev->get_type_code() == XID_EVENT && rli->is_transactional()) ||
-        skip_event)
+    if (skip_event || !rli->is_parallel_exec() || !rli->curr_group_is_parallel)
+    {
+#ifndef DBUG_OFF
+      /*
+        This only prints information to the debug trace.
+        
+        TODO: Print an informational message to the error log?
+      */
+      static const char *const explain[] = {
+        // EVENT_SKIP_NOT,
+        "not skipped",
+        // EVENT_SKIP_IGNORE,
+        "skipped because event should be ignored",
+        // EVENT_SKIP_COUNT
+        "skipped because event skip counter was non-zero"
+      };
+      DBUG_PRINT("info", ("OPTION_BEGIN: %d; IN_STMT: %d",
+                          test(thd->variables.option_bits & OPTION_BEGIN),
+                          rli->get_flag(Relay_log_info::IN_STMT)));
+      DBUG_PRINT("skip_event", ("%s event was %s",
+                                ev->get_type_str(), explain[reason]));
+#endif
+
       error= ev->update_pos(rli);
+
 #ifndef DBUG_OFF
-    DBUG_PRINT("info", ("update_pos error = %d", error));
-    if (!rli->belongs_to_client())
+      DBUG_PRINT("info", ("update_pos error = %d", error));
+      if (!rli->belongs_to_client())
+      {
+        char buf[22];
+        DBUG_PRINT("info", ("group %s %s",
+                            llstr(rli->get_group_relay_log_pos(), buf),
+                            rli->get_group_relay_log_name()));
+        DBUG_PRINT("info", ("event %s %s",
+                            llstr(rli->get_event_relay_log_pos(), buf),
+                            rli->get_event_relay_log_name()));
+      }
+#endif
+    }
+    else
     {
-      char buf[22];
-      DBUG_PRINT("info", ("group %s %s",
-                          llstr(rli->get_group_relay_log_pos(), buf),
-                          rli->get_group_relay_log_name()));
-      DBUG_PRINT("info", ("event %s %s",
-                          llstr(rli->get_event_relay_log_pos(), buf),
-                          rli->get_event_relay_log_name()));
+      DBUG_ASSERT(rli->is_parallel_exec());
+      /* 
+         event_relay_log_pos is an anchor to possible reading restart.
+         It may become less than the group_* value.
+         However event_relay_log_pos does not affect group_relay_log_pos
+         other than through the sequentially executed events or via checkpoint.
+      */
+      rli->inc_event_relay_log_pos();
     }
-#endif
-    /*
-      The update should not fail, so print an error message and
-      return an error code.
 
-      TODO: Replace this with a decent error message when merged
-      with BUG#24954 (which adds several new error message).
-    */
     if (error)
     {
+      /*
+        The update should not fail, so print an error message and
+        return an error code.
+        
+        TODO: Replace this with a decent error message when merged
+        with BUG#24954 (which adds several new error message).
+      */
       char buf[22];
       rli->report(ERROR_LEVEL, ER_UNKNOWN_ERROR,
                   "It was not possible to update the positions"
@@ -2832,7 +2922,6 @@ int apply_event_and_update_pos(Log_event
   DBUG_RETURN(exec_res ? 1 : 0);
 }
 
-
 /**
   Top-level function for executing the next event in the relay log.
   This is called from the SQL thread.
@@ -2906,11 +2995,15 @@ static int exec_relay_log_event(THD* thd
     /*
       This tests if the position of the beginning of the current event
       hits the UNTIL barrier.
+      MTS: since master,relay-group coordinates change per checkpoint
+      at the end of the checkpoint interval UNTIL can be left far behind.
+      Hence, UNTIL forces the sequential applying.
     */
     if (rli->until_condition != Relay_log_info::UNTIL_NONE &&
         rli->is_until_satisfied(thd, ev))
     {
       char buf[22];
+
       sql_print_information("Slave SQL thread stopped because it reached its"
                             " UNTIL position %s", llstr(rli->until_pos(), buf));
       /*
@@ -2950,16 +3043,39 @@ static int exec_relay_log_event(THD* thd
       used to read info about the relay log's format; it will be deleted when
       the SQL thread does not need it, i.e. when this thread terminates.
     */
-    if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
+    // if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
     {
-      if (thd->variables.binlog_rows_query_log_events)
-        handle_rows_query_log_event(ev, rli);
-
-      DBUG_PRINT("info", ("Deleting the event after it has been executed"));
-      if (ev->get_type_code() != ROWS_QUERY_LOG_EVENT)
+      if ((!rli->is_parallel_exec() || !rli->curr_group_is_parallel))
       {
-        delete ev;
-        ev= NULL;
+        DBUG_ASSERT(!rli->is_parallel_exec() || !rli->curr_group_is_parallel ||
+                    ev->shall_skip(rli) != Log_event::EVENT_SKIP_NOT);
+
+        if (rli->curr_group_split)
+        {
+         // the current group split status is reset
+          rli->curr_group_is_parallel= TRUE;
+          rli->curr_group_split= FALSE;
+        }
+        if (ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
+        {
+          /* MTS/ TODO.
+
+             ROWS_QUERY_LOG_EVENT could be supported easier if
+             destructing part of handle_rows_query_log_event would be merged
+             with rli->cleanup_context() and the rest move into 
+             ROWS...::do_apply_event
+          */
+          if (!rli->is_parallel_exec())
+            if (thd->variables.binlog_rows_query_log_events)
+              handle_rows_query_log_event(ev, rli);
+          
+          if (ev->get_type_code() != ROWS_QUERY_LOG_EVENT)
+          {
+            DBUG_PRINT("info", ("Deleting the event after it has been executed"));
+            delete ev;
+            ev= NULL;
+          }
+        }
       }
     }
 
@@ -3576,6 +3692,749 @@ int check_temp_dir(char* tmp_file)
   DBUG_RETURN(0);
 }
 
+Slave_job_item * de_queue(Slave_jobs_queue *jobs, Slave_job_item *ret);
+
+/*
+  Worker thread for the parallel execution of the replication events.
+  @param arg  the Slave_worker this thread serves
+  NOTE(review): the initialization failure paths reach err without the
+  start-up handshake - confirm slave_start_single_worker() cannot block on it.
+  MHS_todo: consider how to handle error
+*/
+pthread_handler_t handle_slave_worker(void *arg)
+{
+  THD *thd;                     /* needs to be first for thread_stack */
+  int error= 0;
+  Slave_worker *w= (Slave_worker *) arg;
+  Relay_log_info* rli= w->c_rli;
+  ulong purge_cnt= 0;
+  ulonglong purge_size= 0;
+  struct slave_job_item _item, *job_item= &_item;
+
+  my_thread_init();
+  DBUG_ENTER("handle_slave_worker");
+
+  thd= new THD;
+  if (!thd)
+  {
+    sql_print_error("Failed during slave worker initialization");
+    goto err;
+  }
+  w->info_thd= thd;
+  thd->thread_stack = (char*)&thd;
+
+  pthread_detach_this_thread();
+  if (init_slave_thread(thd, SLAVE_THD_SQL))  // todo: make thd->sys_thr= worker
+  {
+    // todo make SQL thread killed
+    sql_print_error("Failed during slave worker initialization");
+    goto err;
+  }
+  thd->init_for_queries();
+  mysql_mutex_lock(&LOCK_thread_count);
+  threads.append(thd);
+  mysql_mutex_unlock(&LOCK_thread_count);
+
+  // Start-up handshake: replace the "not running" marker (max + 1) with 0.
+  mysql_mutex_lock(&w->jobs_lock);
+  DBUG_ASSERT(w->jobs.len == rli->mts_slave_worker_queue_len_max + 1);
+  w->jobs.len= 0;
+  mysql_cond_signal(&w->jobs_cond);  // ready for duty
+  mysql_mutex_unlock(&w->jobs_lock);
+
+  DBUG_ASSERT(thd->is_slave_error == 0);
+
+  while (!thd->killed && !error)
+  {
+      error= slave_worker_exec_job(w, rli);
+  }
+
+  if (error)
+  {
+    mysql_mutex_lock(&rli->info_thd->LOCK_thd_data);
+    rli->info_thd->awake(THD::KILL_QUERY);          // notify Crdn
+    mysql_mutex_unlock(&rli->info_thd->LOCK_thd_data);
+    thd->clear_error();
+    w->cleanup_context(thd, error);
+  }
+
+  // Purge the jobs left in the private queue, counting what is dropped.
+  mysql_mutex_lock(&w->jobs_lock);
+  while(de_queue(&w->jobs, job_item))
+  {
+    DBUG_ASSERT(job_item->data);  // must hold before the pointer is used
+    purge_cnt++;
+    purge_size += ((Log_event*) (job_item->data))->data_written;
+    delete static_cast<Log_event*>(job_item->data);
+  }
+  DBUG_ASSERT(w->jobs.len == 0);
+  mysql_mutex_unlock(&w->jobs_lock);
+
+  // Give the purged jobs back to the Coordinator's pending counters.
+  mysql_mutex_lock(&rli->pending_jobs_lock);
+  rli->pending_jobs -= purge_cnt;
+  rli->mts_pending_jobs_size -= purge_size;
+  DBUG_ASSERT(rli->mts_pending_jobs_size < rli->mts_pending_jobs_size_max);
+  mysql_mutex_unlock(&rli->pending_jobs_lock);
+
+  // Restore the "not running" marker; slave_stop_workers() waits for it.
+  mysql_mutex_lock(&w->jobs_lock);
+  w->jobs.len= rli->mts_slave_worker_queue_len_max + 1;
+  sql_print_information("Worker %lu statistics: "
+                        "events processed = %lu "
+                        "hungry waits = %lu "
+                        "priv queue overfills = %llu "
+                        ,w->id, w->stmt_jobs, w->wait_jobs, w->jobs.waited_overfill);
+
+  mysql_cond_signal(&w->jobs_cond);  // famous last goodbye
+  mysql_mutex_unlock(&w->jobs_lock);
+
+err:
+  if (thd)
+  {
+    mysql_mutex_lock(&LOCK_thread_count);
+    THD_CHECK_SENTRY(thd);
+    /*
+      to avoid close_temporary_tables() closing temp tables as those
+      are Coordinator's burden.
+    */
+    thd->system_thread= NON_SYSTEM_THREAD;
+    delete thd;
+    mysql_mutex_unlock(&LOCK_thread_count);
+  }
+
+  my_thread_end();
+  pthread_exit(0);
+  DBUG_RETURN(0);
+}
+
+/** Orders log coordinates by file name first, then by position in the file.
+    @return -1, 0 or 1, as a @c qsort_cmp callback does */
+#if 0
+int mts_recovery_cmp(Slave_job_group *id1, Slave_job_group *id2)
+#endif
+
+int mts_event_coord_cmp(LOG_POS_COORD *id1, LOG_POS_COORD *id2)
+{
+  int filecmp= strcmp(id1->file_name, id2->file_name);
+  if (filecmp != 0)
+    return filecmp < 0 ? -1 : 1;
+  /* compared directly: were pos unsigned, a difference would wrap around */
+  return id1->pos < id2->pos ? -1 : (id1->pos > id2->pos ? 1 : 0);
+}
+/** Fills @c groups from the group_execed bitmaps of Workers found above rli's group master coordinates; @return TRUE on error. */
+bool mts_recovery_groups(Relay_log_info *rli, MY_BITMAP *groups)
+{ 
+  Log_event *ev= NULL; // , *desc= NULL;
+  const char *log_name= NULL;
+  const char *errmsg= NULL;
+  bool error= FALSE;
+  DYNAMIC_ARRAY above_lwm_jobs;
+  bool curr_group_seen_begin= FALSE;
+  Slave_job_group job_worker;
+  Slave_job_group job_file;
+  IO_CACHE log;
+  File file;
+  MY_STAT s;
+
+  LOG_POS_COORD cp=
+  {
+    (char *) rli->get_group_master_log_name(),
+    rli->get_group_master_log_pos()
+  };
+
+  DBUG_ENTER("mts_recovery_groups");
+  DBUG_ASSERT(rli->recovery_parallel_workers > 0);
+
+  /*
+    Collects the jobs of the Workers whose group master coordinates are
+    greater than rli's (cp) into above_lwm_jobs, which is sorted afterwards.
+  */
+  my_init_dynamic_array(&above_lwm_jobs, sizeof(Slave_job_group),
+                        rli->recovery_parallel_workers, rli->recovery_parallel_workers);
+
+  for (uint id= 0; id < rli->recovery_parallel_workers; id++)
+  {
+    Slave_worker *worker=
+      Rpl_info_factory::create_worker(opt_worker_repository_id, id, rli);
+    worker->init_info(); // NOTE(review): worker is not NULL-checked, unlike in slave_start_single_worker()
+    retrieve_job(worker, job_file);
+    LOG_POS_COORD w_last= {worker->group_master_log_name, worker->group_master_log_pos};
+    if (mts_event_coord_cmp(&w_last, &cp) > 0)
+      insert_dynamic(&above_lwm_jobs, (uchar*) &job_file);
+    else
+      delete worker;
+  };
+
+  sort_dynamic(&above_lwm_jobs, (qsort_cmp) mts_event_coord_cmp); // NOTE(review): elements are Slave_job_group, the comparator takes LOG_POS_COORD* - confirm layouts agree
+  /*
+    In what follows, the group Recovery Bitmap is constructed.
+
+     seek(lwm);
+
+     while(w= next(above_lwm_w))
+       do
+         read G
+         if G == w->last_comm
+           w.B << group_cnt++;
+           RB |= w.B;
+            break;
+         else
+           group_cnt++;
+        while(!eof);
+        continue;
+  */
+  Format_description_log_event fdle(BINLOG_VERSION);
+  if (!fdle.is_valid())
+  {
+    error= TRUE;
+    goto err;
+  }
+
+  log_name= const_cast<Relay_log_info*>(rli)->get_group_relay_log_name();
+  if ((file= open_binlog(&log, log_name, &errmsg)) < 0)
+  {
+    error= TRUE;
+    sql_print_error("%s", errmsg);
+    goto err;
+  }
+  // Start reading at rli's group relay log position.
+  DBUG_ASSERT(my_stat(log_name, &s, MYF(0))); // TODO: Alfranio, why my_stat?
+  my_b_seek(&log, (my_off_t) rli->get_group_relay_log_pos());
+
+  bitmap_clear_all(groups);
+  rli->mts_recovery_group_cnt= 0;
+  for (uint it_job= 0; it_job < above_lwm_jobs.elements; it_job++)
+  {
+    Slave_worker *w= ((Slave_job_group *)
+                      dynamic_array_ptr(&above_lwm_jobs, it_job))->worker;
+    LOG_POS_COORD w_last= { w->group_master_log_name, w->group_master_log_pos };
+
+    sql_print_information("Recoverying relay log info based on Worker-Id %lu, "
+                          "group_relay_log_name %s, group_relay_log_pos %lu "
+                          "group_master_log_name %s, group_master_log_pos %lu",
+                          w->id,
+                          w->group_relay_log_name,
+                          (ulong) w->group_relay_log_pos,
+                          w->group_master_log_name,
+                          (ulong) w->group_master_log_pos);
+
+    // TODO: extend to handle sequence of relay logs (read(ev) -> EOF)
+
+    while ((ev= Log_event::read_log_event(&log, 0, &fdle,
+                                          opt_master_verify_checksum)))
+    {
+        DBUG_ASSERT(ev->is_valid());
+        DBUG_ASSERT(rli->mts_recovery_group_cnt < rli->checkpoint_group);
+
+        if (ev->starts_group())
+          curr_group_seen_begin= TRUE; // NOTE(review): set here but never read in the active code
+        else
+          if (ev->ends_group())
+          {
+            int ret;
+            LOG_POS_COORD ev_coord= { (char *) rli->get_group_master_log_name(),
+                                      ev->log_pos };
+            if ((ret= mts_event_coord_cmp(&ev_coord, &w_last)) == 0)
+            {
+              /*
+                 The group's end matches the Worker's last coordinates:
+                 w.B << group_cnt++;
+                 RB |= w.B;
+              */
+              for (uint i= w->checkpoint_seqno - rli->mts_recovery_group_cnt, j= 0;
+                   i <= w->checkpoint_seqno; i++, j++)
+              {
+                //bitmap_intersect(&rli->groups, &w->group_execed);
+                if (_bitmap_is_set(&w->group_execed, i))
+                  bitmap_fast_test_and_set(groups, j);
+              }
+              rli->mts_recovery_group_cnt++;
+              delete ev;
+              ev= NULL;
+              break;
+            }
+            else
+            {
+              DBUG_ASSERT(ret < 0);
+              rli->mts_recovery_group_cnt++;
+            }
+          }
+        delete ev;
+        ev= NULL;
+    }
+  }
+
+  DBUG_ASSERT(rli->mts_recovery_group_cnt < groups->n_bits);
+
+#if 0
+  for (uint it_job= 0; it_job < above_lwm_jobs.elements; it_job++)
+  {
+    group_worker_counter= 0;
+    group_lwm_counter= 0;
+    get_dynamic(&above_lwm_jobs, (uchar *) &job_worker, it_job);
+
+    sql_print_information("Recoverying relay log info based on Worker-Id %lu, "
+                          "group_relay_log_name %s, group_relay_log_pos %lu "
+                          "group_master_log_name %s, group_master_log_pos %lu",
+                          job_worker.worker_id,
+                          job_worker.group_relay_log_name,
+                          (ulong) job_worker.group_relay_log_pos,
+                          job_worker.group_master_log_name,
+                          (ulong) job_worker.group_master_log_pos);
+
+    for (uint it_file= 0; it_file < above_lwm_jobs.elements; it_file++)
+    {
+      get_dynamic(&above_lwm_jobs, (uchar *) &job_file, it_file);
+
+      /*
+        Either the current relay log file was already processed by the
+        current worker or all groups were analyzed. So, the next file
+        is checked.
+      */
+      if  ((strcmp(job_worker.group_relay_log_name,
+                  job_file.group_relay_log_name) > 0) ||
+           (group_worker_counter > (rli->checkpoint_group - 1)))
+        continue;
+
+      if (desc)
+      {
+        delete desc;
+        desc= NULL;
+      }
+
+      if (log_name)
+      {
+        end_io_cache(&log);
+        mysql_file_close(file, MYF(MY_WME));
+        log_name= NULL;
+      }
+
+      if ((file= open_binlog(&log, job_file.group_relay_log_name, &errmsg)) < 0)
+      {
+        sql_print_error("%s", errmsg);
+        goto end;
+      }
+      log_name= job_file.group_relay_log_name;
+      my_stat(log_name, &s, MYF(0));
+
+      if (!((desc= Log_event::read_log_event(&log, 0, &fdle,
+                                             opt_master_verify_checksum)) &&
+           desc->get_type_code() == FORMAT_DESCRIPTION_EVENT))
+        goto end;
+    
+      my_b_seek(&log, (my_off_t) 0);
+      while ((ev= Log_event::read_log_event(&log, 0, &fdle,
+              opt_master_verify_checksum)))
+      {
+        DBUG_ASSERT(ev->is_valid());
+
+        /*
+          All groups were analyzed. So, the next worker needs to
+          be checked.
+        */
+        if (group_worker_counter > (rli->checkpoint_group - 1))
+          break;
+
+        filecmp= strcmp(job_file.group_relay_log_name,
+                        job_worker.group_relay_log_name);
+        poscmp= ev->log_pos -
+                job_worker.group_master_log_pos;
+
+        if (filecmp > 0 || (filecmp == 0 && poscmp > 0))
+        {
+          bool unhandled= !bitmap_is_set(&job_worker.worker->group_execed,
+                                         group_worker_counter);
+          if (ev->starts_group())
+            curr_group_seen_begin= TRUE;
+
+          if (ev->ends_group() || !curr_group_seen_begin)
+          {
+            filecmp= strcmp(job_file.group_relay_log_name,
+                            rli->get_group_master_log_name());
+            poscmp= ev->log_pos -
+                    rli->get_group_master_log_pos();
+            if (filecmp > 0 || (filecmp == 0 && poscmp > 0))
+            {
+              if (unhandled)
+                bitmap_is_set(groups, group_lwm_counter);
+              group_lwm_counter++;
+            }
+            curr_group_seen_begin= FALSE;
+            group_worker_counter++;
+          }
+        }
+
+        delete ev;
+        ev= NULL;
+      }
+    }
+  }
+
+end:
+  if (desc)
+  {
+    delete desc;
+    desc= NULL;
+  }
+
+
+  if (log_name)
+  {
+    end_io_cache(&log);
+    mysql_file_close(file, MYF(MY_WME));
+    log_name= NULL;
+  }
+#endif
+
+  end_io_cache(&log);
+  mysql_file_close(file, MYF(MY_WME));
+  log_name= NULL;
+
+err:
+  // Release the Workers that were kept in above_lwm_jobs.
+  for (uint it_job= 0; it_job < above_lwm_jobs.elements; it_job++)
+  {
+    get_dynamic(&above_lwm_jobs, (uchar *) &job_worker, it_job);
+    job_worker.worker->end_info();
+    delete job_worker.worker;
+  }
+
+  delete_dynamic(&above_lwm_jobs);
+
+  DBUG_RETURN(error);
+}
+
+/**
+   Processes rli->gaq to find out the low-water-mark (lwm) coordinates,
+   copies them into rli and flushes rli's info (the central recovery table).
+   @param period  a non-forced run is skipped while less than this has elapsed
+                  since rli->last_clock, unless rli->gaq is full
+   @param force   run regardless of @c period; retry until gaq's head moves
+   @param locked  when FALSE, rli->data_lock is acquired here for the update
+   @return FALSE success, TRUE otherwise
+*/
+bool mts_checkpoint_routine(Relay_log_info *rli, ulonglong period,
+                            bool force, bool locked)
+{
+  ulong cnt;
+  bool error= FALSE;
+  struct timespec curr_clock;
+
+  DBUG_ENTER("mts_checkpoint_routine");
+
+  set_timespec_nsec(curr_clock, 0);
+  ulong diff= diff_timespec(curr_clock, rli->last_clock);
+  if (!force && diff < period && !rli->gaq->full())
+  {
+    /*
+      We do not need to execute the checkpoint now because
+      the time elapsed is not enough.
+    */
+    DBUG_RETURN(FALSE);
+  }
+
+  // Retry, napping in between, while nothing moved although a move is due.
+  do
+  {
+    cnt= rli->gaq->move_queue_head(&rli->workers);
+  } while (cnt == 0 && (rli->gaq->full()  || force) &&
+           (my_sleep(rli->mts_coordinator_basic_nap), 1));
+  if (cnt == 0)
+    goto end;
+
+  /* TODO:
+     to turn the least occupied selection in terms of jobs pieces
+  */
+  for (uint i= 0; i < rli->workers.elements; i++)
+  {
+    Slave_worker *w_i;
+    get_dynamic(&rli->workers, (uchar *) &w_i, i);
+    set_dynamic(&rli->least_occupied_workers, (uchar*) &w_i->jobs.len, w_i->id);
+  };
+  sort_dynamic(&rli->least_occupied_workers, (qsort_cmp) ulong_cmp);
+
+  if (!locked)
+    mysql_mutex_lock(&rli->data_lock);
+
+  /*
+    Coordinator::commit_positions() {
+
+    rli->gaq->lwm contains all but rli->group_master_log_name
+
+    group_master_log_name is updated only by Coordinator and it can't change
+    within checkpoint interval because Coordinator flushes the updated value
+    at once.
+    Note, unlike group_master_log_name, event_relay_log_pos is updated solely
+    within Coordinator read loop context. Hence, it's possible at times
+    event_rlp > group_rlp.
+  */
+  rli->set_group_master_log_pos(rli->gaq->lwm.group_master_log_pos);
+  rli->set_group_relay_log_pos(rli->gaq->lwm.group_relay_log_pos);
+
+  if (rli->gaq->lwm.group_relay_log_name[0] != 0)
+    rli->set_group_relay_log_name(rli->gaq->lwm.group_relay_log_name);
+
+  /*
+     todo: uncomment notifies when UNTIL will be supported
+
+     rli->notify_group_master_log_name_update();
+     rli->notify_group_relay_log_name_update();
+
+     Todo: optimize with if (wait_flag) broadcast
+         waiter: set wait_flag; waits....; drops wait_flag;
+  */
+  mysql_cond_broadcast(&rli->data_cond);
+  if (!locked)
+    mysql_mutex_unlock(&rli->data_lock);
+
+  error= rli->flush_info(TRUE);
+  /*
+    } // end of commit_positions
+  */
+  rli->reset_notified_checkpoint();
+
+end:
+  set_timespec_nsec(rli->last_clock, 0);
+  DBUG_RETURN(error);
+}
+
+/**
+   Creates and initializes Worker number @c i of @c rli, forks its thread out
+   and waits for the thread's start-up handshake.
+   @return 0 on success or 1 if it fails
+*/
+int slave_start_single_worker(Relay_log_info *rli, ulong i)
+{
+  int error= 0;
+  uint k;
+  pthread_t th;
+  Slave_worker *w= NULL;
+  Slave_job_item empty= {NULL};
+
+  if (!(w=
+      Rpl_info_factory::create_worker(opt_worker_repository_id, i, rli)))
+  {
+    sql_print_error("Failed during slave worker thread create");
+    error= 1;
+    goto err;
+  }
+
+  w->c_rli= rli;
+  w->tables_to_lock= NULL;
+  w->tables_to_lock_count= 0;
+  // NOTE(review): on the failure below w is neither stored nor freed - confirm
+  if (w->init_info())
+  {
+    sql_print_error("Failed during slave worker thread create");
+    error= 1;
+    goto err;
+  }
+
+  // TODO: remove after dynamic_ids will be sorted out (removed/refined) otherwise
+  // entry->usage assert
+  w->curr_group_exec_parts->dynamic_ids.elements= 0;
+  w->relay_log_change_notified= FALSE; // the 1st group to contain relaylog name
+  w->checkpoint_notified= FALSE;
+  w->workers= rli->workers; // shallow copying is sufficient
+  w->this_worker= w;
+  w->wait_jobs= w->trans_jobs= w->stmt_jobs= w->curr_jobs= 0;
+  w->id= i;
+  w->current_table= NULL;
+  w->usage_partition= 0;
+  w->last_group_done_index= rli->gaq->s; // out of range
+
+  w->jobs.s= rli->mts_slave_worker_queue_len_max;
+  my_init_dynamic_array(&w->jobs.Q, sizeof(Slave_job_item), w->jobs.s, 0); // todo: implement increment e.g  n * 10;
+  for (k= 0; k < w->jobs.s; k++)
+    insert_dynamic(&w->jobs.Q, (uchar*) &empty);
+
+  DBUG_ASSERT(w->jobs.Q.elements == w->jobs.s);
+
+  w->jobs.e= w->jobs.s;
+  w->jobs.a= 0;
+  w->jobs.len= rli->mts_slave_worker_queue_len_max + 1; // to first handshake
+  w->jobs.overfill= FALSE;    //  todo: move into Slave_jobs_queue constructor
+  w->jobs.waited_overfill= 0;
+  w->wq_overrun_set= FALSE;
+  set_dynamic(&rli->workers, (uchar*) &w, i);
+  mysql_mutex_init(key_mutex_slave_parallel_worker[i], &w->jobs_lock,
+                   MY_MUTEX_INIT_FAST);
+  mysql_cond_init(key_cond_slave_parallel_worker[i], &w->jobs_cond, NULL);
+
+  w->curr_group_seen_begin= FALSE;
+  if (pthread_create(&th, &connection_attrib, handle_slave_worker,
+                     (void*) w))
+  {
+    sql_print_error("Failed during slave worker thread create");
+    error= 1;
+    goto err;
+  }
+  // Wait until the Worker thread announces itself by resetting jobs.len.
+  mysql_mutex_lock(&w->jobs_lock);
+  while (w->jobs.len != 0)  // re-checked: a wakeup may be spurious
+    mysql_cond_wait(&w->jobs_cond, &w->jobs_lock);
+  mysql_mutex_unlock(&w->jobs_lock);
+  // Least occupied inited with zero
+  insert_dynamic(&rli->least_occupied_workers, (uchar*) &w->jobs.len);
+
+err:
+  return error;
+}
+
+
+/** Sets the Coordinator's MTS state up and starts @c n Worker threads;
+    @return 0 on success (also when n is 0), non-zero on failure. */
+int slave_start_workers(Relay_log_info *rli, ulong n)
+{
+  int error= 0;
+
+  if (n == 0)
+    return 0;
+
+  /* RLI constructor time alloc/init */
+  rli->init_workers(n);
+  /* CGAP dynarray: ids of partitions of the Current being executed Group */
+  my_init_dynamic_array(&rli->curr_group_assigned_parts, 1 + NAME_LEN + 1,
+                        SLAVE_INIT_DBS_IN_GROUP, 1);
+  rli->last_assigned_worker= NULL; /* associated with curr_group_assigned */
+  my_init_dynamic_array(&rli->curr_group_da, sizeof(Log_event*), 8, 2);
+  /* Least_occupied_workers: items are the size of Slave_jobs_queue::len */
+  my_init_dynamic_array(&rli->least_occupied_workers, sizeof(ulong), n, 0);
+
+  /*
+    GAQ holds seqno:s of scheduled groups. C polls workers in
+    @c lwm_checkpoint_period to update GAQ (see @c next_event()).
+    Its length is derived from @c opt_mts_slave_worker_queue_len_max so
+    that each job sent to a WQ finds room in GAQ; queue_len_max * num-of-W:s
+    is the max length case, all jobs containing one event.
+    The size of WQ stays fixed in one slave session.
+  */
+  rli->mts_slave_worker_queue_len_max= ::opt_mts_slave_worker_queue_len_max;
+  rli->gaq= new Slave_committed_queue(rli->get_group_master_log_name(),
+                                      sizeof(Slave_job_group),
+                                      1 + rli->opt_slave_parallel_workers *
+                                      rli->mts_slave_worker_queue_len_max, n);
+
+  /* Limits and tunables are copied from the global options ... */
+  rli->mts_pending_jobs_size_max= ::opt_mts_pending_jobs_size_max;
+  rli->mts_coordinator_basic_nap= ::opt_mts_coordinator_basic_nap;
+  rli->mts_worker_underrun_level= ::opt_mts_worker_underrun_level;
+
+  /* ... while counters and the current group state start from scratch. */
+  rli->mts_pending_jobs_size= 0;
+  rli->mts_wqs_underrun_w_id= (ulong) -1;
+  rli->mts_wqs_overrun= 0;
+  rli->mts_wqs_oversize= FALSE;
+  rli->mts_total_groups= 0;
+  rli->checkpoint_seqno= 0;
+  rli->curr_group_seen_begin= FALSE;
+  rli->curr_group_is_parallel= FALSE;
+  rli->curr_group_isolated= FALSE;
+  rli->curr_group_split= FALSE;
+
+  /* dyn memory to consume by Coordinator per event */
+  init_alloc_root(&rli->mts_coor_mem_root, NAME_LEN,
+                  (MAX_DBS_IN_QUERY_MTS / 2) * NAME_LEN);
+
+  /* Workers are forked out one by one; the first failure ends the attempt. */
+  for (uint w_id= 0; !error && w_id < n; w_id++)
+    error= slave_start_single_worker(rli, w_id);
+
+  if (!error && init_hash_workers(n))  // MTS: mapping_db_to_worker
+  {
+    sql_print_error("Failed to init partitions hash");
+    error= 1;
+  }
+
+  /* Reached on failure as well: record how many Workers exist by now. */
+  rli->slave_parallel_workers= rli->workers.elements;
+  // end recovery right now if mts_recovery_groups() did not find any gaps
+  if (rli->mts_recovery_group_cnt == 0)
+    rli->recovery_parallel_workers= rli->slave_parallel_workers;
+
+  return error;
+}
+
+/*
+   Stops the Worker pool: every running Worker is asked to quit, then the
+   Coordinator waits for each of them in turn and releases its resources.
+*/
+void slave_stop_workers(Relay_log_info *rli)
+{
+  THD *thd= rli->info_thd;
+
+  if (rli->slave_parallel_workers == 0)
+    return;
+
+  /* Pass 1: kill the Workers that have not announced their exit yet. */
+  for (int idx= rli->workers.elements - 1; idx >= 0; idx--)
+  {
+    Slave_worker *w;
+    get_dynamic((DYNAMIC_ARRAY*)&rli->workers, (uchar*) &w, idx);
+
+    mysql_mutex_lock(&w->jobs_lock);
+    const bool exited= (w->jobs.len == rli->mts_slave_worker_queue_len_max + 1);
+    mysql_mutex_unlock(&w->jobs_lock);
+
+    if (!exited)
+    {
+      mysql_mutex_lock(&w->info_thd->LOCK_thd_data);
+      w->info_thd->awake(THD::KILL_QUERY);
+      mysql_mutex_unlock(&w->info_thd->LOCK_thd_data);
+    }
+  }
+
+  thd_proc_info(thd, "Waiting for workers to exit");
+
+  /* Pass 2: wait for each Worker's goodbye and dismantle it. */
+  for (int idx= rli->workers.elements - 1; idx >= 0; idx--)
+  {
+    Slave_worker *w;
+    get_dynamic((DYNAMIC_ARRAY*)&rli->workers, (uchar*) &w, idx);
+
+    mysql_mutex_lock(&w->jobs_lock);
+    while (w->jobs.len != rli->mts_slave_worker_queue_len_max + 1)
+    {
+      const char *save_proc_info=
+        thd->enter_cond(&w->jobs_cond, &w->jobs_lock,
+                        "Waiting for workers to exit");
+      mysql_cond_wait(&w->jobs_cond, &w->jobs_lock);
+      thd->exit_cond(save_proc_info);
+      mysql_mutex_lock(&w->jobs_lock);
+    }
+    mysql_mutex_unlock(&w->jobs_lock);
+    mysql_mutex_destroy(&w->jobs_lock);
+    mysql_cond_destroy(&w->jobs_cond);
+
+    w->end_info();
+
+    DBUG_ASSERT(w->jobs.Q.elements == w->jobs.s);
+    delete_dynamic(&w->jobs.Q);
+    delete_dynamic_element(&rli->workers, idx);
+    delete w;
+  }
+
+  sql_print_information("MTS coordinator statistics: "
+                        "events processed = %lu "
+                        "waits due a Worker queue full = %lu "
+                        "waits due the total size = %lu "
+                        "sleeps when Workers occupied = %lu "
+                        ,rli->stmt_jobs, rli->mts_wqs_overfill_cnt, rli->wait_jobs, rli->mts_wqs_underrun_cnt);
+
+  DBUG_ASSERT(rli->pending_jobs == 0);
+  DBUG_ASSERT(rli->mts_pending_jobs_size == 0);
+
+  destroy_hash_workers();
+  delete rli->gaq;
+  delete_dynamic(&rli->least_occupied_workers);    // least occupied
+  delete_dynamic(&rli->curr_group_da);             // GCDA
+  delete_dynamic(&rli->curr_group_assigned_parts); // GCAP
+  rli->deinit_workers();
+  rli->slave_parallel_workers= 0;
+  free_root(&rli->mts_coor_mem_root, MYF(0));
+}
+
 /**
   Slave SQL thread entry point.
 
@@ -3618,6 +4477,13 @@ pthread_handler_t handle_slave_sql(void 
   rli->slave_running = 1;
 
   pthread_detach_this_thread();
+
+  /* mts-II: starting the worker pool */
+  if (slave_start_workers(rli, rli->opt_slave_parallel_workers) != 0)
+  {
+      mysql_mutex_unlock(&rli->run_lock);
+      goto err;
+  }
   if (init_slave_thread(thd, SLAVE_THD_SQL))
   {
     /*
@@ -3680,6 +4546,7 @@ pthread_handler_t handle_slave_sql(void 
     goto err;
   }
   THD_CHECK_SENTRY(thd);
+
 #ifndef DBUG_OFF
   {
     char llbuf1[22], llbuf2[22];
@@ -3877,6 +4744,9 @@ llstr(rli->get_group_master_log_pos(), l
   thd->catalog= 0;
   thd->reset_query();
   thd->reset_db(NULL, 0);
+
+  slave_stop_workers(rli); // mts-II: stopping the worker pool
+
   thd_proc_info(thd, "Waiting for slave mutex on exit");
   mysql_mutex_lock(&rli->run_lock);
   /* We need data_lock, at least to wake up any waiting master_pos_wait() */
@@ -4616,7 +5486,7 @@ static int queue_event(Master_info* mi,c
         If the master is on the ignore list, execution of
         format description log events and rotate events is necessary.
       */
-      (mi->ignore_server_ids->server_ids.elements > 0 &&
+      (mi->ignore_server_ids->dynamic_ids.elements > 0 &&
        mi->shall_ignore_server_id(s_id) &&
        /* everything is filtered out from non-master */
        (s_id != mi->master_id ||
@@ -4976,6 +5846,7 @@ static Log_event* next_event(Relay_log_i
   mysql_mutex_t *log_lock = rli->relay_log.get_log_lock();
   const char* errmsg=0;
   THD* thd = rli->info_thd;
+
   DBUG_ENTER("next_event");
 
   DBUG_ASSERT(thd != 0);
@@ -5047,7 +5918,8 @@ static Log_event* next_event(Relay_log_i
                           llstr(my_b_tell(cur_log),llbuf1),
                           llstr(rli->get_event_relay_log_pos(),llbuf2)));
       DBUG_ASSERT(my_b_tell(cur_log) >= BIN_LOG_HEADER_SIZE);
-      DBUG_ASSERT(my_b_tell(cur_log) == rli->get_event_relay_log_pos());
+
+      DBUG_ASSERT(my_b_tell(cur_log) == rli->get_event_relay_log_pos() || rli->is_parallel_exec());
 
       DBUG_PRINT("info", ("next_event group master %s %lu group relay %s %lu event %s %lu\n",
         rli->get_group_master_log_name(),
@@ -5080,6 +5952,18 @@ static Log_event* next_event(Relay_log_i
         inc_event_relay_log_pos()
       */
       rli->set_future_event_relay_log_pos(my_b_tell(cur_log));
+      ev->future_event_relay_log_pos= rli->get_future_event_relay_log_pos();
+
+      /* 
+         MTS checkpoint in the successful read branch 
+      */
+      bool force= (rli->checkpoint_seqno > (rli->checkpoint_group - 1));
+      if (rli->is_parallel_exec() && (mts_checkpoint_period != 0 || force))
+      {
+        ulonglong period= static_cast<ulonglong>(mts_checkpoint_period * 1000000ULL);
+        mts_checkpoint_routine(rli, period, force, TRUE); // ALFRANIO ERROR
+      }
+
       if (hot_log)
         mysql_mutex_unlock(log_lock);
       DBUG_RETURN(ev);
@@ -5193,7 +6077,41 @@ static Log_event* next_event(Relay_log_i
         mysql_mutex_unlock(&rli->log_space_lock);
         mysql_cond_broadcast(&rli->log_space_cond);
         // Note that wait_for_update_relay_log unlocks lock_log !
-        rli->relay_log.wait_for_update_relay_log(rli->info_thd);
+
+        mysql_mutex_t *log_lock= rli->relay_log.get_log_lock();
+        mysql_cond_t  *log_cond= rli->relay_log.get_log_cond();
+
+        const char* old_msg= thd->proc_info;
+
+        if (rli->is_parallel_exec() && mts_checkpoint_period != 0)
+        {
+          int ret= 0;
+          struct timespec waittime;
+          ulonglong period= static_cast<ulonglong>(mts_checkpoint_period * 1000000ULL);
+          ulong signal_cnt= rli->relay_log.signal_cnt;
+
+          do
+          {
+            mts_checkpoint_routine(rli, period, FALSE, FALSE); // ALFRANIO ERROR
+            set_timespec_nsec(waittime, period);
+            thd->enter_cond(log_cond, log_lock,
+                            "Slave has read all relay log; "
+                            "waiting for the slave I/O "
+                            "thread to update it");
+            ret= rli->relay_log.wait_for_update_relay_log(thd, &waittime);
+          } while ((ret == ETIMEDOUT || ret == ETIME) /* todo:remove */ &&
+                   signal_cnt == rli->relay_log.signal_cnt && !thd->killed);
+        }
+        else
+        {
+          thd->enter_cond(log_cond, log_lock,
+                          "Slave has read all relay log; "
+                          "waiting for the slave I/O "
+                          "thread to update it");
+          rli->relay_log.wait_for_update_relay_log(thd, NULL);
+        }
+        
+        thd->exit_cond(old_msg);
         // re-acquire data lock since we released it earlier
         mysql_mutex_lock(&rli->data_lock);
         continue;
@@ -5253,9 +6171,22 @@ static Log_event* next_event(Relay_log_i
         }
         rli->set_event_relay_log_pos(BIN_LOG_HEADER_SIZE);
         rli->set_event_relay_log_name(rli->linfo.log_file_name);
+        /*
+          We may update the worker here but this is not extremely
+          necessary. /Alfranio
+        */
         rli->flush_info();
       }
 
+      /* Reset the relay-log-change-notified status of  Slave Workers */
+      if (rli->is_parallel_exec())
+      {
+        DBUG_PRINT("info", ("next_event: MTS group relay log changes to %s %lu\n",
+                            rli->get_group_relay_log_name(),
+                            (ulong) rli->get_group_relay_log_pos()));
+        rli->reset_notified_relay_log_change();
+      }
+
       /*
         Now we want to open this next log. To know if it's a hot log (the one
         being written by the I/O thread now) or a cold log, we can use
@@ -5606,6 +6537,12 @@ int start_slave(THD* thd , Master_info* 
       */
       if (thread_mask & SLAVE_SQL)
       {
+        /*
+          Cache the system variable's value and use it from here on.
+          The system variable may change later; the cached copy will not.
+        */
+        mi->rli->opt_slave_parallel_workers= opt_mts_slave_parallel_workers;
+
         mysql_mutex_lock(&mi->rli->data_lock);
 
         if (thd->lex->mi.pos)
@@ -5658,9 +6595,28 @@ int start_slave(THD* thd , Master_info* 
             push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
                          ER_MISSING_SKIP_SLAVE,
                          ER(ER_MISSING_SKIP_SLAVE));
+          if (mi->rli->opt_slave_parallel_workers != 0)
+          {
+            mi->rli->opt_slave_parallel_workers= 0;
+            push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
+                                ER_NO_FEATURE_ON_PARALLEL_SLAVE,
+                                ER(ER_NO_FEATURE_ON_PARALLEL_SLAVE),
+                                "UNTIL condtion",
+                                "Slave is started in the sequential execution mode.");
+          }
         }
 
         mysql_mutex_unlock(&mi->rli->data_lock);
+
+        /* MTS technical limitation: transaction retry is not supported */
+        if (mi->rli->opt_slave_parallel_workers != 0 && slave_trans_retries != 0)
+        {
+          push_warning_printf(thd, MYSQL_ERROR::WARN_LEVEL_NOTE,
+                              ER_NO_FEATURE_ON_PARALLEL_SLAVE,
+                              ER(ER_NO_FEATURE_ON_PARALLEL_SLAVE),
+                              "Temporary failed transaction retry",
+                              "Such failure will force the slave to stop.");
+        }
       }
       else if (thd->lex->mi.pos || thd->lex->mi.relay_log_pos)
         push_warning(thd, MYSQL_ERROR::WARN_LEVEL_NOTE, ER_UNTIL_COND_IGNORED,
@@ -5800,7 +6756,11 @@ int reset_slave(THD *thd, Master_info* m
     goto err;
   }
 
-  /* Clear master's log coordinates */
+  /* 
+    Clear master's log coordinates 
+
+    Andrei needs to guarantee that this is done in sequential mode.
+  */
   mi->init_master_log_pos();
 
   if (remove_info(mi))
@@ -5942,7 +6902,7 @@ bool change_master(THD* thd, Master_info
     is mentioning IGNORE_SERVER_IDS= (...)
   */
   if (lex_mi->repl_ignore_server_ids_opt == LEX_MASTER_INFO::LEX_MI_ENABLE)
-    reset_dynamic(&mi->ignore_server_ids->server_ids);
+    reset_dynamic(&(mi->ignore_server_ids->dynamic_ids));
   for (uint i= 0; i < lex_mi->repl_ignore_server_ids.elements; i++)
   {
     ulong s_id;
@@ -5956,14 +6916,14 @@ bool change_master(THD* thd, Master_info
     else
     {
       if (bsearch((const ulong *) &s_id,
-                  mi->ignore_server_ids->server_ids.buffer,
-                  mi->ignore_server_ids->server_ids.elements, sizeof(ulong),
+                  mi->ignore_server_ids->dynamic_ids.buffer,
+                  mi->ignore_server_ids->dynamic_ids.elements, sizeof(ulong),
                   (int (*) (const void*, const void*))
                   change_master_server_id_cmp) == NULL)
-        insert_dynamic(&mi->ignore_server_ids->server_ids, &s_id);
+        insert_dynamic(&(mi->ignore_server_ids->dynamic_ids), (uchar*) &s_id);
     }
   }
-  sort_dynamic(&mi->ignore_server_ids->server_ids, (qsort_cmp) change_master_server_id_cmp);
+  sort_dynamic(&(mi->ignore_server_ids->dynamic_ids), (qsort_cmp) change_master_server_id_cmp);
 
   if (lex_mi->ssl != LEX_MASTER_INFO::LEX_MI_UNCHANGED)
     mi->ssl= (lex_mi->ssl == LEX_MASTER_INFO::LEX_MI_ENABLE);
@@ -6116,6 +7076,13 @@ bool change_master(THD* thd, Master_info
     before START SLAVE, then old will remain in relay-log.info, and will be the
     in-memory value at restart (thus causing errors, as the old relay log does
     not exist anymore).
+
+    In a multi-master environment, we need to make sure that both master
+    info and relay log info are prepared to handle events from all
+    masters. In such case, we need to execute the code below for each
+    master and correctly set the key_info_idx. /Alfranio
+
+    Andrei needs to guarantee that this is done in sequential mode.
   */
   ret= mi->rli->flush_info(TRUE);
   mysql_cond_broadcast(&mi->data_cond);
@@ -6130,65 +7097,46 @@ err:
   DBUG_RETURN(ret);
 }
 
+
+/******************************************/
+/*   MTS temporary table support section  */
+
+
 /**
-  @} (end of group Replication)
+   @return   a mutex that guards access to the SQL thread controlled
+             temporary tables list.
 */
-#endif /* HAVE_REPLICATION */
-
-Server_ids::Server_ids()
+mysql_mutex_t* mts_get_temp_table_mutex()
 {
-  my_init_dynamic_array(&server_ids, sizeof(::server_id), 16, 16);
+  return &active_mi->rli->mts_temp_tables_lock;
 }
 
-Server_ids::~Server_ids()
+/**
+   @return a pointer to the THD of the Coordinator thread, or NULL if
+           replication is not set up or the slave is in sequential mode.
+*/
+THD* mts_get_coordinator_thd()
 {
-  delete_dynamic(&server_ids);
+  return (!active_mi || !active_mi->rli || !active_mi->rli->is_parallel_exec()) ?
+    NULL : active_mi->rli->info_thd;
 }
 
-bool Server_ids::unpack_server_ids(char *param_server_ids)
-{
-  char *token= NULL, *last= NULL;
-  uint num_items= 0;
- 
-  DBUG_ENTER("Server_ids::unpack_server_ids");
-
-  token= strtok_r((char *)const_cast<const char*>(param_server_ids),
-                  " ", &last);
+/**
+   TODO: exploit the new slave_worker system thread type property
 
-  if (token == NULL)
-    DBUG_RETURN(TRUE);
+   @param  thd a reference to THD
 
-  num_items= atoi(token);
-  for (uint i=0; i < num_items; i++)
-  {
-    token= strtok_r(NULL, " ", &last);
-    if (token == NULL)
-      DBUG_RETURN(TRUE);
-    else
-    {
-      ulong val= atol(token);
-      insert_dynamic(&server_ids, &val);
-    }
-  }
-  DBUG_RETURN(FALSE);
-}
-
-bool Server_ids::pack_server_ids(String *buffer)
+   @return TRUE if thd belongs to a Worker thread and FALSE otherwise.
+*/
+bool mts_is_worker(THD *thd)
 {
-  DBUG_ENTER("Server_ids::pack_server_ids");
-
-  if (buffer->set_int(server_ids.elements, FALSE, &my_charset_bin))
-    DBUG_RETURN(TRUE);
+  THD *coord= thd->slave_thread ? mts_get_coordinator_thd() : NULL; // NULL unless parallel mode
+  return coord != NULL && coord != thd; // callers dereference the coordinator THD when TRUE
+}
 
-  for (ulong i= 0;
-       i < server_ids.elements; i++)
-  {
-    ulong s_id;
-    get_dynamic(&server_ids, (uchar*) &s_id, i);
-    if (buffer->append(" ") ||
-        buffer->append_ulonglong(s_id))
-      DBUG_RETURN(TRUE);
-  }
+/* end of MTS temp table support section */
 
-  DBUG_RETURN(FALSE);
-}
+/**
+  @} (end of group Replication)
+*/
+#endif /* HAVE_REPLICATION */

=== modified file 'sql/rpl_slave.h'
--- a/sql/rpl_slave.h	2010-12-10 16:55:50 +0000
+++ b/sql/rpl_slave.h	2011-02-27 17:35:25 +0000
@@ -239,6 +239,14 @@ extern char *master_ssl_cipher, *master_
        
 extern I_List<THD> threads;
 
+bool mts_recovery_groups(Relay_log_info *rli, MY_BITMAP *groups);
+bool mts_checkpoint_routine(Relay_log_info *rli, ulonglong period,
+                            bool force, bool locked);
+THD* mts_get_coordinator_thd();
+THD* mts_get_worker_thd();
+mysql_mutex_t* mts_get_temp_table_mutex();
+bool mts_is_worker(THD *thd);
+
 #endif /* HAVE_REPLICATION */
 
 /* masks for start/stop operations on io and sql slave threads */

=== modified file 'sql/rpl_utility.h'
--- a/sql/rpl_utility.h	2010-07-02 18:15:21 +0000
+++ b/sql/rpl_utility.h	2010-09-09 18:43:16 +0000
@@ -27,6 +27,16 @@
 #endif
 #include "mysql_com.h"
 
+/*
+  mts-II prototype macros (once were a part of my_bitmap.h...)
+*/
+#define bit_is_set(I,B)   (sizeof(I) * CHAR_BIT > (B) ?                 \
+                           (((I) & (ULL(1) << (B))) == 0 ? 0 : 1) : -1)
+#define bit_do_set(I,B)   (sizeof(I) * CHAR_BIT > (B) ?         \
+                           ((I) |= (ULL(1) << (B)), 1) : -1)
+#define bit_do_clear(I,B) (sizeof(I) * CHAR_BIT > (B) ?         \
+                           ((I) &= ~(ULL(1) << (B)), 0) : -1)
+
 class Relay_log_info;
 
 

=== modified file 'sql/share/errmsg-utf8.txt'
--- a/sql/share/errmsg-utf8.txt	2010-12-05 22:51:49 +0000
+++ b/sql/share/errmsg-utf8.txt	2011-02-27 17:35:25 +0000
@@ -6445,6 +6445,7 @@ ER_RPL_INFO_DATA_TOO_LONG
   eng "Data for column '%s' too long"
 ER_CANT_LOCK_RPL_INFO_TABLE
         eng "You can't use locks with rpl info tables."
+
 ER_NETWORK_READ_EVENT_CHECKSUM_FAILURE
   eng "Replication event checksum verification failed while reading from network."
 ER_BINLOG_READ_EVENT_CHECKSUM_FAILURE
@@ -6454,3 +6455,8 @@ ER_STMT_CACHE_FULL  
         eng "Multi-row statements required more than 'max_binlog_stmt_cache_size' bytes of storage; increase this mysqld variable and try again"
 ER_BINLOG_STMT_CACHE_SIZE_GREATER_THAN_MAX
   eng "Option binlog_stmt_cache_size (%lu) is greater than max_binlog_stmt_cache_size (%lu); setting binlog_stmt_cache_size equal to max_binlog_stmt_cache_size."
+
+ER_NO_FEATURE_ON_PARALLEL_SLAVE
+  eng "%s is not supported in Parallel Slave. %s"
+ER_UPDATED_DBS_GREATER_MAX
+  eng "Modified database names number exceeds the maximum %d; the names are not written into the replication event."

=== modified file 'sql/sp.cc'
--- a/sql/sp.cc	2010-12-16 18:18:20 +0000
+++ b/sql/sp.cc	2011-02-27 17:35:25 +0000
@@ -1150,6 +1150,7 @@ sp_create_routine(THD *thd, int type, sp
       }
       /* restore sql_mode when binloging */
       thd->variables.sql_mode= saved_mode;
+      thd->add_one_db_to_binlog_updated_dbs(sp->m_db.str);
       /* Such a statement can always go directly to binlog, no trans cache */
       if (thd->binlog_query(THD::STMT_QUERY_TYPE,
                             log_query.c_ptr(), log_query.length(),
@@ -1223,6 +1224,7 @@ sp_drop_routine(THD *thd, int type, sp_n
 
   if (ret == SP_OK)
   {
+    thd->add_one_db_to_binlog_updated_dbs(name->m_db.str);
     if (write_bin_log(thd, TRUE, thd->query(), thd->query_length()))
       ret= SP_INTERNAL_ERROR;
     sp_cache_invalidate();

=== modified file 'sql/sql_base.cc'
--- a/sql/sql_base.cc	2011-01-11 11:45:02 +0000
+++ b/sql/sql_base.cc	2011-02-27 17:35:25 +0000
@@ -40,6 +40,7 @@
 #include "sql_handler.h" // mysql_ha_flush
 #include "sql_partition.h"                      // ALTER_PARTITION_PARAM_TYPE
 #include "log_event.h"                          // Query_log_event
+#include "rpl_slave.h"                          // MTS temp table support
 #include "sql_select.h"
 #include "sp_head.h"
 #include "sp.h"
@@ -58,7 +59,6 @@
 #include <io.h>
 #endif
 
-
 bool
 No_such_table_error_handler::handle_condition(THD *,
                                               uint sql_errno,
@@ -1192,11 +1192,25 @@ bool close_cached_connection_tables(THD 
 
 static void mark_temp_tables_as_free_for_reuse(THD *thd)
 {
-  for (TABLE *table= thd->temporary_tables ; table ; table= table->next)
+#ifndef EMBEDDED_LIBRARY
+  bool mts_slave= mts_is_worker(thd);
+  TABLE *temporary_tables= mts_slave ?
+    mts_get_coordinator_thd()->temporary_tables : thd->temporary_tables;
+  if (mts_slave)
+    mysql_mutex_lock(mts_get_temp_table_mutex());
+#else
+  TABLE *temporary_tables= thd->temporary_tables;
+#endif
+
+  for (TABLE *table= temporary_tables; table ; table=table->next)
   {
     if ((table->query_id == thd->query_id) && ! table->open_by_handler)
       mark_tmp_table_for_reuse(table);
   }
+#ifndef EMBEDDED_LIBRARY
+  if (mts_slave)
+    mysql_mutex_unlock(mts_get_temp_table_mutex());
+#endif
 }
 
 
@@ -1588,6 +1602,8 @@ bool close_temporary_tables(THD *thd)
   bool was_quote_show= TRUE;
   bool error= 0;
 
+  DBUG_ASSERT(!thd->slave_thread || thd->temporary_tables == NULL);
+
   if (!thd->temporary_tables)
     DBUG_RETURN(FALSE);
 
@@ -2025,16 +2041,29 @@ TABLE *find_temporary_table(THD *thd,
                             const char *table_key,
                             uint table_key_length)
 {
-  for (TABLE *table= thd->temporary_tables; table; table= table->next)
+  TABLE *table= NULL;
+#ifndef EMBEDDED_LIBRARY
+  bool mts_slave= mts_is_worker(thd);
+  TABLE *temporary_tables= mts_slave ?
+    mts_get_coordinator_thd()->temporary_tables : thd->temporary_tables;
+  if (mts_slave)
+    mysql_mutex_lock(mts_get_temp_table_mutex());  
+#else
+  TABLE *temporary_tables= thd->temporary_tables;
+#endif
+  for (table= temporary_tables; table; table= table->next)
   {
     if (table->s->table_cache_key.length == table_key_length &&
         !memcmp(table->s->table_cache_key.str, table_key, table_key_length))
     {
-      return table;
+      break;
     }
   }
-
-  return NULL;
+#ifndef EMBEDDED_LIBRARY
+  if (mts_slave)
+    mysql_mutex_unlock(mts_get_temp_table_mutex());
+#endif
+  return table;
 }
 
 
@@ -2072,6 +2101,11 @@ TABLE *find_temporary_table(THD *thd,
 int drop_temporary_table(THD *thd, TABLE_LIST *table_list, bool *is_trans)
 {
   TABLE *table;
+#ifndef EMBEDDED_LIBRARY
+  bool mts_slave= mts_is_worker(thd);
+#endif
+  THD *thd_temp= NULL;
+
   DBUG_ENTER("drop_temporary_table");
   DBUG_PRINT("tmptable", ("closing table: '%s'.'%s'",
                           table_list->db, table_list->table_name));
@@ -2094,7 +2128,26 @@ int drop_temporary_table(THD *thd, TABLE
     unlock the table and remove the table from this list.
   */
   mysql_lock_remove(thd, thd->lock, table);
-  close_temporary_table(thd, table, 1, 1);
+
+#ifndef EMBEDDED_LIBRARY
+  if (mts_slave)
+  {
+    thd_temp= mts_get_coordinator_thd();
+    mysql_mutex_lock(mts_get_temp_table_mutex());
+  }
+  else
+#endif
+  {
+    thd_temp= thd;
+  }
+
+  close_temporary_table(thd_temp, table, 1, 1);
+
+#ifndef EMBEDDED_LIBRARY
+  if (mts_slave)
+     mysql_mutex_unlock(mts_get_temp_table_mutex());
+#endif
+
   DBUG_RETURN(0);
 }
 
@@ -2125,7 +2178,7 @@ void close_temporary_table(THD *thd, TAB
       passing non-zero value to end_slave via rli->save_temporary_tables
       when no temp tables opened, see an invariant below.
     */
-    thd->temporary_tables= table->next;
+    thd->temporary_tables= table->next; // mts: see drop_temporary_table()
     if (thd->temporary_tables)
       table->next->prev= 0;
   }
@@ -2631,7 +2684,17 @@ bool open_table(THD *thd, TABLE_LIST *ta
   if (table_list->open_type != OT_BASE_ONLY &&
       ! (flags & MYSQL_OPEN_SKIP_TEMPORARY))
   {
-    for (table= thd->temporary_tables; table ; table=table->next)
+#ifndef EMBEDDED_LIBRARY
+    bool mts_slave= mts_is_worker(thd);
+    TABLE *temporary_tables= mts_slave ?
+      mts_get_coordinator_thd()->temporary_tables : thd->temporary_tables;
+    if (mts_slave)
+      mysql_mutex_lock(mts_get_temp_table_mutex());
+#else
+    TABLE *temporary_tables= thd->temporary_tables;
+#endif
+
+    for (table= temporary_tables; table ; table=table->next)
     {
       if (table->s->table_cache_key.length == key_length +
           TMP_TABLE_KEY_EXTRA &&
@@ -2651,14 +2714,26 @@ bool open_table(THD *thd, TABLE_LIST *ta
                       (ulong) table->query_id, (uint) thd->server_id,
                       (ulong) thd->variables.pseudo_thread_id));
 	  my_error(ER_CANT_REOPEN_TABLE, MYF(0), table->alias);
+#ifndef EMBEDDED_LIBRARY
+          if (mts_slave)
+            mysql_mutex_unlock(mts_get_temp_table_mutex());
+#endif
 	  DBUG_RETURN(TRUE);
 	}
 	table->query_id= thd->query_id;
 	thd->thread_specific_used= TRUE;
         DBUG_PRINT("info",("Using temporary table"));
+#ifndef EMBEDDED_LIBRARY
+        if (mts_slave)
+          mysql_mutex_unlock(mts_get_temp_table_mutex());
+#endif
         goto reset;
       }
     }
+#ifndef EMBEDDED_LIBRARY
+    if (mts_slave)
+      mysql_mutex_unlock(mts_get_temp_table_mutex());
+#endif
   }
 
   if (table_list->open_type == OT_TEMPORARY_ONLY ||
@@ -5851,14 +5926,28 @@ TABLE *open_table_uncached(THD *thd, con
 
   if (add_to_temporary_tables_list)
   {
+#ifndef EMBEDDED_LIBRARY
+    TABLE **ptr_temporary_tables;
+    bool mts_slave= mts_is_worker(thd);
+    ptr_temporary_tables= mts_slave? 
+      &mts_get_coordinator_thd()->temporary_tables : &thd->temporary_tables;
+    if (mts_slave)
+      mysql_mutex_lock(mts_get_temp_table_mutex());
+#else
+    TABLE **ptr_temporary_tables= &thd->temporary_tables;
+#endif
     /* growing temp list at the head */
-    tmp_table->next= thd->temporary_tables;
+    tmp_table->next= *ptr_temporary_tables;
     if (tmp_table->next)
       tmp_table->next->prev= tmp_table;
-    thd->temporary_tables= tmp_table;
-    thd->temporary_tables->prev= 0;
+    *ptr_temporary_tables= tmp_table;
+    (*ptr_temporary_tables)->prev= 0;
     if (thd->slave_thread)
       slave_open_temp_tables++;
+#ifndef EMBEDDED_LIBRARY
+    if (mts_slave)
+       mysql_mutex_unlock(mts_get_temp_table_mutex());
+#endif
   }
   tmp_table->pos_in_table_list= 0;
   DBUG_PRINT("tmptable", ("opened table: '%s'.'%s' 0x%lx", tmp_table->s->db.str,

=== modified file 'sql/sql_class.cc'
--- a/sql/sql_class.cc	2010-12-17 16:14:15 +0000
+++ b/sql/sql_class.cc	2011-02-27 17:35:25 +0000
@@ -503,6 +503,7 @@ THD::THD()
    user_time(0), in_sub_stmt(0),
    binlog_unsafe_warning_flags(0),
    binlog_table_maps(0),
+   binlog_updated_db_names(NULL),
    table_map_for_update(0),
    arg_of_last_insert_id_function(FALSE),
    first_successful_insert_id_in_prev_stmt(0),
@@ -1397,6 +1398,7 @@ void THD::cleanup_after_query()
     stmt_depends_on_first_successful_insert_id_in_prev_stmt= 0;
     auto_inc_intervals_in_cur_stmt_for_binlog.empty();
     rand_used= 0;
+    binlog_updated_db_names= NULL;
   }
   if (first_successful_insert_id_in_cur_stmt > 0)
   {

=== modified file 'sql/sql_class.h'
--- a/sql/sql_class.h	2010-12-29 00:38:59 +0000
+++ b/sql/sql_class.h	2011-02-27 17:35:25 +0000
@@ -61,7 +61,8 @@ enum enum_delay_key_write { DELAY_KEY_WR
 			    DELAY_KEY_WRITE_ALL };
 enum enum_slave_exec_mode { SLAVE_EXEC_MODE_STRICT,
                             SLAVE_EXEC_MODE_IDEMPOTENT,
-                            SLAVE_EXEC_MODE_LAST_BIT};
+                            SLAVE_EXEC_MODE_LOCAL_TIMESTAMP,
+                            SLAVE_EXEC_MODE_LAST_BIT };
 enum enum_slave_type_conversions { SLAVE_TYPE_CONVERSIONS_ALL_LOSSY,
                                    SLAVE_TYPE_CONVERSIONS_ALL_NON_LOSSY};
 enum enum_mark_columns
@@ -1195,7 +1196,8 @@ enum enum_thread_type
   SYSTEM_THREAD_SLAVE_SQL= 4,
   SYSTEM_THREAD_NDBCLUSTER_BINLOG= 8,
   SYSTEM_THREAD_EVENT_SCHEDULER= 16,
-  SYSTEM_THREAD_EVENT_WORKER= 32
+  SYSTEM_THREAD_EVENT_WORKER= 32,
+  SYSTEM_THREAD_SLAVE_WORKER= 64
 };
 
 inline char const *
@@ -1211,6 +1213,7 @@ show_system_thread(enum_thread_type thre
     RETURN_NAME_AS_STRING(SYSTEM_THREAD_NDBCLUSTER_BINLOG);
     RETURN_NAME_AS_STRING(SYSTEM_THREAD_EVENT_SCHEDULER);
     RETURN_NAME_AS_STRING(SYSTEM_THREAD_EVENT_WORKER);
+    RETURN_NAME_AS_STRING(SYSTEM_THREAD_SLAVE_WORKER);
   default:
     sprintf(buf, "<UNKNOWN SYSTEM THREAD: %d>", thread);
     return buf;
@@ -1718,6 +1721,11 @@ private:
     transaction cache.
   */
   uint binlog_table_maps;
+  /*
+    MTS: list of names of the databases to be updated by the query
+  */
+  List<char> *binlog_updated_db_names;
+
 public:
   void issue_unsafe_warnings();
 
@@ -1727,6 +1735,41 @@ public:
   void clear_binlog_table_maps() {
     binlog_table_maps= 0;
   }
+
+  /*
+    MTS: accessor to binlog_updated_db_names list
+  */
+  List<char> * get_binlog_updated_db_names() {
+    return binlog_updated_db_names;
+  }
+
+  /*
+     MTS: initializer of binlog_updated_db_names list
+  */
+  void set_binlog_updated_db_names(List<char>* arg)
+  {
+    binlog_updated_db_names= arg;
+  }
+
+  /*
+     MTS: resetter of binlog_updated_db_names list normally
+     at the end of the query execution
+  */
+  void clear_binlog_updated_db_names() { binlog_updated_db_names= NULL; }
+
+  /* MTS: method inserts a new unique name into binlog_updated_dbs */
+  void add_to_binlog_updated_dbs(const char *db);
+
+  /* 
+     MTS: method shortcuts initialization and insertion of just one db name
+     into binlog_updated_dbs
+  */
+  void add_one_db_to_binlog_updated_dbs(const char *db)
+  {
+    set_binlog_updated_db_names(new List<char>);
+    binlog_updated_db_names->push_back(strdup_root(mem_root, db));
+  }
+
 #endif /* MYSQL_CLIENT */
 
 public:

=== modified file 'sql/sql_db.cc'
--- a/sql/sql_db.cc	2010-12-10 12:52:55 +0000
+++ b/sql/sql_db.cc	2011-02-27 17:35:25 +0000
@@ -660,7 +660,7 @@ not_silent:
       */
       qinfo.db     = db;
       qinfo.db_len = strlen(db);
-
+      thd->add_one_db_to_binlog_updated_dbs(db);
       /*
         These DDL methods and logging are protected with the exclusive
         metadata lock on the schema
@@ -964,6 +964,7 @@ update_binlog:
 
     if (query_pos != query_data_start)
     {
+      thd->add_one_db_to_binlog_updated_dbs(db);
       /*
         These DDL methods and logging are protected with the exclusive
         metadata lock on the schema.

=== modified file 'sql/sql_parse.cc'
--- a/sql/sql_parse.cc	2010-12-29 00:38:59 +0000
+++ b/sql/sql_parse.cc	2011-01-11 23:01:02 +0000
@@ -298,8 +298,7 @@ void init_update_queries(void)
                                             CF_CAN_GENERATE_ROW_EVENTS;
   sql_command_flags[SQLCOM_CREATE_INDEX]=   CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS;
   sql_command_flags[SQLCOM_ALTER_TABLE]=    CF_CHANGES_DATA | CF_WRITE_LOGS_COMMAND |
-                                            CF_AUTO_COMMIT_TRANS |
-                                            CF_WRITE_RPL_INFO_COMMAND;
+                                            CF_AUTO_COMMIT_TRANS;
   sql_command_flags[SQLCOM_TRUNCATE]=       CF_CHANGES_DATA | CF_WRITE_LOGS_COMMAND |
                                             CF_AUTO_COMMIT_TRANS;
   sql_command_flags[SQLCOM_DROP_TABLE]=     CF_CHANGES_DATA | CF_AUTO_COMMIT_TRANS;
@@ -5165,7 +5164,7 @@ bool check_stack_overrun(THD *thd, long 
 			 uchar *buf __attribute__((unused)))
 {
   long stack_used;
-  DBUG_ASSERT(thd == current_thd);
+  DBUG_ASSERT(thd == current_thd);  // mts-II: be prepared to hit it
   if ((stack_used=used_stack(thd->thread_stack,(char*) &stack_used)) >=
       (long) (my_thread_stack_size - margin))
   {

=== modified file 'sql/sql_rename.cc'
--- a/sql/sql_rename.cc	2010-11-18 16:34:56 +0000
+++ b/sql/sql_rename.cc	2011-02-27 17:35:25 +0000
@@ -317,6 +317,14 @@ do_rename(THD *thd, TABLE_LIST *ren_tabl
       my_error(ER_FILE_NOT_FOUND, MYF(0), name, my_errno);
       break;
   }
+
+  if (!thd->get_binlog_updated_db_names())
+  {
+    thd->set_binlog_updated_db_names(new List<char>);
+  }
+  thd->add_to_binlog_updated_dbs(ren_table->db);
+  thd->add_to_binlog_updated_dbs(new_db);
+
   if (rc && !skip_error)
     DBUG_RETURN(1);
 

=== modified file 'sql/sql_table.cc'
--- a/sql/sql_table.cc	2010-12-17 18:43:38 +0000
+++ b/sql/sql_table.cc	2011-02-27 17:35:25 +0000
@@ -2236,6 +2236,13 @@ int mysql_rm_table_no_locks(THD *thd, TA
                   find_temporary_table(thd, table) &&
                   table->mdl_request.ticket != NULL));
 
+    /* MTS: similarly to decide_logging_format() gathering of the db names */
+    if (!thd->get_binlog_updated_db_names())
+    {
+      thd->set_binlog_updated_db_names(new List<char>);
+    }
+    thd->add_to_binlog_updated_dbs(table->db);
+
     /*
       drop_temporary_table may return one of the following error codes:
       .  0 - a temporary table was successfully dropped.
@@ -4562,7 +4569,10 @@ bool mysql_create_table(THD *thd, TABLE_
       (!thd->is_current_stmt_binlog_format_row() ||
        (thd->is_current_stmt_binlog_format_row() &&
         !(create_info->options & HA_LEX_CREATE_TMP_TABLE))))
+  {
+    thd->add_one_db_to_binlog_updated_dbs(create_table->db);
     result= write_bin_log(thd, TRUE, thd->query(), thd->query_length(), is_trans);
+  }
 
 end:
   DBUG_RETURN(result);
@@ -5942,6 +5952,15 @@ bool mysql_alter_table(THD *thd,char *ne
   db=table_list->db;
   if (!new_db || !my_strcasecmp(table_alias_charset, new_db, db))
     new_db= db;
+
+  if (!thd->get_binlog_updated_db_names())
+  {
+    thd->set_binlog_updated_db_names(new List<char>);
+  }
+  thd->add_to_binlog_updated_dbs(db);
+  if (new_db != db)
+    thd->add_to_binlog_updated_dbs(new_db);
+
   build_table_filename(reg_path, sizeof(reg_path) - 1, db, table_name, reg_ext, 0);
   build_table_filename(path, sizeof(path) - 1, db, table_name, "", 0);
 

=== modified file 'sql/sql_trigger.cc'
--- a/sql/sql_trigger.cc	2010-11-29 16:27:58 +0000
+++ b/sql/sql_trigger.cc	2011-02-27 17:35:25 +0000
@@ -521,6 +521,8 @@ bool mysql_create_or_drop_trigger(THD *t
 end:
   if (!result)
   {
+    if (tables)
+      thd->add_one_db_to_binlog_updated_dbs(tables->db);
     result= write_bin_log(thd, TRUE, stmt_query.ptr(), stmt_query.length());
   }
 

=== modified file 'sql/sql_view.cc'
--- a/sql/sql_view.cc	2010-12-14 11:15:13 +0000
+++ b/sql/sql_view.cc	2011-02-27 17:35:25 +0000
@@ -689,6 +689,7 @@ bool mysql_create_view(THD *thd, TABLE_L
     buff.append(views->source.str, views->source.length);
 
     int errcode= query_error_code(thd, TRUE);
+    thd->add_one_db_to_binlog_updated_dbs(views->db);
     if (thd->binlog_query(THD::STMT_QUERY_TYPE,
                           buff.ptr(), buff.length(), FALSE, FALSE, FALSE, errcode))
       res= TRUE;
@@ -1682,6 +1683,11 @@ bool mysql_drop_view(THD *thd, TABLE_LIS
       }
       continue;
     }
+    if (!thd->get_binlog_updated_db_names())
+    {
+      thd->set_binlog_updated_db_names(new List<char>);
+    }
+    thd->add_to_binlog_updated_dbs(view->db);
     if (mysql_file_delete(key_file_frm, path, MYF(MY_WME)))
       error= TRUE;
 

=== modified file 'sql/sys_vars.cc'
--- a/sql/sys_vars.cc	2011-01-03 14:50:58 +0000
+++ b/sql/sys_vars.cc	2011-02-27 17:35:25 +0000
@@ -433,7 +433,11 @@ static Sys_var_mybool Sys_binlog_direct(
 
 static const char *repository_names[]=
 {
-  "FILE", "TABLE", 0
+  "FILE", "TABLE",
+#ifndef DBUG_OFF
+  "DUMMY",
+#endif
+  0
 };
 
 ulong opt_mi_repository_id;
@@ -450,6 +454,13 @@ static Sys_var_enum Sys_rli_repository(
        , READ_ONLY GLOBAL_VAR(opt_rli_repository_id), CMD_LINE(REQUIRED_ARG),
        repository_names, DEFAULT(0));
 
+ulong opt_worker_repository_id;
+static Sys_var_enum Sys_worker_repository(
+       "worker_info_repository",
+       "Defines the type of the repository for the worker information."
+       , READ_ONLY GLOBAL_VAR(opt_worker_repository_id), CMD_LINE(REQUIRED_ARG),
+       repository_names, DEFAULT(0));
+
 static Sys_var_mybool Sys_binlog_rows_query(
        "binlog_rows_query_log_events",
        "Allow writing of Rows_query_log events into binary log.",
@@ -1905,7 +1916,8 @@ static Sys_var_mybool Sys_slave_compress
        DEFAULT(FALSE));
 
 #ifdef HAVE_REPLICATION
-static const char *slave_exec_mode_names[]= {"STRICT", "IDEMPOTENT", 0};
+static const char *slave_exec_mode_names[]=  /* presumably consumed by Slave_exec_mode below -- confirm */
+       {"STRICT", "IDEMPOTENT", "PARALLEL", "LOCAL_TIMESTAMP", 0};  /* two new names; still 0-terminated */
 static Sys_var_enum Slave_exec_mode(
        "slave_exec_mode",
        "Modes for how replication events should be executed. Legal values "
@@ -3132,6 +3144,19 @@ static Sys_var_uint Sys_sync_relayloginf
        "synchronous flushing",
        GLOBAL_VAR(sync_relayloginfo_period), CMD_LINE(REQUIRED_ARG),
        VALID_RANGE(0, UINT_MAX), DEFAULT(0), BLOCK_SIZE(1));
+
+static Sys_var_uint Sys_checkpoint_mts_period(  /* exposes mts_checkpoint_period */
+       "mts_checkpoint_period", "Gather workers' activities to "
+       "flush the relay log info to disk after every #th milli-seconds. "
+       "The zero value disables the checkpoint routine (makes sense for debugging).",
+       GLOBAL_VAR(mts_checkpoint_period), CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(0, UINT_MAX), DEFAULT(300), BLOCK_SIZE(1));  /* 0 is legal: per the help text it disables checkpoints */
+
+static Sys_var_uint Sys_checkpoint_mts_group(  /* exposes mts_checkpoint_group */
+       "mts_checkpoint_group", "Define the number of transactions "
+       "before a checkpoint operation is called.",
+       GLOBAL_VAR(mts_checkpoint_group), CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(512, UINT_MAX), DEFAULT(512), BLOCK_SIZE(8));  /* default equals the lower bound; block size is 8 */
 #endif
 
 static Sys_var_uint Sys_sync_binlog_period(
@@ -3153,6 +3178,52 @@ static Sys_var_ulong Sys_slave_trans_ret
        "or elapsed lock wait timeout, before giving up and stopping",
        GLOBAL_VAR(slave_trans_retries), CMD_LINE(REQUIRED_ARG),
        VALID_RANGE(0, ULONG_MAX), DEFAULT(10), BLOCK_SIZE(1));
+
+static Sys_var_ulong Sys_slave_parallel_workers(  /* exposes mts_slave_parallel_workers */
+       "mts_slave_parallel_workers",
+       "Number of worker threads for executing events in parallel ",  /* NOTE(review): stray trailing space in help text */
+       GLOBAL_VAR(opt_mts_slave_parallel_workers), CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(0, ULONG_MAX), DEFAULT(0), BLOCK_SIZE(1));  /* NOTE(review): no practical upper bound on thread count -- confirm intended */
+static Sys_var_ulong Sys_mts_slave_worker_queue_len_max(  /* max length of one Worker queue */
+       "mts_slave_worker_queue_len_max",  /* help pieces below are concatenated: each needs its separator */
+       "Max length of one MTS Worker queue. Presence in the queue indicates "
+       "a replication event was read out of Relay log and not yet applied. "
+       "Notice the max size of event data in all queues are governed by "
+       "mts_pending_jobs_size_max. Whichever limit is reached Coordinator thread"
+       " suspends further jobs assigning until conditions have been improved.",
+       GLOBAL_VAR(opt_mts_slave_worker_queue_len_max), CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(1, ULONG_MAX - 1), DEFAULT(40000), BLOCK_SIZE(1));
+static Sys_var_mybool Sys_slave_local_timestamp(  /* boolean option, FALSE by default */
+       "mts_exp_slave_local_timestamp", "If enabled slave itself computes the event applying "
+       "time value to implicitly affected timestamp columns. Otherwise (default) "
+       "it installs prescribed by the master value",
+       GLOBAL_VAR(opt_mts_slave_local_timestamp), CMD_LINE(OPT_ARG), DEFAULT(FALSE));
+static Sys_var_ulong Sys_mts_partition_hash_soft_max(  /* exposes mts_partition_hash_soft_max */
+       "mts_partition_hash_soft_max",
+       "Number of records in the mts partition hash below which "
+       "entries with zero usage are tolerated",
+       GLOBAL_VAR(opt_mts_partition_hash_soft_max), CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(0, ULONG_MAX), DEFAULT(16), BLOCK_SIZE(1));  /* a record count, not a byte size (per help text) */
+static Sys_var_ulonglong Sys_mts_pending_jobs_size_max(  /* cap on not-yet-applied event data in Worker queues */
+       "mts_pending_jobs_size_max",  /* help pieces below are concatenated: each needs its separator */
+       "Max size of Slave Worker queues holding yet not applied events. "
+       "The least possible value must be not less than the master size "
+       "max_allowed_packet.",
+       GLOBAL_VAR(opt_mts_pending_jobs_size_max), CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(1024, (ulonglong)~(intptr)0), DEFAULT(16 * 1024*1024),
+       BLOCK_SIZE(1024), ON_CHECK(0));  /* NOTE(review): nothing here enforces the max_allowed_packet floor the help text mentions */
+static Sys_var_ulong Sys_mts_coordinator_basic_nap(  /* exposes mts_coordinator_basic_nap */
+       "mts_coordinator_basic_nap",
+       "Time in msec to sleep by MTS Coordinator to avoid the Worker queues "
+       "room overrun",
+       GLOBAL_VAR(opt_mts_coordinator_basic_nap), CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(0, ULONG_MAX), DEFAULT(5), BLOCK_SIZE(1));  /* unit is msec per the help text */
+static Sys_var_ulong Sys_mts_worker_underrun_level(  /* exposes mts_worker_underrun_level */
+       "mts_worker_underrun_level",
+       "percent of Worker queue size at which Worker is considered to become "
+       "hungry",
+       GLOBAL_VAR(opt_mts_worker_underrun_level), CMD_LINE(REQUIRED_ARG),
+       VALID_RANGE(0, 100), DEFAULT(0), BLOCK_SIZE(1));  /* a percentage, hence the 0..100 range */
 #endif
 
 static bool check_locale(sys_var *self, THD *thd, set_var *var)

=== modified file 'sql/table.cc'
--- a/sql/table.cc	2011-01-10 16:37:47 +0000
+++ b/sql/table.cc	2011-01-11 23:01:02 +0000
@@ -57,6 +57,9 @@ LEX_STRING RLI_INFO_NAME= {C_STRING_WITH
 /* MI_INFO name */
 LEX_STRING MI_INFO_NAME= {C_STRING_WITH_LEN("slave_master_info")};
 
+/* WORKER_INFO name; get_table_category() maps a matching table name to TABLE_CATEGORY_RPL_INFO */
+LEX_STRING WORKER_INFO_NAME= {C_STRING_WITH_LEN("slave_worker_info")};
+
 	/* Functions defined in this file */
 
 void open_table_error(TABLE_SHARE *share, int error, int db_errno,
@@ -278,6 +281,12 @@ TABLE_CATEGORY get_table_category(const 
                       MI_INFO_NAME.str,
                       name->str) == 0))
       return TABLE_CATEGORY_RPL_INFO;
+
+    if ((name->length == WORKER_INFO_NAME.length) &&
+        (my_strcasecmp(system_charset_info,
+                      WORKER_INFO_NAME.str,
+                      name->str) == 0))
+      return TABLE_CATEGORY_RPL_INFO;
   }
 
   return TABLE_CATEGORY_USER;


Attachment: [text/bzr-bundle] bzr/andrei.elkin@oracle.com-20110518153208-w3lhkrkew8mqhnmo.bundle
Thread
bzr commit into mysql-next-mr-wl5569 branch (andrei.elkin:3487) WL#5569 WL#5754 | Andrei Elkin | 19 May