List: Commits
From: Rohit Kalhans  Date: May 2 2012 12:05pm
Subject: bzr push into mysql-trunk branch (rohit.kalhans:3762 to 3763) WL#5597
 3763 Rohit Kalhans	2012-05-02 [merge]
      WL#5597: Using batch operations when there is no index in RBR
      
      CONTEXT
      -------
      
      With RBR, if a table has no indexes, the slave does a full table scan for
      each changed row (i.e. each update or delete). This can be extremely time
      consuming when a large number of rows is affected.
      
      SOLUTION
      ---------
      
      When a table has neither a primary key nor any other index, create a
      temporary in-memory index (an in-memory hash table) and store the rows to
      be changed in it. Then, while scanning the table once, check each row
      against the hash table. If there is a match, apply the operation, i.e.
      update or delete.
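
      A minimal sketch of that idea (editorial illustration only, with
      hypothetical names; the actual implementation is in the sql/ files
      listed below):

          #include <string>
          #include <unordered_map>
          #include <utility>
          #include <vector>

          struct Row { std::string key; };  // serialized non-blob columns

          void apply_with_hash_scan(
              const std::vector<std::pair<Row, Row>> &changes,  // (before, after)
              std::vector<Row> &table)
          {
            // Phase 1: hash every before-image; a multimap tolerates
            // duplicate keys (several rows may hash alike).
            std::unordered_multimap<std::string, const std::pair<Row, Row> *> h;
            for (const auto &c : changes)
              h.emplace(c.first.key, &c);

            // Phase 2: one pass over the table instead of one scan per
            // changed row.
            for (auto &row : table)
            {
              auto it = h.find(row.key);
              if (it != h.end())
              {
                row = it->second->second;  // apply the update (a delete would
                h.erase(it);               // erase); apply each change once
              }
            }
          }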

    removed:
      mysql-test/suite/rpl/r/rpl_row_find_row_debug.result
      mysql-test/suite/rpl/t/rpl_row_find_row_debug.test
    added:
      mysql-test/extra/rpl_tests/rpl_row_idempotency.test
      mysql-test/include/rpl_hash_scan_assertion.inc
      mysql-test/suite/rpl/r/rpl_row_hash_scan.result
      mysql-test/suite/rpl/r/rpl_row_hash_scan_sanity.result
      mysql-test/suite/rpl/t/rpl_row_hash_scan.test
      mysql-test/suite/rpl/t/rpl_row_hash_scan_sanity.test
      mysql-test/suite/sys_vars/r/slave_rows_search_algorithms_basic.result
      mysql-test/suite/sys_vars/t/slave_rows_search_algorithms_basic.test
    modified:
      mysql-test/collections/default.daily
      mysql-test/extra/rpl_tests/rpl_record_compare.test
      mysql-test/r/mysqld--help-notwin.result
      mysql-test/r/mysqld--help-win.result
      mysql-test/suite/rpl/r/rpl_bug26395.result
      mysql-test/suite/rpl/r/rpl_parallel_start_stop.result
      mysql-test/suite/rpl/r/rpl_relayrotate.result
      mysql-test/suite/rpl/r/rpl_row_idempotency.result
      mysql-test/suite/rpl/r/rpl_row_rec_comp_innodb.result
      mysql-test/suite/rpl/r/rpl_row_rec_comp_myisam.result
      mysql-test/suite/rpl/t/rpl_bug26395.test
      mysql-test/suite/rpl/t/rpl_parallel_start_stop.test
      mysql-test/suite/rpl/t/rpl_relayrotate.test
      mysql-test/suite/rpl/t/rpl_row_idempotency.test
      sql/handler.h
      sql/log_event.cc
      sql/log_event.h
      sql/mysqld.cc
      sql/mysqld.h
      sql/rpl_utility.cc
      sql/rpl_utility.h
      sql/sql_class.h
      sql/sys_vars.cc
      storage/blackhole/ha_blackhole.h
 3762 Norvald H. Ryeng	2012-05-02
      Bug#13330886 TOO MANY ROWS WITH ALL|ANY
      
      Post push fix. Changed test result for test explain_json_validate.

    modified:
      mysql-test/suite/explain_json_validate/r/explain_json_validate.result
=== modified file 'mysql-test/collections/default.daily'
--- a/mysql-test/collections/default.daily	2012-03-21 10:25:37 +0000
+++ b/mysql-test/collections/default.daily	2012-05-02 12:04:42 +0000
@@ -28,6 +28,10 @@ perl mysql-test-run.pl --timer --force -
 perl mysql-test-run.pl --timer --force --parallel=auto --comment=n_mix_4k_size --vardir=var-n_mix --mysqld=--binlog-format=mixed --experimental=collections/default.experimental --skip-ndb --skip-test-list=collections/disabled-per-push.list --mysqld=--innodb-page-size=4k --skip-test=innodb_ignore_builtin
 perl mysql-test-run.pl --timer --force --parallel=auto --comment=n_mix_8k_size --vardir=var-n_mix --mysqld=--binlog-format=mixed --experimental=collections/default.experimental --skip-ndb --skip-test-list=collections/disabled-per-push.list --mysqld=--innodb-page-size=8k --skip-test=innodb_ignore_builtin
 
+# Additional run to test rpl HASH_SCAN & INDEX_SCAN
+perl mysql-test-run.pl --force --timer --parallel=auto --experimental=collections/default.experimental --comment=binlog_rpl_row_hash_scan --vardir=var-binlog_rpl_row_hash_scan --mysqld=--binlog-format=row --suite=binlog,rpl --mysqld=--slave-rows-search-algorithms=HASH_SCAN,INDEX_SCAN --big-test --testcase-timeout=60
+perl mysql-test-run.pl --timer --force --parallel=auto --comment=rpl_binlog_row_hash_MTS --vardir=var-mts-rpl-binlog-hash-n_mix --mysqld=--binlog-format=row --experimental=collections/default.experimental --skip-ndb  --unit-tests --mysqld=--slave-parallel-workers=4 --mysqld=--slave-transaction-retries=0 --suite=rpl,binlog  --mysqld=--slave-rows-search-algorithms=HASH_SCAN,INDEX_SCAN
+
 #Engine independent tests
 perl mysql-test-run.pl --timer --force --debug-server --parallel=auto --comment=eits-rpl-binlog-row-tests-myisam-engine-debug --experimental=collections/default.experimental --vardir=var-binlog-row-eits-tests-myisam-engine-debug --suite=engines/iuds,engines/funcs --suite-timeout=500 --max-test-fail=0 --retry-failure=0 --mysqld=--default-storage-engine=myisam --do-test=rpl --mysqld=--binlog-format=row --skip-test-list=collections/disabled-daily.list
 perl mysql-test-run.pl --timer --force --debug-server --parallel=auto  --comment=eits-rpl-binlog-mixed-tests-myisam-engine-debug --experimental=collections/default.experimental --vardir=var-binlog-mixed-eits-tests-myisam-engine-debug --suite=engines/iuds,engines/funcs --suite-timeout=500 --max-test-fail=0 --retry-failure=0 --mysqld=--default-storage-engine=myisam --do-test=rpl --mysqld=--binlog-format=mixed --skip-test-list=collections/disabled-daily.list

=== modified file 'mysql-test/extra/rpl_tests/rpl_record_compare.test'
--- a/mysql-test/extra/rpl_tests/rpl_record_compare.test	2011-03-24 10:52:40 +0000
+++ b/mysql-test/extra/rpl_tests/rpl_record_compare.test	2011-12-15 09:11:40 +0000
@@ -62,24 +62,3 @@ UPDATE t1 SET c1= 0;
 DROP TABLE t1; 
 -- sync_slave_with_master
 
-#
-# BUG#11766865: 60091: RBR + NO PK + UPDATE NULL VALUE --> SLAVE BREAK WITH ERROR HA_ERR_END_OF_ 
-#
-
---connection master
---source include/rpl_reset.inc
---connection master
-
---eval CREATE TABLE t1 (c1 int(11) NOT NULL, c2 int(11) NOT NULL, c3 int(11) DEFAULT '-1') ENGINE=$engine DEFAULT CHARSET=latin1
-
-INSERT INTO t1 VALUES (1,2,NULL);
-UPDATE t1 SET c1=1, c2=2, c3=-1 WHERE c1=1 AND c2=2 AND ISNULL(c3);
-
---sync_slave_with_master
-
---let $diff_tables=master:test.t1, slave:test.t1
---source include/diff_tables.inc
-
---connection master
-DROP TABLE t1;
---sync_slave_with_master

=== added file 'mysql-test/extra/rpl_tests/rpl_row_idempotency.test'
--- a/mysql-test/extra/rpl_tests/rpl_row_idempotency.test	1970-01-01 00:00:00 +0000
+++ b/mysql-test/extra/rpl_tests/rpl_row_idempotency.test	2010-11-23 00:08:01 +0000
@@ -0,0 +1,313 @@
+
+# bug#31609 Not all RBR slave errors reported as errors
+# bug#31552 Replication breaks when deleting rows from out-of-sync table
+#           without PK
+
+# The default for slave-exec-mode option and server
+# variable slave_exec_mode  is 'STRICT'.
+# When 'STRICT' mode is set, the slave SQL thread will stop whenever
+# the row to change is not found. In 'IDEMPOTENT' mode, the SQL thread
+# will continue running and apply the row - replace if it is a Write_rows event -
+# or skip to the next event.
+
+# The previous part of the tests ran with the slave in IDEMPOTENT mode.
+
+
+#
+# Beyond the idempotent errors above, now dealing with foreign key constraints
+#
+connection slave;
+
+set @old_slave_exec_mode= @@global.slave_exec_mode;
+set @@global.slave_exec_mode= IDEMPOTENT;
+
+connection master;
+
+create table ti1 (b int primary key) engine = innodb;
+create table ti2 (a int primary key, b int, foreign key (b) references ti1(b))
+     engine = innodb;
+set foreign_key_checks=1 /* ensure the check */;
+
+insert into ti1 values (1),(2),(3);
+insert into ti2 set a=2, b=2;
+
+sync_slave_with_master;
+
+#connection slave;
+select * from ti1 order by b /* must be (1),(2),(3) */;
+insert into ti2 set a=1, b=1;
+select * from ti2 order by b /* must be (1,1) (2,2) */;
+
+connection master;
+
+# from now on checking rbr specific idempotent errors
+set @save_binlog_format= @@session.binlog_format;
+set @@session.binlog_format= row;
+delete from ti1 where b=1;
+
+select * from ti1 order by b /* must be (2),(3) */;
+
+# slave must catch up (expect some warnings in error.log)
+sync_slave_with_master;
+
+#connection slave;
+select * from ti1 order by b /* must stays as were on master (1),(2),(3) */;
+
+delete from ti1 where b=3;
+
+connection master;
+insert into ti2 set a=3, b=3;
+
+# slave must catch up (expect some warnings in error.log)
+sync_slave_with_master;
+
+#connection slave;
+select * from ti2 order by b /* must be (1,1),(2,2) - not inserted */;
+
+
+#
+# Checking the new global sys variable
+#
+
+connection slave;
+
+set global slave_exec_mode='IDEMPOTENT';
+set global slave_exec_mode='STRICT';
+
+# checking mutual exclusion for the options
+--error ER_WRONG_VALUE_FOR_VAR
+set global slave_exec_mode='IDEMPOTENT,STRICT';
+
+select @@global.slave_exec_mode /* must be STRICT */;
+
+#
+# Checking stops.
+# In the following sections strict slave sql thread is going to
+# stop when faces an idempotent error. In order to proceed
+# the mode is temporarily switched to idempotent.
+#
+
+#
+--echo *** foreign keys errors as above now forces to stop
+#
+
+connection master;
+
+set foreign_key_checks=0;
+drop table ti2, ti1;
+
+create table ti1 (b int primary key) engine = innodb;
+create table ti2 (a int primary key, b int, foreign key (b) references ti1(b))
+     engine = innodb;
+set foreign_key_checks=1 /* ensure the check */;
+
+insert into ti1 values (1),(2),(3);
+insert into ti2 set a=2, b=2;
+
+sync_slave_with_master;
+
+#connection slave;
+select * from ti1 order by b /* must be (1),(2),(3) */;
+--echo *** conspire future problem
+insert into ti2 set a=1, b=1;
+select * from ti2 order by b /* must be (1,1) (2,2) */;
+
+connection master;
+
+delete from ti1 where b=1 /* offending delete event */;
+select * from ti1 order by b /* must be (2),(3) */;
+
+# foreign key: row is referenced
+
+--echo *** slave must stop (Trying to delete a referenced foreign key)
+connection slave;
+source include/wait_for_slave_sql_to_stop.inc;
+
+let $last_error = query_get_value("SHOW SLAVE STATUS", Last_SQL_Errno, 1);
+disable_query_log;
+eval SELECT "$last_error" AS Last_SQL_Error;
+enable_query_log;
+
+select * from ti1 order by b /* must be (1),(2),(3) - not deleted */;
+set foreign_key_checks= 0;
+delete from ti2 where b=1;
+set foreign_key_checks= 1;
+set global slave_exec_mode='IDEMPOTENT';
+start slave sql_thread;
+connection master;
+sync_slave_with_master;
+#connection slave;
+set global slave_exec_mode='STRICT';
+
+connection master;
+
+sync_slave_with_master;
+
+#connection slave;
+--echo *** conspire the following insert failure
+# foreign key: no referenced row
+
+--echo *** conspire future problem
+delete from ti1 where b=3;
+
+connection master;
+insert into ti2 set a=3, b=3 /* offending write event */;
+
+--echo *** slave must stop (Trying to insert an invalid foreign key)
+connection slave;
+source include/wait_for_slave_sql_to_stop.inc;
+
+let $last_error = query_get_value("SHOW SLAVE STATUS", Last_SQL_Errno, 1);
+disable_query_log;
+eval SELECT "$last_error" AS Last_SQL_Error;
+enable_query_log;
+
+select * from ti2 order by b /* must be (2,2) */;
+set foreign_key_checks= 0;
+insert into ti1 set b=3;
+set foreign_key_checks= 1;
+set global slave_exec_mode='IDEMPOTENT';
+start slave sql_thread;
+connection master;
+sync_slave_with_master;
+#connection slave;
+set global slave_exec_mode='STRICT';
+
+connection master;
+
+sync_slave_with_master;
+
+select * from ti2 order by b /* must be (2,2),(3,3) */;
+
+# 
+--echo *** other errors
+# 
+
+# dup key insert
+
+#connection slave;
+--echo *** conspiring query
+insert into ti1 set b=1;
+
+connection master;
+insert into ti1 set b=1 /* offending write event */;
+
+--echo *** slave must stop (Trying to insert a duplicate key)
+connection slave;
+source include/wait_for_slave_sql_to_stop.inc;
+
+let $last_error = query_get_value("SHOW SLAVE STATUS", Last_SQL_Errno, 1);
+disable_query_log;
+eval SELECT "$last_error" AS Last_SQL_Error;
+enable_query_log;
+
+set foreign_key_checks= 0;
+delete from ti1 where b=1;
+set foreign_key_checks= 1;
+set global slave_exec_mode='IDEMPOTENT';
+start slave sql_thread;
+connection master;
+sync_slave_with_master;
+#connection slave;
+set global slave_exec_mode='STRICT';
+
+# key not found
+
+connection master;
+
+CREATE TABLE t1 (a INT PRIMARY KEY);
+CREATE TABLE t2 (a INT);
+INSERT INTO t1 VALUES (-1),(-2),(-3);
+INSERT INTO t2 VALUES (-1),(-2),(-3);
+sync_slave_with_master;
+
+#connection slave;
+DELETE FROM t1 WHERE a = -2;
+DELETE FROM t2 WHERE a = -2;
+connection master;
+DELETE FROM t1 WHERE a = -2;
+
+--echo *** slave must stop (Key was not found)
+connection slave;
+source include/wait_for_slave_sql_to_stop.inc;
+
+let $last_error = query_get_value("SHOW SLAVE STATUS", Last_SQL_Errno, 1);
+disable_query_log;
+eval SELECT "$last_error" AS Last_SQL_Error;
+enable_query_log;
+
+set global slave_exec_mode='IDEMPOTENT';
+start slave sql_thread;
+connection master;
+sync_slave_with_master;
+#connection slave;
+set global slave_exec_mode='STRICT';
+
+connection master;
+DELETE FROM t2 WHERE a = -2; 
+--echo *** slave must stop (Key was not found)
+connection slave;
+source include/wait_for_slave_sql_to_stop.inc;
+
+let $last_error = query_get_value("SHOW SLAVE STATUS", Last_SQL_Errno, 1);
+disable_query_log;
+eval SELECT "$last_error" AS Last_SQL_Error;
+enable_query_log;
+
+set global slave_exec_mode='IDEMPOTENT';
+start slave sql_thread;
+connection master;
+sync_slave_with_master;
+#connection slave;
+set global slave_exec_mode='STRICT';
+
+UPDATE t1 SET a = 1 WHERE a = -1;
+UPDATE t2 SET a = 1 WHERE a = -1;
+
+connection master;
+UPDATE t1 SET a = 1 WHERE a = -1;
+
+--echo *** slave must stop (Key was not found)
+connection slave;
+source include/wait_for_slave_sql_to_stop.inc;
+
+let $last_error = query_get_value("SHOW SLAVE STATUS", Last_SQL_Errno, 1);
+disable_query_log;
+eval SELECT "$last_error" AS Last_SQL_Error;
+enable_query_log;
+
+set global slave_exec_mode='IDEMPOTENT';
+start slave sql_thread;
+connection master;
+sync_slave_with_master;
+#connection slave;
+set global slave_exec_mode='STRICT';
+
+
+connection master;
+UPDATE t2 SET a = 1 WHERE a = -1;
+
+--echo *** slave must stop (Key was not found)
+connection slave;
+source include/wait_for_slave_sql_to_stop.inc;
+
+let $last_error = query_get_value("SHOW SLAVE STATUS", Last_SQL_Errno, 1);
+disable_query_log;
+eval SELECT "$last_error" AS Last_SQL_Error;
+enable_query_log;
+
+set global slave_exec_mode='IDEMPOTENT';
+start slave sql_thread;
+connection master;
+sync_slave_with_master;
+#connection slave;
+SET @@global.slave_exec_mode= @old_slave_exec_mode;
+
+# cleanup for bug#31609 tests
+
+connection master;
+
+drop table t1,t2,ti2,ti1;
+sync_slave_with_master;
+set @@global.slave_exec_mode= @old_slave_exec_mode;
+
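
The STRICT/IDEMPOTENT policy exercised by the test above can be summarised as a
small decision routine. The sketch below is editorial illustration only, not
the applier code in sql/log_event.cc; the error numbers are exactly the
Last_SQL_Errno values this test asserts (1032 key not found, 1062 duplicate
key, 1451/1452 foreign key violations):

    // C++ sketch (hypothetical): should the slave SQL thread keep running?
    enum class ExecMode { STRICT, IDEMPOTENT };

    bool tolerate_apply_error(ExecMode mode, int errnum)
    {
      if (mode == ExecMode::STRICT)
        return false;               // stop; errnum becomes Last_SQL_Errno
      switch (errnum)
      {
        case 1032:                  // key not found: skip the row event
        case 1062:                  // duplicate key: Write_rows becomes replace
        case 1451:                  // FK violation: row is still referenced
        case 1452:                  // FK violation: referenced row is missing
          return true;              // IDEMPOTENT tolerates all of these
        default:
          return false;             // anything else stops the slave regardless
      }
    }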

=== added file 'mysql-test/include/rpl_hash_scan_assertion.inc'
--- a/mysql-test/include/rpl_hash_scan_assertion.inc	1970-01-01 00:00:00 +0000
+++ b/mysql-test/include/rpl_hash_scan_assertion.inc	2010-11-23 00:53:54 +0000
@@ -0,0 +1,9 @@
+-- sync_slave_with_master
+-- let $scan_alg= query_get_value(SHOW STATUS LIKE "Slave_rows_last_search_algorithm_used", Value, 1)
+if (`SELECT '$scan_alg' <> $expected_alg`)
+{
+    -- source include/show_rpl_debug_info.inc
+    -- echo Unexpected search algorithm at the slave: got $scan_alg, expected: $expected_alg
+    -- die 
+}
+-- connection master

=== modified file 'mysql-test/r/mysqld--help-notwin.result'
--- a/mysql-test/r/mysqld--help-notwin.result	2012-04-27 17:16:36 +0000
+++ b/mysql-test/r/mysqld--help-notwin.result	2012-05-02 12:04:42 +0000
@@ -785,6 +785,14 @@ The following options may be given as th
  Max size of Slave Worker queues holding yet not applied
  events.The least possible value must be not less than the
  master side max_allowed_packet.
+ --slave-rows-search-algorithms=name 
+ Set of searching algorithms that the slave will use while
+ searching for records from the storage engine to either
+ update or delete them. Possible values are: INDEX_SCAN,
+ TABLE_SCAN and HASH_SCAN. Any combination is allowed, and
+ the slave will always pick the most suitable algorithm
+ for any given scenario. (Default: INDEX_SCAN,
+ TABLE_SCAN).
  --slave-skip-errors=name 
  Tells the slave thread to continue replication when a
  query event returns an error from the provided list
@@ -1135,6 +1143,7 @@ slave-exec-mode STRICT
 slave-net-timeout 3600
 slave-parallel-workers 0
 slave-pending-jobs-size-max 16777216
+slave-rows-search-algorithms TABLE_SCAN,INDEX_SCAN
 slave-skip-errors (No default value)
 slave-sql-verify-checksum TRUE
 slave-transaction-retries 10

=== modified file 'mysql-test/r/mysqld--help-win.result'
--- a/mysql-test/r/mysqld--help-win.result	2012-04-27 17:16:36 +0000
+++ b/mysql-test/r/mysqld--help-win.result	2012-05-02 12:04:42 +0000
@@ -788,6 +788,14 @@ The following options may be given as th
  Max size of Slave Worker queues holding yet not applied
  events.The least possible value must be not less than the
  master side max_allowed_packet.
+ --slave-rows-search-algorithms=name 
+ Set of searching algorithms that the slave will use while
+ searching for records from the storage engine to either
+ update or delete them. Possible values are: INDEX_SCAN,
+ TABLE_SCAN and HASH_SCAN. Any combination is allowed, and
+ the slave will always pick the most suitable algorithm
+ for any given scenario. (Default: INDEX_SCAN,
+ TABLE_SCAN).
  --slave-skip-errors=name 
  Tells the slave thread to continue replication when a
  query event returns an error from the provided list
@@ -1145,6 +1153,7 @@ slave-exec-mode STRICT
 slave-net-timeout 3600
 slave-parallel-workers 0
 slave-pending-jobs-size-max 16777216
+slave-rows-search-algorithms TABLE_SCAN,INDEX_SCAN
 slave-skip-errors (No default value)
 slave-sql-verify-checksum TRUE
 slave-transaction-retries 10

=== modified file 'mysql-test/suite/rpl/r/rpl_bug26395.result'
--- a/mysql-test/suite/rpl/r/rpl_bug26395.result	2012-03-23 20:11:19 +0000
+++ b/mysql-test/suite/rpl/r/rpl_bug26395.result	2012-04-09 13:43:35 +0000
@@ -22,17 +22,10 @@ a
 include/sync_slave_io_with_master.inc
 ==== Verify results on slave ====
 include/stop_slave.inc
-SELECT "" AS Slave_IO_State;
-Slave_IO_State
-
-SELECT "" AS Last_SQL_Error;
-Last_SQL_Error
-
-SELECT "" AS Last_IO_Error;
-Last_IO_Error
-
-SELECT * FROM tinnodb ORDER BY a;
-a
+Slave_IO_State = ''
+Last_SQL_Error = ''
+Last_IO_Error = ''
+include/assert.inc [Assert that the slave table has no rows]
 ==== Clean up ====
 [on master]
 DROP TABLE tinnodb;

=== modified file 'mysql-test/suite/rpl/r/rpl_parallel_start_stop.result'
--- a/mysql-test/suite/rpl/r/rpl_parallel_start_stop.result	2012-04-23 08:14:28 +0000
+++ b/mysql-test/suite/rpl/r/rpl_parallel_start_stop.result	2012-04-24 13:39:42 +0000
@@ -35,6 +35,7 @@ insert into t2m values (1);
 begin;
 update t1 set a=31 where a=3;
 insert into t1 values (5),(6),(7);
+update t1 set a=a+10;
 begin;
 update t1 set a=20 where a=2;
 insert into t2m values (2);

=== modified file 'mysql-test/suite/rpl/r/rpl_relayrotate.result'
--- a/mysql-test/suite/rpl/r/rpl_relayrotate.result	2012-03-23 20:11:19 +0000
+++ b/mysql-test/suite/rpl/r/rpl_relayrotate.result	2012-04-09 13:43:35 +0000
@@ -3,6 +3,7 @@ Warnings:
 Note	####	Sending passwords in plain text without SSL/TLS is extremely insecure.
 Note	####	Storing MySQL user name or password information in the master.info repository is not secure and is therefore not recommended. Please see the MySQL Manual for more about this issue and possible alternatives.
 [connection master]
+call mtr.add_suppression('Slave SQL: Request to stop slave SQL Thread received while applying a group that has non-transactional changes; waiting for completion of the group');
 stop slave;
 create table t1 (a int) engine=innodb;
 reset slave;

=== removed file 'mysql-test/suite/rpl/r/rpl_row_find_row_debug.result'
--- a/mysql-test/suite/rpl/r/rpl_row_find_row_debug.result	2011-11-11 17:26:56 +0000
+++ b/mysql-test/suite/rpl/r/rpl_row_find_row_debug.result	1970-01-01 00:00:00 +0000
@@ -1,18 +0,0 @@
-include/master-slave.inc
-[connection master]
-include/stop_slave.inc
-SET GLOBAL log_warnings = 2;
-SET GLOBAL debug="d,inject_long_find_row_note";
-include/start_slave.inc
-CREATE TABLE t1 (c1 INT);
-INSERT INTO t1 VALUES (1), (2);
-UPDATE t1 SET c1= 1000 WHERE c1=2;
-DELETE FROM t1;
-DROP TABLE t1;
-# Check if any note related to long DELETE_ROWS and UPDATE_ROWS appears in the error log
-Occurrences: update=1, delete=1
-include/stop_slave.inc
-SET GLOBAL debug = '';
-SET GLOBAL log_warnings = 1;
-include/start_slave.inc
-include/rpl_end.inc

=== added file 'mysql-test/suite/rpl/r/rpl_row_hash_scan.result'
--- a/mysql-test/suite/rpl/r/rpl_row_hash_scan.result	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/rpl/r/rpl_row_hash_scan.result	2012-04-25 11:18:57 +0000
@@ -0,0 +1,85 @@
+include/master-slave.inc
+Warnings:
+Note	####	Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note	####	Storing MySQL user name or password information in the master.info repository is not secure and is therefore not recommended. Please see the MySQL Manual for more about this issue and possible alternatives.
+[connection master]
+call mtr.add_suppression("Slave SQL: Could not execute Update_rows event on table test.t1; Can't find record in 't1', Error_code: 1032; handler error HA_ERR_END_OF_FILE; the event's master log master-bin.[0-9]*, end_log_pos [0-9]*, Error_code: 1032");
+call mtr.add_suppression("Slave: Can't find record in 't1' Error_code: 1032");
+call mtr.add_suppression("Slave SQL: Could not execute Delete_rows event on table test.t1; Can't find record in 't1', Error_code: 1032; handler error HA_ERR_END_OF_FILE; the event's master log master-bin.[0-9]*, end_log_pos [0-9]*, Error_code: 1032");
+call mtr.add_suppression("Slave SQL: ... The slave coordinator and worker threads are stopped, possibly leaving data in inconsistent state. A restart should restore consistency automatically, although using non-transactional storage for data or info tables or DDL queries could lead to problems. In such cases you have to examine your data (see documentation for details). Error_code: 1756");
+Warnings:
+Warning	1265	Data truncated for column 'pattern' at row 1
+call mtr.add_suppression("Slave SQL: Could not execute Delete_rows event on table test.t1; Can't find record in 't1', Error_code: 1032; handler error HA_ERR_END_OF_FILE; the event's master log FIRST, end_log_pos [0-9]*, Error_code: 1032");
+SET @saved_slave_rows_search_algorithms= @@global.slave_rows_search_algorithms;
+SET GLOBAL slave_rows_search_algorithms= 'INDEX_SCAN,HASH_SCAN';
+CREATE TABLE t1 (a INT);
+INSERT INTO t1 VALUES (1), (1), (2), (3);
+DELETE FROM t1;
+INSERT INTO t1 VALUES (2), (1), (3), (1);
+UPDATE t1 SET a=1000 WHERE a=1;
+include/diff_tables.inc [master:test.t1, slave:test.t1]
+DELETE FROM t1 WHERE a=1000;
+DELETE FROM t1 WHERE a=2 OR a=3;
+include/diff_tables.inc [master:test.t1, slave:test.t1]
+DROP TABLE  t1;
+include/rpl_reset.inc
+SET SQL_LOG_BIN=0;
+CREATE TABLE t1 (a INT, b INT);
+SET SQL_LOG_BIN=1;
+CREATE TABLE t1 (a INT);
+INSERT INTO t1 VALUES (1,1), (1,2), (2,1), (2,2);
+UPDATE t1 SET a=1000 WHERE a=1;
+SELECT * FROM t1;
+a	b
+1000	1
+1000	2
+2	1
+2	2
+SELECT * FROM t1;
+a
+1000
+1000
+2
+2
+DELETE FROM t1 WHERE a=1000;
+DELETE FROM t1 WHERE a=2;
+SELECT * FROM t1;
+a	b
+SELECT * FROM t1;
+a
+DROP TABLE  t1;
+include/rpl_reset.inc
+CREATE TABLE t1 (a INT);
+INSERT INTO t1 VALUES (1), (1), (2), (3);
+DELETE FROM t1 WHERE a=1;
+DELETE FROM t1 WHERE a=2;
+UPDATE t1 SET a=1000 WHERE a=1;
+include/wait_for_slave_sql_error_and_skip.inc [errno=1032]
+DELETE FROM t1 WHERE a=2;
+include/wait_for_slave_sql_error_and_skip.inc [errno=1032]
+DROP TABLE t1;
+include/rpl_reset.inc
+CREATE TABLE t1 (a INT, b TINYBLOB);
+INSERT INTO t1 VALUES (1,'a'), (1, 'b'), (2,'aa'), (2, 'aa');
+UPDATE t1 SET b='c' WHERE a=1;
+include/diff_tables.inc [master:test.t1, slave:test.t1]
+UPDATE t1 SET a=10000 WHERE b='aa';
+include/diff_tables.inc [master:test.t1, slave:test.t1]
+UPDATE t1 SET b='c' WHERE b='aa';
+include/diff_tables.inc [master:test.t1, slave:test.t1]
+DELETE FROM t1 WHERE b='c';
+include/diff_tables.inc [master:test.t1, slave:test.t1]
+DROP TABLE  t1;
+include/rpl_reset.inc
+CREATE TABLE t1 (a TINYBLOB, b TINYBLOB);
+INSERT INTO t1 VALUES ('a','a'), ('b', 'b'), ('a','aa'), ('a', 'aa');
+UPDATE t1 SET b='c' WHERE b='aa';
+include/diff_tables.inc [master:test.t1, slave:test.t1]
+DELETE FROM t1;
+include/diff_tables.inc [master:test.t1, slave:test.t1]
+INSERT INTO t1 VALUES (NULL,NULL), (NULL, NULL);
+DELETE FROM t1;
+include/diff_tables.inc [master:test.t1, slave:test.t1]
+DROP TABLE  t1;
+SET @@global.slave_rows_search_algorithms= @saved_slave_rows_search_algorithms;
+include/rpl_end.inc

=== added file 'mysql-test/suite/rpl/r/rpl_row_hash_scan_sanity.result'
--- a/mysql-test/suite/rpl/r/rpl_row_hash_scan_sanity.result	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/rpl/r/rpl_row_hash_scan_sanity.result	2012-04-16 16:57:42 +0000
@@ -0,0 +1,48 @@
+include/master-slave.inc
+Warnings:
+Note	####	Sending passwords in plain text without SSL/TLS is extremely insecure.
+Note	####	Storing MySQL user name or password information in the master.info repository is not secure and is therefore not recommended. Please see the MySQL Manual for more about this issue and possible alternatives.
+[connection master]
+CREATE TABLE t1 (c1 INT);
+CREATE TABLE t2 (c1 INT PRIMARY KEY);
+CREATE TABLE t3 (c1 INT UNIQUE KEY NOT NULL);
+CREATE TABLE t4 (c1 INT KEY);
+INSERT INTO t1 VALUES (1);
+INSERT INTO t2 VALUES (1);
+INSERT INTO t3 VALUES (1);
+INSERT INTO t4 VALUES (1);
+SET @saved_slave_rows_search_algorithms= @@global.slave_rows_search_algorithms;
+SET @@global.slave_rows_search_algorithms= 'TABLE_SCAN';
+UPDATE t1 SET c1= 2;
+UPDATE t2 SET c1= 2;
+UPDATE t3 SET c1= 2;
+UPDATE t4 SET c1= 2;
+SET @@global.slave_rows_search_algorithms= 'TABLE_SCAN,INDEX_SCAN';
+UPDATE t1 SET c1= 3;
+UPDATE t2 SET c1= 3;
+UPDATE t3 SET c1= 3;
+UPDATE t4 SET c1= 3;
+SET @@global.slave_rows_search_algorithms= 'TABLE_SCAN,HASH_SCAN';
+UPDATE t1 SET c1= 4;
+UPDATE t2 SET c1= 4;
+UPDATE t3 SET c1= 4;
+UPDATE t4 SET c1= 4;
+SET @@global.slave_rows_search_algorithms= 'HASH_SCAN';
+UPDATE t1 SET c1= 5;
+UPDATE t2 SET c1= 5;
+UPDATE t3 SET c1= 5;
+UPDATE t4 SET c1= 5;
+SET @@global.slave_rows_search_algorithms= 'HASH_SCAN,INDEX_SCAN';
+UPDATE t1 SET c1= 6;
+UPDATE t2 SET c1= 6;
+UPDATE t3 SET c1= 6;
+UPDATE t4 SET c1= 6;
+SET @@global.slave_rows_search_algorithms= 'HASH_SCAN,INDEX_SCAN,TABLE_SCAN';
+UPDATE t1 SET c1= 7;
+UPDATE t2 SET c1= 7;
+UPDATE t3 SET c1= 7;
+UPDATE t4 SET c1= 7;
+SET @@global.slave_rows_search_algorithms= @saved_slave_rows_search_algorithms;
+DROP TABLE IF EXISTS t1, t2, t3, t4;
+include/rpl_reset.inc
+include/rpl_end.inc
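
The updates above pin down which algorithm each setting/table pairing must
report through Slave_rows_last_search_algorithm_used. Below is a decision
sketch that is consistent with all seven combinations in this result file
(editorial illustration only; the real selection logic is part of the patch,
e.g. in sql/rpl_utility.cc, and may weigh key types differently):

    enum Alg { TABLE_SCAN = 1, INDEX_SCAN = 2, HASH_SCAN = 4 };

    // enabled_set is the bitwise OR of the algorithms listed in
    // @@global.slave_rows_search_algorithms.
    Alg pick_algorithm(unsigned enabled_set, bool table_has_index)
    {
      if (table_has_index && (enabled_set & INDEX_SCAN))
        return INDEX_SCAN;  // a usable index wins when INDEX_SCAN is allowed
      if (enabled_set & HASH_SCAN)
        return HASH_SCAN;   // batch lookups through the in-memory hash table
      return TABLE_SCAN;    // fall back to one full scan per row
    }

For example, with 'TABLE_SCAN,HASH_SCAN' even the keyed tables t2-t4 use
HASH_SCAN, while adding INDEX_SCAN to the set switches them to INDEX_SCAN,
matching the expectations asserted by rpl_row_hash_scan_sanity.test.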

=== modified file 'mysql-test/suite/rpl/r/rpl_row_idempotency.result'
--- a/mysql-test/suite/rpl/r/rpl_row_idempotency.result	2012-03-23 20:11:19 +0000
+++ b/mysql-test/suite/rpl/r/rpl_row_idempotency.result	2012-04-09 13:43:35 +0000
@@ -8,6 +8,8 @@ call mtr.add_suppression("Cannot delete
 call mtr.add_suppression("Cannot add or update a child row: a foreign key constraint fails .* Error_code: 1452");
 call mtr.add_suppression("Duplicate entry .1. for key .PRIMARY.* Error_code: 1062");
 call mtr.add_suppression("The slave coordinator and worker threads are stopped, possibly leaving data in inconsistent state");
+set @saved_slave_rows_search_algorithms= @@global.slave_rows_search_algorithms;
+SET GLOBAL slave_rows_search_algorithms= 'INDEX_SCAN,TABLE_SCAN';
 set @old_slave_exec_mode= @@global.slave_exec_mode;
 set @@global.slave_exec_mode= IDEMPOTENT;
 create table ti1 (b int primary key) engine = innodb;
@@ -168,5 +170,168 @@ start slave sql_thread;
 SET @@global.slave_exec_mode= @old_slave_exec_mode;
 drop table t1,t2,ti2,ti1;
 set @@global.slave_exec_mode= @old_slave_exec_mode;
+include/rpl_reset.inc
+SET GLOBAL slave_rows_search_algorithms= 'INDEX_SCAN,HASH_SCAN';
+set @old_slave_exec_mode= @@global.slave_exec_mode;
+set @@global.slave_exec_mode= IDEMPOTENT;
+create table ti1 (b int primary key) engine = innodb;
+create table ti2 (a int primary key, b int, foreign key (b) references ti1(b))
+engine = innodb;
+set foreign_key_checks=1 /* ensure the check */;
+insert into ti1 values (1),(2),(3);
+insert into ti2 set a=2, b=2;
+select * from ti1 order by b /* must be (1),(2),(3) */;
+b
+1
+2
+3
+insert into ti2 set a=1, b=1;
+select * from ti2 order by b /* must be (1,1) (2,2) */;
+a	b
+1	1
+2	2
+set @save_binlog_format= @@session.binlog_format;
+set @@session.binlog_format= row;
+delete from ti1 where b=1;
+select * from ti1 order by b /* must be (2),(3) */;
+b
+2
+3
+select * from ti1 order by b /* must stays as were on master (1),(2),(3) */;
+b
+1
+2
+3
+delete from ti1 where b=3;
+insert into ti2 set a=3, b=3;
+select * from ti2 order by b /* must be (1,1),(2,2) - not inserted */;
+a	b
+1	1
+2	2
+set global slave_exec_mode='IDEMPOTENT';
+set global slave_exec_mode='STRICT';
+set global slave_exec_mode='IDEMPOTENT,STRICT';
+ERROR 42000: Variable 'slave_exec_mode' can't be set to the value of 'IDEMPOTENT,STRICT'
+select @@global.slave_exec_mode /* must be STRICT */;
+@@global.slave_exec_mode
+STRICT
+*** foreign keys errors as above now forces to stop
+set foreign_key_checks=0;
+drop table ti2, ti1;
+create table ti1 (b int primary key) engine = innodb;
+create table ti2 (a int primary key, b int, foreign key (b) references ti1(b))
+engine = innodb;
+set foreign_key_checks=1 /* ensure the check */;
+insert into ti1 values (1),(2),(3);
+insert into ti2 set a=2, b=2;
+select * from ti1 order by b /* must be (1),(2),(3) */;
+b
+1
+2
+3
+*** conspire future problem
+insert into ti2 set a=1, b=1;
+select * from ti2 order by b /* must be (1,1) (2,2) */;
+a	b
+1	1
+2	2
+delete from ti1 where b=1 /* offending delete event */;
+select * from ti1 order by b /* must be (2),(3) */;
+b
+2
+3
+*** slave must stop (Trying to delete a referenced foreign key)
+include/wait_for_slave_sql_to_stop.inc
+Last_SQL_Error
+1451
+select * from ti1 order by b /* must be (1),(2),(3) - not deleted */;
+b
+1
+2
+3
+set foreign_key_checks= 0;
+delete from ti2 where b=1;
+set foreign_key_checks= 1;
+set global slave_exec_mode='IDEMPOTENT';
+start slave sql_thread;
+set global slave_exec_mode='STRICT';
+*** conspire the following insert failure
+*** conspire future problem
+delete from ti1 where b=3;
+insert into ti2 set a=3, b=3 /* offending write event */;
+*** slave must stop (Trying to insert an invalid foreign key)
+include/wait_for_slave_sql_to_stop.inc
+Last_SQL_Error
+1452
+select * from ti2 order by b /* must be (2,2) */;
+a	b
+2	2
+set foreign_key_checks= 0;
+insert into ti1 set b=3;
+set foreign_key_checks= 1;
+set global slave_exec_mode='IDEMPOTENT';
+start slave sql_thread;
+set global slave_exec_mode='STRICT';
+select * from ti2 order by b /* must be (2,2),(3,3) */;
+a	b
+2	2
+3	3
+*** other errors
+*** conspiring query
+insert into ti1 set b=1;
+insert into ti1 set b=1 /* offending write event */;
+*** slave must stop (Trying to insert a duplicate key)
+include/wait_for_slave_sql_to_stop.inc
+Last_SQL_Error
+1062
+set foreign_key_checks= 0;
+delete from ti1 where b=1;
+set foreign_key_checks= 1;
+set global slave_exec_mode='IDEMPOTENT';
+start slave sql_thread;
+set global slave_exec_mode='STRICT';
+CREATE TABLE t1 (a INT PRIMARY KEY);
+CREATE TABLE t2 (a INT);
+INSERT INTO t1 VALUES (-1),(-2),(-3);
+INSERT INTO t2 VALUES (-1),(-2),(-3);
+DELETE FROM t1 WHERE a = -2;
+DELETE FROM t2 WHERE a = -2;
+DELETE FROM t1 WHERE a = -2;
+*** slave must stop (Key was not found)
+include/wait_for_slave_sql_to_stop.inc
+Last_SQL_Error
+1032
+set global slave_exec_mode='IDEMPOTENT';
+start slave sql_thread;
+set global slave_exec_mode='STRICT';
+DELETE FROM t2 WHERE a = -2;
+*** slave must stop (Key was not found)
+include/wait_for_slave_sql_to_stop.inc
+Last_SQL_Error
+1032
+set global slave_exec_mode='IDEMPOTENT';
+start slave sql_thread;
+set global slave_exec_mode='STRICT';
+UPDATE t1 SET a = 1 WHERE a = -1;
+UPDATE t2 SET a = 1 WHERE a = -1;
+UPDATE t1 SET a = 1 WHERE a = -1;
+*** slave must stop (Key was not found)
+include/wait_for_slave_sql_to_stop.inc
+Last_SQL_Error
+1032
+set global slave_exec_mode='IDEMPOTENT';
+start slave sql_thread;
+set global slave_exec_mode='STRICT';
+UPDATE t2 SET a = 1 WHERE a = -1;
+*** slave must stop (Key was not found)
+include/wait_for_slave_sql_to_stop.inc
+Last_SQL_Error
+1032
+set global slave_exec_mode='IDEMPOTENT';
+start slave sql_thread;
+SET @@global.slave_exec_mode= @old_slave_exec_mode;
+drop table t1,t2,ti2,ti1;
+set @@global.slave_exec_mode= @old_slave_exec_mode;
+set @@global.slave_rows_search_algorithms= @saved_slave_rows_search_algorithms;
 *** end of tests
 include/rpl_end.inc

=== modified file 'mysql-test/suite/rpl/r/rpl_row_rec_comp_innodb.result'
--- a/mysql-test/suite/rpl/r/rpl_row_rec_comp_innodb.result	2012-03-23 20:11:19 +0000
+++ b/mysql-test/suite/rpl/r/rpl_row_rec_comp_innodb.result	2012-04-09 13:43:35 +0000
@@ -28,10 +28,4 @@ INSERT INTO t1(c1) VALUES (NULL);
 UPDATE t1 SET c1= 0;
 include/diff_tables.inc [master:t1, slave:t1]
 DROP TABLE t1;
-include/rpl_reset.inc
-CREATE TABLE t1 (c1 int(11) NOT NULL, c2 int(11) NOT NULL, c3 int(11) DEFAULT '-1') ENGINE=InnoDB DEFAULT CHARSET=latin1;
-INSERT INTO t1 VALUES (1,2,NULL);
-UPDATE t1 SET c1=1, c2=2, c3=-1 WHERE c1=1 AND c2=2 AND ISNULL(c3);
-include/diff_tables.inc [master:test.t1, slave:test.t1]
-DROP TABLE t1;
 include/rpl_end.inc

=== modified file 'mysql-test/suite/rpl/r/rpl_row_rec_comp_myisam.result'
--- a/mysql-test/suite/rpl/r/rpl_row_rec_comp_myisam.result	2012-03-23 20:11:19 +0000
+++ b/mysql-test/suite/rpl/r/rpl_row_rec_comp_myisam.result	2012-04-09 13:43:35 +0000
@@ -37,10 +37,4 @@ INSERT INTO t1(c1) VALUES (NULL);
 UPDATE t1 SET c1= 0;
 include/diff_tables.inc [master:t1, slave:t1]
 DROP TABLE t1;
-include/rpl_reset.inc
-CREATE TABLE t1 (c1 int(11) NOT NULL, c2 int(11) NOT NULL, c3 int(11) DEFAULT '-1') ENGINE=MyISAM DEFAULT CHARSET=latin1;
-INSERT INTO t1 VALUES (1,2,NULL);
-UPDATE t1 SET c1=1, c2=2, c3=-1 WHERE c1=1 AND c2=2 AND ISNULL(c3);
-include/diff_tables.inc [master:test.t1, slave:test.t1]
-DROP TABLE t1;
 include/rpl_end.inc

=== modified file 'mysql-test/suite/rpl/t/rpl_bug26395.test'
--- a/mysql-test/suite/rpl/t/rpl_bug26395.test	2011-08-19 13:04:28 +0000
+++ b/mysql-test/suite/rpl/t/rpl_bug26395.test	2012-02-17 18:24:06 +0000
@@ -74,14 +74,13 @@ sync_with_master 0;
 --echo ==== Verify results on slave ====
 
 source include/stop_slave.inc;
-let $tmp= query_get_value("SHOW SLAVE STATUS", Slave_IO_State, 1);
-eval SELECT "$tmp" AS Slave_IO_State;
-let $tmp= query_get_value("SHOW SLAVE STATUS", Last_SQL_Error, 1);
-eval SELECT "$tmp" AS Last_SQL_Error;
-let $tmp= query_get_value("SHOW SLAVE STATUS", Last_IO_Error, 1);
-eval SELECT "$tmp" AS Last_IO_Error;
-SELECT * FROM tinnodb ORDER BY a;
 
+--let $status_items=Slave_IO_State,Last_SQL_Error,Last_IO_Error
+--source include/show_slave_status.inc
+
+--let $assert_text= Assert that the slave table has no rows
+--let $assert_cond= `SELECT COUNT(*) = 0 FROM tinnodb`
+--source include/assert.inc
 
 --echo ==== Clean up ====
 

=== modified file 'mysql-test/suite/rpl/t/rpl_parallel_start_stop.test'
--- a/mysql-test/suite/rpl/t/rpl_parallel_start_stop.test	2012-04-23 08:14:28 +0000
+++ b/mysql-test/suite/rpl/t/rpl_parallel_start_stop.test	2012-04-24 13:39:42 +0000
@@ -149,6 +149,7 @@ begin;
 # set up a deadlock
 update t1 set a=31 where a=3;
 insert into t1 values (5),(6),(7);
+update t1 set a=a+10;
 
 connection master;
 

=== modified file 'mysql-test/suite/rpl/t/rpl_relayrotate.test'
--- a/mysql-test/suite/rpl/t/rpl_relayrotate.test	2010-12-19 17:07:28 +0000
+++ b/mysql-test/suite/rpl/t/rpl_relayrotate.test	2012-02-13 19:05:40 +0000
@@ -7,6 +7,7 @@
 -- source include/not_ndb_default.inc
 -- source include/have_innodb.inc
 -- source include/master-slave.inc
+call mtr.add_suppression('Slave SQL: Request to stop slave SQL Thread received while applying a group that has non-transactional changes; waiting for completion of the group');
 let $engine_type=innodb;
 -- source extra/rpl_tests/rpl_relayrotate.test
 --source include/rpl_end.inc

=== removed file 'mysql-test/suite/rpl/t/rpl_row_find_row_debug.test'
--- a/mysql-test/suite/rpl/t/rpl_row_find_row_debug.test	2011-11-15 13:04:11 +0000
+++ b/mysql-test/suite/rpl/t/rpl_row_find_row_debug.test	1970-01-01 00:00:00 +0000
@@ -1,64 +0,0 @@
-#
-# Bug#11760927: 53375: RBR + NO PK => HIGH LOAD ON SLAVE (TABLE SCAN/CPU) => SLAVE FAILURE
-#
---disable_warnings
---source include/master-slave.inc
---enable_warnings
---source include/have_binlog_format_row.inc
---source include/have_debug.inc
-
-# SETUP
-# - setup log_warnings and debug 
---connection slave
---source include/stop_slave.inc
---let $debug_save= `SELECT @@GLOBAL.debug`
---let $log_warnings_save= `SELECT @@GLOBAL.log_warnings`
-
-SET GLOBAL log_warnings = 2;
-
-let $log_error_= `SELECT @@GLOBAL.log_error`;
-if(!$log_error_)
-{
-    # MySQL Server on windows is started with --console and thus
-    # does not know the location of its .err log, use default location
-    let $log_error_ = $MYSQLTEST_VARDIR/log/mysqld.2.err;
-}
-
-# Assign env variable LOG_ERROR
-let LOG_ERROR=$log_error_;
-
-# force printing the notes to the error log
-SET GLOBAL debug="d,inject_long_find_row_note";
---source include/start_slave.inc
-
-# test
---connection master
-CREATE TABLE t1 (c1 INT);
---sync_slave_with_master
---connection master
-
-INSERT INTO t1 VALUES (1), (2);
-UPDATE t1 SET c1= 1000 WHERE c1=2;
-DELETE FROM t1;
-DROP TABLE t1;
---sync_slave_with_master
-
---echo # Check if any note related to long DELETE_ROWS and UPDATE_ROWS appears in the error log
-perl;
-  use strict;
-  my $log_error= $ENV{'LOG_ERROR'} or die "LOG_ERROR not set";
-  open(FILE, "$log_error") or die("Unable to open $log_error: $!\n");
-  my $upd_count = () = grep(/The slave is applying a ROW event on behalf of an UPDATE statement on table t1 and is currently taking a considerable amount/g,<FILE>);
-  seek(FILE, 0, 0) or die "Can't seek to beginning of file: $!";
-  my $del_count = () = grep(/The slave is applying a ROW event on behalf of a DELETE statement on table t1 and is currently taking a considerable amount/g,<FILE>);
-  print "Occurrences: update=$upd_count, delete=$del_count\n";
-  close(FILE);
-EOF
-
-# cleanup
---source include/stop_slave.inc
---eval SET GLOBAL debug = '$debug_save'
---eval SET GLOBAL log_warnings = $log_warnings_save
---source include/start_slave.inc
-
---source include/rpl_end.inc

=== added file 'mysql-test/suite/rpl/t/rpl_row_hash_scan.test'
--- a/mysql-test/suite/rpl/t/rpl_row_hash_scan.test	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/rpl/t/rpl_row_hash_scan.test	2012-04-25 11:18:57 +0000
@@ -0,0 +1,230 @@
+#
+# Test cases for WL#5597
+#
+# In this file, we only test for the following cases:
+
+#
+# CASE #1: update/delete multiple records from a table that share the
+#          same hashtable key (in slave HASH_SCAN algorithm).
+
+#
+# CASE #2: same as CASE #1, but here the duplicate keys in the slave's
+#          hashtable are a side effect of the master having more columns
+#          than the slave; they should not be a problem.
+
+#
+# CASE #3: the slave stops gracefully when it is updating a row that
+#          does not exist on its table.
+
+#
+# CASE #4: update/delete multiple records with blobs. Given that blobs
+#          are not included in hashing, some records keys will collide.
+
+#
+# CASE #5: update/delete tables with only blob columns.
+# 
+
+-- source include/have_binlog_format_row.inc
+-- source include/master-slave.inc
+
+call mtr.add_suppression("Slave SQL: Could not execute Update_rows event on table test.t1; Can't find record in 't1', Error_code: 1032; handler error HA_ERR_END_OF_FILE; the event's master log master-bin.[0-9]*, end_log_pos [0-9]*, Error_code: 1032");
+call mtr.add_suppression("Slave: Can't find record in 't1' Error_code: 1032");
+call mtr.add_suppression("Slave SQL: Could not execute Delete_rows event on table test.t1; Can't find record in 't1', Error_code: 1032; handler error HA_ERR_END_OF_FILE; the event's master log master-bin.[0-9]*, end_log_pos [0-9]*, Error_code: 1032");
+call mtr.add_suppression("Slave SQL: ... The slave coordinator and worker threads are stopped, possibly leaving data in inconsistent state. A restart should restore consistency automatically, although using non-transactional storage for data or info tables or DDL queries could lead to problems. In such cases you have to examine your data (see documentation for details). Error_code: 1756");
+call mtr.add_suppression("Slave SQL: Could not execute Delete_rows event on table test.t1; Can't find record in 't1', Error_code: 1032; handler error HA_ERR_END_OF_FILE; the event's master log FIRST, end_log_pos [0-9]*, Error_code: 1032");
+
+--connection slave
+SET @saved_slave_rows_search_algorithms= @@global.slave_rows_search_algorithms;
+SET GLOBAL slave_rows_search_algorithms= 'INDEX_SCAN,HASH_SCAN';
+
+#
+# CASE #1: entries that generate the same key for the slave internal
+#          hash table.
+#
+# ASSERTS that no updates are lost due to having multiple entries for
+#         the same hashtable key in the slave HASH_SCAN.
+#
+
+-- connection master
+CREATE TABLE t1 (a INT);
+INSERT INTO t1 VALUES (1), (1), (2), (3);
+-- sync_slave_with_master
+DELETE FROM t1;
+
+# try to change the order of the rows in the engine.
+INSERT INTO t1 VALUES (2), (1), (3), (1);
+
+-- connection master
+UPDATE t1 SET a=1000 WHERE a=1;
+-- sync_slave_with_master
+
+--let $diff_tables= master:test.t1, slave:test.t1
+-- source include/diff_tables.inc
+
+-- connection master
+DELETE FROM t1 WHERE a=1000;
+DELETE FROM t1 WHERE a=2 OR a=3;
+-- sync_slave_with_master
+
+--let $diff_tables= master:test.t1, slave:test.t1
+-- source include/diff_tables.inc
+
+#cleanup for case#1
+--connection master
+DROP TABLE  t1; 
+-- sync_slave_with_master
+
+-- source include/rpl_reset.inc
+
+# CASE #2: entries generating the same key for the slave internal
+#          hashtable because master table has more columns than the
+#          slave's.
+#
+# ASSERTS that no updates are lost due to having multiple entries for
+#         the same hashtable key in the slave HASH_SCAN when master
+#         has more columns than the slave.
+
+-- connection master
+
+SET SQL_LOG_BIN=0;
+CREATE TABLE t1 (a INT, b INT);
+SET SQL_LOG_BIN=1;
+-- connection slave
+CREATE TABLE t1 (a INT);
+-- connection master
+INSERT INTO t1 VALUES (1,1), (1,2), (2,1), (2,2);
+UPDATE t1 SET a=1000 WHERE a=1;
+
+SELECT * FROM t1;
+-- sync_slave_with_master
+SELECT * FROM t1;
+
+-- connection master
+DELETE FROM t1 WHERE a=1000;
+DELETE FROM t1 WHERE a=2;
+SELECT * FROM t1;
+-- sync_slave_with_master
+SELECT * FROM t1;
+
+#cleanup for case#2
+--connection master
+DROP TABLE  t1;
+-- sync_slave_with_master
+
+-- source include/rpl_reset.inc
+
+#
+# CASE #3: The master updates and deletes some row that the slave does
+#          not have.
+#
+# ASSERTS that the slave shall fail gracefully when the row is not found.
+#
+
+-- connection master
+
+CREATE TABLE t1 (a INT);
+INSERT INTO t1 VALUES (1), (1), (2), (3);
+-- sync_slave_with_master
+DELETE FROM t1 WHERE a=1;
+DELETE FROM t1 WHERE a=2;
+
+-- connection master
+UPDATE t1 SET a=1000 WHERE a=1;
+-- let $slave_sql_errno= 1032
+-- source include/wait_for_slave_sql_error_and_skip.inc
+
+-- connection master
+DELETE FROM t1 WHERE a=2;
+-- let $slave_sql_errno= 1032
+-- source include/wait_for_slave_sql_error_and_skip.inc
+DROP TABLE t1;
+-- sync_slave_with_master
+
+-- source include/rpl_reset.inc
+
+#
+# CASE #4: covers the case of tables that have blobs in them.
+#
+# ASSERTS that there are no lost updates
+
+-- connection master
+
+CREATE TABLE t1 (a INT, b TINYBLOB);
+INSERT INTO t1 VALUES (1,'a'), (1, 'b'), (2,'aa'), (2, 'aa');
+
+UPDATE t1 SET b='c' WHERE a=1;
+-- sync_slave_with_master
+--let $diff_tables= master:test.t1, slave:test.t1
+-- source include/diff_tables.inc
+
+-- connection master
+
+UPDATE t1 SET a=10000 WHERE b='aa';
+-- sync_slave_with_master
+
+--let $diff_tables= master:test.t1, slave:test.t1
+-- source include/diff_tables.inc
+
+-- connection master
+
+UPDATE t1 SET b='c' WHERE b='aa';
+-- sync_slave_with_master
+
+--let $diff_tables= master:test.t1, slave:test.t1
+-- source include/diff_tables.inc
+
+-- connection master
+
+DELETE FROM t1 WHERE b='c';
+-- sync_slave_with_master
+
+--let $diff_tables= master:test.t1, slave:test.t1
+-- source include/diff_tables.inc
+
+#cleanup for case#4
+--connection master
+DROP TABLE  t1;
+-- sync_slave_with_master
+
+-- source include/rpl_reset.inc
+
+#
+# CASE #5: covers the case in which the table has only blobs in it.
+#
+# ASSERTS that there are no issues even if blobs are skipped from the
+#         hashing. Tables on master and slave will not go out-of-sync.
+#
+
+-- connection master
+
+CREATE TABLE t1 (a TINYBLOB, b TINYBLOB);
+INSERT INTO t1 VALUES ('a','a'), ('b', 'b'), ('a','aa'), ('a', 'aa');
+
+UPDATE t1 SET b='c' WHERE b='aa';
+-- sync_slave_with_master
+
+--let $diff_tables= master:test.t1, slave:test.t1
+-- source include/diff_tables.inc
+
+-- connection master
+
+DELETE FROM t1;
+-- sync_slave_with_master
+
+--let  $diff_tables= master:test.t1, slave:test.t1
+-- source include/diff_tables.inc
+
+-- connection master
+INSERT INTO t1 VALUES (NULL,NULL), (NULL, NULL);
+DELETE FROM t1;
+
+-- sync_slave_with_master
+--let $diff_tables= master:test.t1, slave:test.t1
+-- source include/diff_tables.inc
+
+#cleanup for case#5
+--connection master
+DROP TABLE  t1;
+-- sync_slave_with_master
+SET @@global.slave_rows_search_algorithms= @saved_slave_rows_search_algorithms;
+--source include/rpl_end.inc
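
Cases #1, #4 and #5 above all revolve around hash-key collisions: duplicate
column values, and blob columns that are deliberately excluded from the key.
A sketch of that key computation (hypothetical code, not the patch itself):

    #include <functional>
    #include <string>
    #include <vector>

    struct Field { bool is_blob; std::string value; };

    // Hash only the non-blob columns of a row's before-image. Rows that
    // differ only in blobs collide on purpose; the applier must then
    // compare full before-images to pick the right row.
    size_t hash_scan_key(const std::vector<Field> &before_image)
    {
      std::string key;
      for (const Field &f : before_image)
        if (!f.is_blob)
          key.append(f.value).push_back('\0');  // '\0' separates columns
      // A table with only blob columns (case #5) always hashes the empty
      // key, so every row lands in one bucket and correctness relies
      // entirely on the full-row comparison.
      return std::hash<std::string>()(key);
    }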

=== added file 'mysql-test/suite/rpl/t/rpl_row_hash_scan_sanity.test'
--- a/mysql-test/suite/rpl/t/rpl_row_hash_scan_sanity.test	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/rpl/t/rpl_row_hash_scan_sanity.test	2012-03-16 06:05:15 +0000
@@ -0,0 +1,178 @@
+-- source include/master-slave.inc
+-- source include/have_binlog_format_row.inc
+-- source include/have_debug.inc
+
+#
+# WL#5597 tests
+#
+# These tests check whether the correct algorithm for searching the
+# rows was chosen, depending on the setting of
+# @@global.slave_rows_search_algorithms and the table definition.
+#
+# We test all combinations, but leave out the offending ones:
+# - @@global.slave_rows_search_algorithms= ''
+# - @@global.slave_rows_search_algorithms= 'INDEX_SCAN'
+#
+# We do not allow setting only INDEX_SCAN or the empty value.
+#
+
+-- connection master
+
+CREATE TABLE t1 (c1 INT);
+CREATE TABLE t2 (c1 INT PRIMARY KEY);
+CREATE TABLE t3 (c1 INT UNIQUE KEY NOT NULL);
+CREATE TABLE t4 (c1 INT KEY);
+
+INSERT INTO t1 VALUES (1);
+INSERT INTO t2 VALUES (1);
+INSERT INTO t3 VALUES (1);
+INSERT INTO t4 VALUES (1);
+
+-- sync_slave_with_master
+SET @saved_slave_rows_search_algorithms= @@global.slave_rows_search_algorithms;
+
+###################### TABLE_SCAN assertions
+
+-- connection slave
+SET @@global.slave_rows_search_algorithms= 'TABLE_SCAN';
+-- connection master
+
+UPDATE t1 SET c1= 2;
+-- let $expected_alg= 'TABLE_SCAN'
+-- source include/rpl_hash_scan_assertion.inc
+
+UPDATE t2 SET c1= 2;
+-- let $expected_alg= 'TABLE_SCAN'
+-- source include/rpl_hash_scan_assertion.inc
+
+UPDATE t3 SET c1= 2;
+-- let $expected_alg= 'TABLE_SCAN'
+-- source include/rpl_hash_scan_assertion.inc
+
+UPDATE t4 SET c1= 2;
+-- let $expected_alg= 'TABLE_SCAN'
+-- source include/rpl_hash_scan_assertion.inc
+
+###################### TABLE_SCAN,INDEX_SCAN
+
+-- connection slave
+SET @@global.slave_rows_search_algorithms= 'TABLE_SCAN,INDEX_SCAN';
+-- connection master
+
+UPDATE t1 SET c1= 3;
+-- let $expected_alg= 'TABLE_SCAN'
+-- source include/rpl_hash_scan_assertion.inc
+
+UPDATE t2 SET c1= 3;
+-- let $expected_alg= 'INDEX_SCAN'
+-- source include/rpl_hash_scan_assertion.inc
+
+UPDATE t3 SET c1= 3;
+-- let $expected_alg= 'INDEX_SCAN'
+-- source include/rpl_hash_scan_assertion.inc
+
+UPDATE t4 SET c1= 3;
+-- let $expected_alg= 'INDEX_SCAN'
+-- source include/rpl_hash_scan_assertion.inc
+
+###################### TABLE_SCAN,HASH_SCAN
+
+-- connection slave
+SET @@global.slave_rows_search_algorithms= 'TABLE_SCAN,HASH_SCAN';
+-- connection master
+
+UPDATE t1 SET c1= 4;
+-- let $expected_alg= 'HASH_SCAN'
+-- source include/rpl_hash_scan_assertion.inc
+
+UPDATE t2 SET c1= 4;
+-- let $expected_alg= 'HASH_SCAN'
+-- source include/rpl_hash_scan_assertion.inc
+
+UPDATE t3 SET c1= 4;
+-- let $expected_alg= 'HASH_SCAN'
+-- source include/rpl_hash_scan_assertion.inc
+
+UPDATE t4 SET c1= 4;
+-- let $expected_alg= 'HASH_SCAN'
+-- source include/rpl_hash_scan_assertion.inc
+
+###################### HASH_SCAN
+
+-- connection slave
+SET @@global.slave_rows_search_algorithms= 'HASH_SCAN';
+-- connection master
+
+UPDATE t1 SET c1= 5;
+-- let $expected_alg= 'HASH_SCAN'
+-- source include/rpl_hash_scan_assertion.inc
+
+UPDATE t2 SET c1= 5;
+-- let $expected_alg= 'HASH_SCAN'
+-- source include/rpl_hash_scan_assertion.inc
+
+UPDATE t3 SET c1= 5;
+-- let $expected_alg= 'HASH_SCAN'
+-- source include/rpl_hash_scan_assertion.inc
+
+UPDATE t4 SET c1= 5;
+-- let $expected_alg= 'HASH_SCAN'
+-- source include/rpl_hash_scan_assertion.inc
+
+###################### HASH_SCAN,INDEX_SCAN
+
+-- connection slave
+SET @@global.slave_rows_search_algorithms= 'HASH_SCAN,INDEX_SCAN';
+-- connection master
+
+UPDATE t1 SET c1= 6;
+-- let $expected_alg= 'HASH_SCAN'
+-- source include/rpl_hash_scan_assertion.inc
+
+UPDATE t2 SET c1= 6;
+-- let $expected_alg= 'INDEX_SCAN'
+-- source include/rpl_hash_scan_assertion.inc
+
+UPDATE t3 SET c1= 6;
+-- let $expected_alg= 'INDEX_SCAN'
+-- source include/rpl_hash_scan_assertion.inc
+
+UPDATE t4 SET c1= 6;
+-- let $expected_alg= 'INDEX_SCAN'
+-- source include/rpl_hash_scan_assertion.inc
+
+###################### HASH_SCAN,INDEX_SCAN,TABLE_SCAN
+
+-- connection slave
+SET @@global.slave_rows_search_algorithms= 'HASH_SCAN,INDEX_SCAN,TABLE_SCAN';
+-- connection master
+
+UPDATE t1 SET c1= 7;
+-- let $expected_alg= 'HASH_SCAN'
+-- source include/rpl_hash_scan_assertion.inc
+
+UPDATE t2 SET c1= 7;
+-- let $expected_alg= 'INDEX_SCAN'
+-- source include/rpl_hash_scan_assertion.inc
+
+UPDATE t3 SET c1= 7;
+-- let $expected_alg= 'INDEX_SCAN'
+-- source include/rpl_hash_scan_assertion.inc
+
+UPDATE t4 SET c1= 7;
+-- let $expected_alg= 'INDEX_SCAN'
+-- source include/rpl_hash_scan_assertion.inc
+
+-- connection slave
+
+SET @@global.slave_rows_search_algorithms= @saved_slave_rows_search_algorithms;
+
+# Clean up
+--connection master
+DROP TABLE IF EXISTS t1, t2, t3, t4;
+--sync_slave_with_master
+  
+-- source include/rpl_reset.inc
+
+-- source include/rpl_end.inc
+

=== modified file 'mysql-test/suite/rpl/t/rpl_row_idempotency.test'
--- a/mysql-test/suite/rpl/t/rpl_row_idempotency.test	2011-08-19 13:04:28 +0000
+++ b/mysql-test/suite/rpl/t/rpl_row_idempotency.test	2011-12-15 09:11:40 +0000
@@ -14,318 +14,16 @@ call mtr.add_suppression("Cannot delete
 call mtr.add_suppression("Cannot add or update a child row: a foreign key constraint fails .* Error_code: 1452");
 call mtr.add_suppression("Duplicate entry .1. for key .PRIMARY.* Error_code: 1062");
 call mtr.add_suppression("The slave coordinator and worker threads are stopped, possibly leaving data in inconsistent state");
+set @saved_slave_rows_search_algorithms= @@global.slave_rows_search_algorithms;
 
-# bug#31609 Not all RBR slave errors reported as errors
-# bug#31552 Replication breaks when deleting rows from out-of-sync table
-#           without PK
+SET GLOBAL slave_rows_search_algorithms= 'INDEX_SCAN,TABLE_SCAN';
+-- source extra/rpl_tests/rpl_row_idempotency.test
 
-# The default for slave-exec-mode option and server
-# variable slave_exec_mode  is 'STRICT'.
-# When 'STRICT' mode is set, the slave SQL thread will stop whenever
-# the row to change is not found. In 'IDEMPOTENT' mode, the SQL thread
-# will continue running and apply the row - replace if it is a Write_rows event -
-# or skip to the next event.
+-- source include/rpl_reset.inc
 
-# The previous part of the tests ran with the slave in IDEMPOTENT mode.
-
-
-#
-# Beyond the idempotent errors above, now dealing with foreign key constraints
-#
-connection slave;
-
-set @old_slave_exec_mode= @@global.slave_exec_mode;
-set @@global.slave_exec_mode= IDEMPOTENT;
-
-connection master;
-
-create table ti1 (b int primary key) engine = innodb;
-create table ti2 (a int primary key, b int, foreign key (b) references ti1(b))
-     engine = innodb;
-set foreign_key_checks=1 /* ensure the check */;
-
-insert into ti1 values (1),(2),(3);
-insert into ti2 set a=2, b=2;
-
-sync_slave_with_master;
-
-#connection slave;
-select * from ti1 order by b /* must be (1),(2),(3) */;
-insert into ti2 set a=1, b=1;
-select * from ti2 order by b /* must be (1,1) (2,2) */;
-
-connection master;
-
-# from now on checking rbr specific idempotent errors
-set @save_binlog_format= @@session.binlog_format;
-set @@session.binlog_format= row;
-delete from ti1 where b=1;
-
-select * from ti1 order by b /* must be (2),(3) */;
-
-# slave must catch up (expect some warnings in error.log)
-sync_slave_with_master;
-
-#connection slave;
-select * from ti1 order by b /* must stays as were on master (1),(2),(3) */;
-
-delete from ti1 where b=3;
-
-connection master;
-insert into ti2 set a=3, b=3;
-
-# slave must catch up (expect some warnings in error.log)
-sync_slave_with_master;
-
-#connection slave;
-select * from ti2 order by b /* must be (1,1),(2,2) - not inserted */;
-
-
-#
-# Checking the new global sys variable
-#
-
-connection slave;
-
-set global slave_exec_mode='IDEMPOTENT';
-set global slave_exec_mode='STRICT';
-
-# checking mutual exclusion for the options
---error ER_WRONG_VALUE_FOR_VAR
-set global slave_exec_mode='IDEMPOTENT,STRICT';
-
-select @@global.slave_exec_mode /* must be STRICT */;
-
-#
-# Checking stops.
-# In the following sections strict slave sql thread is going to
-# stop when faces an idempotent error. In order to proceed
-# the mode is temporarily switched to idempotent.
-#
-
-#
---echo *** foreign keys errors as above now forces to stop
-#
-
-connection master;
-
-set foreign_key_checks=0;
-drop table ti2, ti1;
-
-create table ti1 (b int primary key) engine = innodb;
-create table ti2 (a int primary key, b int, foreign key (b) references ti1(b))
-     engine = innodb;
-set foreign_key_checks=1 /* ensure the check */;
-
-insert into ti1 values (1),(2),(3);
-insert into ti2 set a=2, b=2;
-
-sync_slave_with_master;
-
-#connection slave;
-select * from ti1 order by b /* must be (1),(2),(3) */;
---echo *** conspire future problem
-insert into ti2 set a=1, b=1;
-select * from ti2 order by b /* must be (1,1) (2,2) */;
-
-connection master;
-
-delete from ti1 where b=1 /* offending delete event */;
-select * from ti1 order by b /* must be (2),(3) */;
-
-# foreign key: row is referenced
-
---echo *** slave must stop (Trying to delete a referenced foreign key)
-connection slave;
-source include/wait_for_slave_sql_to_stop.inc;
-
-let $last_error = query_get_value("SHOW SLAVE STATUS", Last_SQL_Errno, 1);
-disable_query_log;
-eval SELECT "$last_error" AS Last_SQL_Error;
-enable_query_log;
-
-select * from ti1 order by b /* must be (1),(2),(3) - not deleted */;
-set foreign_key_checks= 0;
-delete from ti2 where b=1;
-set foreign_key_checks= 1;
-set global slave_exec_mode='IDEMPOTENT';
-start slave sql_thread;
-connection master;
-sync_slave_with_master;
-#connection slave;
-set global slave_exec_mode='STRICT';
-
-connection master;
-
-sync_slave_with_master;
-
-#connection slave;
---echo *** conspire the following insert failure
-# foreign key: no referenced row
-
---echo *** conspire future problem
-delete from ti1 where b=3;
-
-connection master;
-insert into ti2 set a=3, b=3 /* offending write event */;
-
---echo *** slave must stop (Trying to insert an invalid foreign key)
-connection slave;
-source include/wait_for_slave_sql_to_stop.inc;
-
-let $last_error = query_get_value("SHOW SLAVE STATUS", Last_SQL_Errno, 1);
-disable_query_log;
-eval SELECT "$last_error" AS Last_SQL_Error;
-enable_query_log;
-
-select * from ti2 order by b /* must be (2,2) */;
-set foreign_key_checks= 0;
-insert into ti1 set b=3;
-set foreign_key_checks= 1;
-set global slave_exec_mode='IDEMPOTENT';
-start slave sql_thread;
-connection master;
-sync_slave_with_master;
-#connection slave;
-set global slave_exec_mode='STRICT';
-
-connection master;
-
-sync_slave_with_master;
-
-select * from ti2 order by b /* must be (2,2),(3,3) */;
-
-# 
---echo *** other errors
-# 
-
-# dup key insert
-
-#connection slave;
---echo *** conspiring query
-insert into ti1 set b=1;
-
-connection master;
-insert into ti1 set b=1 /* offending write event */;
-
---echo *** slave must stop (Trying to insert a dupliacte key)
-connection slave;
-source include/wait_for_slave_sql_to_stop.inc;
-
-let $last_error = query_get_value("SHOW SLAVE STATUS", Last_SQL_Errno, 1);
-disable_query_log;
-eval SELECT "$last_error" AS Last_SQL_Error;
-enable_query_log;
-
-set foreign_key_checks= 0;
-delete from ti1 where b=1;
-set foreign_key_checks= 1;
-set global slave_exec_mode='IDEMPOTENT';
-start slave sql_thread;
-connection master;
-sync_slave_with_master;
-#connection slave;
-set global slave_exec_mode='STRICT';
-
-# key not found
-
-connection master;
-
-CREATE TABLE t1 (a INT PRIMARY KEY);
-CREATE TABLE t2 (a INT);
-INSERT INTO t1 VALUES (-1),(-2),(-3);
-INSERT INTO t2 VALUES (-1),(-2),(-3);
-sync_slave_with_master;
-
-#connection slave;
-DELETE FROM t1 WHERE a = -2;
-DELETE FROM t2 WHERE a = -2;
-connection master;
-DELETE FROM t1 WHERE a = -2;
-
---echo *** slave must stop (Key was not found)
-connection slave;
-source include/wait_for_slave_sql_to_stop.inc;
-
-let $last_error = query_get_value("SHOW SLAVE STATUS", Last_SQL_Errno, 1);
-disable_query_log;
-eval SELECT "$last_error" AS Last_SQL_Error;
-enable_query_log;
-
-set global slave_exec_mode='IDEMPOTENT';
-start slave sql_thread;
-connection master;
-sync_slave_with_master;
-#connection slave;
-set global slave_exec_mode='STRICT';
-
-connection master;
-DELETE FROM t2 WHERE a = -2; 
---echo *** slave must stop (Key was not found)
-connection slave;
-source include/wait_for_slave_sql_to_stop.inc;
-
-let $last_error = query_get_value("SHOW SLAVE STATUS", Last_SQL_Errno, 1);
-disable_query_log;
-eval SELECT "$last_error" AS Last_SQL_Error;
-enable_query_log;
-
-set global slave_exec_mode='IDEMPOTENT';
-start slave sql_thread;
-connection master;
-sync_slave_with_master;
-#connection slave;
-set global slave_exec_mode='STRICT';
-
-UPDATE t1 SET a = 1 WHERE a = -1;
-UPDATE t2 SET a = 1 WHERE a = -1;
-
-connection master;
-UPDATE t1 SET a = 1 WHERE a = -1;
-
---echo *** slave must stop (Key was not found)
-connection slave;
-source include/wait_for_slave_sql_to_stop.inc;
-
-let $last_error = query_get_value("SHOW SLAVE STATUS", Last_SQL_Errno, 1);
-disable_query_log;
-eval SELECT "$last_error" AS Last_SQL_Error;
-enable_query_log;
-
-set global slave_exec_mode='IDEMPOTENT';
-start slave sql_thread;
-connection master;
-sync_slave_with_master;
-#connection slave;
-set global slave_exec_mode='STRICT';
-
-
-connection master;
-UPDATE t2 SET a = 1 WHERE a = -1;
-
---echo *** slave must stop (Key was not found)
-connection slave;
-source include/wait_for_slave_sql_to_stop.inc;
-
-let $last_error = query_get_value("SHOW SLAVE STATUS", Last_SQL_Errno, 1);
-disable_query_log;
-eval SELECT "$last_error" AS Last_SQL_Error;
-enable_query_log;
-
-set global slave_exec_mode='IDEMPOTENT';
-start slave sql_thread;
-connection master;
-sync_slave_with_master;
-#connection slave;
-SET @@global.slave_exec_mode= @old_slave_exec_mode;
-
-# cleanup for bug#31609 tests
-
-connection master;
-
-drop table t1,t2,ti2,ti1;
-sync_slave_with_master;
-set @@global.slave_exec_mode= @old_slave_exec_mode;
+SET GLOBAL slave_rows_search_algorithms= 'INDEX_SCAN,HASH_SCAN';
+-- source extra/rpl_tests/rpl_row_idempotency.test
 
+set @@global.slave_rows_search_algorithms= @saved_slave_rows_search_algorithms;
 --echo *** end of tests
 --source include/rpl_end.inc

=== added file 'mysql-test/suite/sys_vars/r/slave_rows_search_algorithms_basic.result'
--- a/mysql-test/suite/sys_vars/r/slave_rows_search_algorithms_basic.result	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/sys_vars/r/slave_rows_search_algorithms_basic.result	2012-04-04 17:15:40 +0000
@@ -0,0 +1,60 @@
+set @saved_slave_rows_search_algorithms = @@global.slave_rows_search_algorithms;
+SELECT @@global.slave_rows_search_algorithms;
+@@global.slave_rows_search_algorithms
+TABLE_SCAN,INDEX_SCAN
+SET GLOBAL SLAVE_ROWS_SEARCH_ALGORITHMS=DEFAULT;
+SELECT @@global.slave_rows_search_algorithms;
+@@global.slave_rows_search_algorithms
+TABLE_SCAN,INDEX_SCAN
+SET GLOBAL SLAVE_ROWS_SEARCH_ALGORITHMS='TABLE_SCAN';
+SELECT @@global.slave_rows_search_algorithms;
+@@global.slave_rows_search_algorithms
+TABLE_SCAN
+SET GLOBAL SLAVE_ROWS_SEARCH_ALGORITHMS='HASH_SCAN';
+SELECT @@global.slave_rows_search_algorithms;
+@@global.slave_rows_search_algorithms
+HASH_SCAN
+SET GLOBAL SLAVE_ROWS_SEARCH_ALGORITHMS='INDEX_SCAN';
+SELECT @@global.slave_rows_search_algorithms;
+@@global.slave_rows_search_algorithms
+INDEX_SCAN
+SET GLOBAL SLAVE_ROWS_SEARCH_ALGORITHMS='TABLE_SCAN,HASH_SCAN';
+SELECT @@global.slave_rows_search_algorithms;
+@@global.slave_rows_search_algorithms
+TABLE_SCAN,HASH_SCAN
+SET GLOBAL SLAVE_ROWS_SEARCH_ALGORITHMS='TABLE_SCAN,HASH_SCAN,INDEX_SCAN';
+SELECT @@global.slave_rows_search_algorithms;
+@@global.slave_rows_search_algorithms
+TABLE_SCAN,INDEX_SCAN,HASH_SCAN
+SET GLOBAL SLAVE_ROWS_SEARCH_ALGORITHMS='TABLE_SCAN,INDEX_SCAN';
+SELECT @@global.slave_rows_search_algorithms;
+@@global.slave_rows_search_algorithms
+TABLE_SCAN,INDEX_SCAN
+SET GLOBAL SLAVE_ROWS_SEARCH_ALGORITHMS='HASH_SCAN,INDEX_SCAN';
+SELECT @@global.slave_rows_search_algorithms;
+@@global.slave_rows_search_algorithms
+INDEX_SCAN,HASH_SCAN
+SET GLOBAL SLAVE_ROWS_SEARCH_ALGORITHMS='TABLE_5CAN';
+ERROR 42000: Variable 'slave_rows_search_algorithms' can't be set to the value of 'TABLE_5CAN'
+SELECT @@global.slave_rows_search_algorithms;
+@@global.slave_rows_search_algorithms
+INDEX_SCAN,HASH_SCAN
+SET GLOBAL SLAVE_ROWS_SEARCH_ALGORITHMS='';
+ERROR 42000: Variable 'slave_rows_search_algorithms' can't be set to the value of ''
+SELECT @@global.slave_rows_search_algorithms;
+@@global.slave_rows_search_algorithms
+INDEX_SCAN,HASH_SCAN
+SET GLOBAL SLAVE_ROWS_SEARCH_ALGORITHMS='1';
+ERROR 42000: Variable 'slave_rows_search_algorithms' can't be set to the value of '1'
+SELECT @@global.slave_rows_search_algorithms;
+@@global.slave_rows_search_algorithms
+INDEX_SCAN,HASH_SCAN
+SET GLOBAL SLAVE_ROWS_SEARCH_ALGORITHMS=NULL;
+ERROR 42000: Variable 'slave_rows_search_algorithms' can't be set to the value of 'NULL'
+SELECT @@global.slave_rows_search_algorithms;
+@@global.slave_rows_search_algorithms
+INDEX_SCAN,HASH_SCAN
+set global slave_rows_search_algorithms = @saved_slave_rows_search_algorithms;
+SELECT @@global.slave_rows_search_algorithms;
+@@global.slave_rows_search_algorithms
+TABLE_SCAN,INDEX_SCAN

=== added file 'mysql-test/suite/sys_vars/t/slave_rows_search_algorithms_basic.test'
--- a/mysql-test/suite/sys_vars/t/slave_rows_search_algorithms_basic.test	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/sys_vars/t/slave_rows_search_algorithms_basic.test	2012-04-03 13:01:53 +0000
@@ -0,0 +1,51 @@
+--source include/not_embedded.inc
+
+set @saved_slave_rows_search_algorithms = @@global.slave_rows_search_algorithms;
+
+
+SELECT @@global.slave_rows_search_algorithms;
+
+SET GLOBAL SLAVE_ROWS_SEARCH_ALGORITHMS=DEFAULT;
+SELECT @@global.slave_rows_search_algorithms;
+
+SET GLOBAL SLAVE_ROWS_SEARCH_ALGORITHMS='TABLE_SCAN';
+SELECT @@global.slave_rows_search_algorithms;
+
+SET GLOBAL SLAVE_ROWS_SEARCH_ALGORITHMS='HASH_SCAN';
+SELECT @@global.slave_rows_search_algorithms;
+
+SET GLOBAL SLAVE_ROWS_SEARCH_ALGORITHMS='INDEX_SCAN';
+SELECT @@global.slave_rows_search_algorithms;
+
+SET GLOBAL SLAVE_ROWS_SEARCH_ALGORITHMS='TABLE_SCAN,HASH_SCAN';
+SELECT @@global.slave_rows_search_algorithms;
+
+SET GLOBAL SLAVE_ROWS_SEARCH_ALGORITHMS='TABLE_SCAN,HASH_SCAN,INDEX_SCAN';
+SELECT @@global.slave_rows_search_algorithms;
+
+SET GLOBAL SLAVE_ROWS_SEARCH_ALGORITHMS='TABLE_SCAN,INDEX_SCAN';
+SELECT @@global.slave_rows_search_algorithms;
+
+SET GLOBAL SLAVE_ROWS_SEARCH_ALGORITHMS='HASH_SCAN,INDEX_SCAN';
+SELECT @@global.slave_rows_search_algorithms;
+
+
+# checking that setting the variable to a non-existing value raises an error
+--error ER_WRONG_VALUE_FOR_VAR
+SET GLOBAL SLAVE_ROWS_SEARCH_ALGORITHMS='TABLE_5CAN';
+SELECT @@global.slave_rows_search_algorithms;
+
+--error ER_WRONG_VALUE_FOR_VAR
+SET GLOBAL SLAVE_ROWS_SEARCH_ALGORITHMS='';
+SELECT @@global.slave_rows_search_algorithms;
+
+--error ER_WRONG_VALUE_FOR_VAR
+SET GLOBAL SLAVE_ROWS_SEARCH_ALGORITHMS='1';
+SELECT @@global.slave_rows_search_algorithms;
+
+--error ER_WRONG_VALUE_FOR_VAR
+SET GLOBAL SLAVE_ROWS_SEARCH_ALGORITHMS=NULL;
+SELECT @@global.slave_rows_search_algorithms;
+
+set global slave_rows_search_algorithms = @saved_slave_rows_search_algorithms;
+SELECT @@global.slave_rows_search_algorithms;

=== modified file 'sql/handler.h'
--- a/sql/handler.h	2012-04-26 10:33:24 +0000
+++ b/sql/handler.h	2012-05-02 12:04:42 +0000
@@ -194,6 +194,14 @@ enum enum_alter_inplace_result {
  */
 #define HA_CAN_FULLTEXT_EXT              (LL(1) << 39)
 
+/*
+  Storage engine doesn't synchronize result set with expected table contents.
+  Used by replication slave to check if it is possible to retrieve rows from
+  the table when deciding whether to do a full table scan, index scan or hash
+  scan while applying a row event.
+ */
+#define HA_READ_OUT_OF_SYNC              (LL(1) << 40)
+
 /* bits in index_flags(index_number) for what you can do with index */
 #define HA_READ_NEXT            1       /* TODO really use this flag */
 #define HA_READ_PREV            2       /* supports ::index_prev */
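
A minimal sketch of how an engine opts out of hash scans by advertising the
new flag. The blackhole engine (storage/blackhole/ha_blackhole.h is in the
modified file list) is the motivating case, since its reads never return the
rows that were written; the surrounding flag set below is illustrative, not
the exact contents of that file:

    ulonglong ha_blackhole::table_flags() const
    {
      /* reads are never in sync with writes, so HASH_SCAN cannot probe rows */
      return (HA_BINLOG_STMT_CAPABLE | HA_BINLOG_ROW_CAPABLE |
              HA_READ_OUT_OF_SYNC);
    }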

=== modified file 'sql/log_event.cc'
--- a/sql/log_event.cc	2012-04-27 11:57:38 +0000
+++ b/sql/log_event.cc	2012-05-02 12:04:42 +0000
@@ -8660,7 +8660,7 @@ Rows_log_event::Rows_log_event(THD *thd_
     m_width(tbl_arg ? tbl_arg->s->fields : 1),
     m_rows_buf(0), m_rows_cur(0), m_rows_end(0), m_flags(0) 
 #ifdef HAVE_REPLICATION
-    , m_curr_row(NULL), m_curr_row_end(NULL), m_key(NULL)
+    , m_curr_row(NULL), m_curr_row_end(NULL), m_key(NULL), last_hashed_key(NULL)
 #endif
 {
   /*
@@ -8708,7 +8708,7 @@ Rows_log_event::Rows_log_event(const cha
 #endif
     m_table_id(0), m_rows_buf(0), m_rows_cur(0), m_rows_end(0)
 #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
-    , m_curr_row(NULL), m_curr_row_end(NULL), m_key(NULL)
+    , m_curr_row(NULL), m_curr_row_end(NULL), m_key(NULL), last_hashed_key(NULL)
 #endif
 {
   DBUG_ENTER("Rows_log_event::Rows_log_event(const char*,...)");
@@ -8913,2442 +8913,3009 @@ int Rows_log_event::do_add_row_data(ucha
 #endif
 
 #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
-int Rows_log_event::do_apply_event(Relay_log_info const *rli)
-{
-  DBUG_ENTER("Rows_log_event::do_apply_event(Relay_log_info*)");
-  int error= 0;
 
-  if (opt_bin_log)
-  {
-    enum_gtid_statement_status state= gtid_pre_statement_checks(thd);
-    if (state == GTID_STATEMENT_CANCEL)
-      // error has already been printed; don't print anything more here
-      DBUG_RETURN(-1);
-    else if (state == GTID_STATEMENT_SKIP)
-      DBUG_RETURN(0);
-  }
+/**
+  Checks if any of the columns in the given table is
+  signaled in the bitmap.
 
-  /*
-    If m_table_id == ~0UL, then we have a dummy event that does not
-    contain any data.  In that case, we just remove all tables in the
-    tables_to_lock list, close the thread tables, and return with
-    success.
-   */
-  if (m_table_id == ~0UL)
-  {
-    /*
-       This one is supposed to be set: just an extra check so that
-       nothing strange has happened.
-     */
-    DBUG_ASSERT(get_flags(STMT_END_F));
+  For each column in the given table checks if it is
+  signaled in the bitmap. This is most useful when deciding
+  whether a before image (BI) can be used or not for
+  searching a row. If no column is signaled, then the
+  image cannot be used for searching a record (regardless
+  of using position(), index scan or table scan). Here is
+  an example:
 
-    const_cast<Relay_log_info*>(rli)->slave_close_thread_tables(thd);
-    thd->clear_error();
-    DBUG_RETURN(0);
-  }
+  MASTER> SET @@binlog_row_image='MINIMAL';
+  MASTER> CREATE TABLE t1 (a int, b int, c int, primary key(c));
+  SLAVE>  CREATE TABLE t1 (a int, b int);
+  MASTER> INSERT INTO t1 VALUES (1,2,3);
+  MASTER> UPDATE t1 SET a=2 WHERE b=2;
 
-  /*
-    'thd' has been set by exec_relay_log_event(), just before calling
-    do_apply_event(). We still check here to prevent future coding
-    errors.
-  */
-  DBUG_ASSERT(rli->info_thd == thd);
+  For the update statement only the PK (column c) is
+  logged in the before image (BI). As such, given that
+  the slave has no column c, it will not be able to
+  find the row, because the BI has no values for the columns
+  the slave knows about (columns a and b).
 
-  /*
-    If there is no locks taken, this is the first binrow event seen
-    after the table map events.  We should then lock all the tables
-    used in the transaction and proceed with execution of the actual
-    event.
-  */
-  if (!thd->lock)
+  @param table   the table reference on the slave.
+  @param cols    the bitmap signaling columns available in
+                 the BI.
+
+  @return TRUE if the BI contains usable columns for searching,
+          FALSE otherwise.
+*/
+static
+my_bool is_any_column_signaled_for_table(TABLE *table, MY_BITMAP *cols)
+{
+  DBUG_ENTER("is_any_column_signaled_for_table");
+
+  for (Field **ptr= table->field ;
+       *ptr && ((*ptr)->field_index < cols->n_bits);
+       ptr++)
   {
-    /*
-      Lock_tables() reads the contents of thd->lex, so they must be
-      initialized.
+    if (bitmap_is_set(cols, (*ptr)->field_index))
+      DBUG_RETURN(TRUE);
+  }
 
-      We also call the mysql_reset_thd_for_next_command(), since this
-      is the logical start of the next "statement". Note that this
-      call might reset the value of current_stmt_binlog_format, so
-      we need to do any changes to that value after this function.
-    */
-    lex_start(thd);
-    mysql_reset_thd_for_next_command(thd);
-    /*
-      The current statement is just about to begin and 
-      has not yet modified anything. Note, all.modified is reset
-      by mysql_reset_thd_for_next_command.
-    */
-    thd->transaction.stmt.reset_unsafe_rollback_flags();
-    /*
-      This is a row injection, so we flag the "statement" as
-      such. Note that this code is called both when the slave does row
-      injections and when the BINLOG statement is used to do row
-      injections.
-    */
-    thd->lex->set_stmt_row_injection();
+  DBUG_RETURN (FALSE);
+}
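
Where this check pays off: if no before-image column is usable, there is no
point scanning at all. A hypothetical guard (the error code here is
illustrative, not necessarily the one the patch uses):

    if (!is_any_column_signaled_for_table(table, &m_cols))
      error= HA_ERR_END_OF_FILE; /* the row cannot be located on the slave */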
 
-    /*
-      There are a few flags that are replicated with each row event.
-      Make sure to set/clear them before executing the main body of
-      the event.
-    */
-    if (get_flags(NO_FOREIGN_KEY_CHECKS_F))
-        thd->variables.option_bits|= OPTION_NO_FOREIGN_KEY_CHECKS;
-    else
-        thd->variables.option_bits&= ~OPTION_NO_FOREIGN_KEY_CHECKS;
+/**
+  Checks if the fields in the given key are signaled in
+  the bitmap.
 
-    if (get_flags(RELAXED_UNIQUE_CHECKS_F))
-        thd->variables.option_bits|= OPTION_RELAXED_UNIQUE_CHECKS;
-    else
-        thd->variables.option_bits&= ~OPTION_RELAXED_UNIQUE_CHECKS;
+  Validates whether the before image is usable for the
+  given key. It can be the case that the before image
+  does not contain values for the key (eg, master was
+  using 'minimal' option for image logging and slave has
+  different index structure on the table). Here is an
+  example:
 
-    /*
-      Note that unlike the other thd options set here, this one
-      comes from a global, and not from the incoming event.
-    */
-    if (opt_slave_allow_batching)
-      thd->variables.option_bits|= OPTION_ALLOW_BATCH;
-    else
-      thd->variables.option_bits&= ~OPTION_ALLOW_BATCH;
+  MASTER> SET @@binlog_row_image='MINIMAL';
+  MASTER> CREATE TABLE t1 (a int, b int, c int, primary key(c));
+  SLAVE> CREATE TABLE t1 (a int, b int, c int, key(a,c));
+  MASTER> INSERT INTO t1 VALUES (1,2,3);
+  MASTER> UPDATE t1 SET a=2 WHERE b=2;
 
-    /* A small test to verify that objects have consistent types */
-    DBUG_ASSERT(sizeof(thd->variables.option_bits) == sizeof(OPTION_RELAXED_UNIQUE_CHECKS));
+  When finding the row on the slave, one cannot use the
+  index (a,c) to search for the row, because there is only
+  data in the before image for column c. This function
+  checks the fields needed for a given key and searches
+  the bitmap to see if all the fields required are
+  signaled.
 
-    if (open_and_lock_tables(thd, rli->tables_to_lock, FALSE, 0))
-    {
-      uint actual_error= thd->get_stmt_da()->sql_errno();
-      if (thd->is_slave_error || thd->is_fatal_error)
-      {
-        /*
-          Error reporting borrowed from Query_log_event with many excessive
-          simplifications. 
-          We should not honour --slave-skip-errors at this point as we are
-          having severe errors which should not be skiped.
-        */
-        rli->report(ERROR_LEVEL, actual_error,
-                    "Error executing row event: '%s'",
-                    (actual_error ? thd->get_stmt_da()->message() :
-                     "unexpected success or fatal error"));
-        thd->is_slave_error= 1;
-      }
-      const_cast<Relay_log_info*>(rli)->slave_close_thread_tables(thd);
-      DBUG_RETURN(actual_error);
-    }
+  @param keyinfo  reference to key.
+  @param cols     the bitmap signaling which columns
+                  have available data.
 
-    /*
-      When the open and locking succeeded, we check all tables to
-      ensure that they still have the correct type.
+  @return TRUE if all fields are signaled in the bitmap
+          for the given key, FALSE otherwise.
+*/
+static
+my_bool are_all_columns_signaled_for_key(KEY *keyinfo, MY_BITMAP *cols)
+{
+  DBUG_ENTER("are_all_columns_signaled_for_key");
 
-      We can use a down cast here since we know that every table added
-      to the tables_to_lock is a RPL_TABLE_LIST.
-    */
+  for (uint i=0 ; i < keyinfo->key_parts ;i++)
+  {
+    uint fieldnr= keyinfo->key_part[i].fieldnr - 1;
+    if (fieldnr >= cols->n_bits ||
+        !bitmap_is_set(cols, fieldnr))
+      DBUG_RETURN(FALSE);
+  }
 
-    {
-      DBUG_PRINT("debug", ("Checking compability of tables to lock - tables_to_lock: %p",
-                           rli->tables_to_lock));
-      RPL_TABLE_LIST *ptr= rli->tables_to_lock;
-      for ( ; ptr ; ptr= static_cast<RPL_TABLE_LIST*>(ptr->next_global))
-      {
-        TABLE *conv_table;
-        if (!ptr->m_tabledef.compatible_with(thd, const_cast<Relay_log_info*>(rli),
-                                             ptr->table, &conv_table))
-        {
-          DBUG_PRINT("debug", ("Table: %s.%s is not compatible with master",
-                               ptr->table->s->db.str,
-                               ptr->table->s->table_name.str));
-          /*
-            We should not honour --slave-skip-errors at this point as we are
-            having severe errors which should not be skiped.
-          */
-          thd->is_slave_error= 1;
-          const_cast<Relay_log_info*>(rli)->slave_close_thread_tables(thd);
-          DBUG_RETURN(ERR_BAD_TABLE_DEF);
-        }
-        DBUG_PRINT("debug", ("Table: %s.%s is compatible with master"
-                             " - conv_table: %p",
-                             ptr->table->s->db.str,
-                             ptr->table->s->table_name.str, conv_table));
-        ptr->m_conv_table= conv_table;
-      }
-    }
+  DBUG_RETURN(TRUE);
+}
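
To make the key(a,c) example above concrete, a small sketch of the check this
helper performs; the field numbers (a=0, b=1, c=2) and the keyinfo_ac handle
are hypothetical:

    MY_BITMAP bi;
    bitmap_init(&bi, NULL, 3, FALSE);  /* three columns: a, b, c */
    bitmap_set_bit(&bi, 2);            /* only column c is in the BI */
    /* key(a,c) needs field numbers 0 and 2; bit 0 is clear, so the
       before image cannot drive a search on this key */
    DBUG_ASSERT(!are_all_columns_signaled_for_key(keyinfo_ac, &bi));
    bitmap_free(&bi);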
 
-    /*
-      ... and then we add all the tables to the table map and but keep
-      them in the tables to lock list.
+/**
+  Searches the table for a given key that can be used
+  according to the existing values, ie, columns set
+  in the bitmap.
 
-      We also invalidate the query cache for all the tables, since
-      they will now be changed.
+  The caller can specify which type of key to find by
+  setting the following flags in the key_type parameter:
 
-      TODO [/Matz]: Maybe the query cache should not be invalidated
-      here? It might be that a table is not changed, even though it
-      was locked for the statement.  We do know that each
-      Rows_log_event contain at least one row, so after processing one
-      Rows_log_event, we can invalidate the query cache for the
-      associated table.
-     */
-    for (TABLE_LIST *ptr= rli->tables_to_lock ; ptr ; ptr= ptr->next_global)
-    {
-      const_cast<Relay_log_info*>(rli)->m_table_map.set_table(ptr->table_id, ptr->table);
-    }
-#ifdef HAVE_QUERY_CACHE
-    query_cache.invalidate_locked_for_write(rli->tables_to_lock);
-#endif
-  }
+    - PRI_KEY_FLAG
+      Returns the primary key.
 
-  TABLE* 
-    table= 
-    m_table= const_cast<Relay_log_info*>(rli)->m_table_map.get_table(m_table_id);
+    - UNIQUE_KEY_FLAG
+      Returns a unique key (flagged with HA_NOSAME)
 
-  DBUG_PRINT("debug", ("m_table: 0x%lx, m_table_id: %lu", (ulong) m_table, m_table_id));
+    - MULTIPLE_KEY_FLAG
+      Returns a key that is not unique (flagged with HA_NOSAME
+      and without HA_NULL_PART_KEY) nor PK.
 
-  if (table)
-  {
-    /*
-      table == NULL means that this table should not be replicated
-      (this was set up by Table_map_log_event::do_apply_event()
-      which tested replicate-* rules).
-    */
+  The above flags can be used together, in which case the
+  search is conducted in the order listed above. E.g., the
+  following flag:
 
-    /*
-      It's not needed to set_time() but
-      1) it continues the property that "Time" in SHOW PROCESSLIST shows how
-      much slave is behind
-      2) it will be needed when we allow replication from a table with no
-      TIMESTAMP column to a table with one.
-      So we call set_time(), like in SBR. Presently it changes nothing.
-    */
-    thd->set_time(&when);
+    (PRI_KEY_FLAG | UNIQUE_KEY_FLAG | MULTIPLE_KEY_FLAG)
 
-    /*
-      Now we are in a statement and will stay in a statement until we
-      see a STMT_END_F.
-
-      We set this flag here, before actually applying any rows, in
-      case the SQL thread is stopped and we need to detect that we're
-      inside a statement and halting abruptly might cause problems
-      when restarting.
-     */
-    const_cast<Relay_log_info*>(rli)->set_flag(Relay_log_info::IN_STMT);
-
-     if ( m_width == table->s->fields && bitmap_is_set_all(&m_cols))
-      set_flags(COMPLETE_ROWS_F);
+  means that a primary key is returned if it is suitable. If
+  not, the unique keys are searched. If no unique key is
+  suitable, the non-unique keys are searched. Finally, if no
+  key is suitable, MAX_KEY is returned.
 
-    /* 
-      Set tables write and read sets.
-      
-      Read_set contains all slave columns (in case we are going to fetch
-      a complete record from slave)
-      
-      Write_set equals the m_cols bitmap sent from master but it can be 
-      longer if slave has extra columns. 
-     */ 
+  @param table    reference to the table.
+  @param bi_cols  a bitmap that filters out columns that should
+                  not be considered while searching the key.
+                  Columns that should be considered are set.
+  @param key_type the type of key to search for.
 
-    DBUG_PRINT_BITSET("debug", "Setting table's write_set from: %s", &m_cols);
-    
-    bitmap_set_all(table->read_set);
-    if (get_type_code() == DELETE_ROWS_EVENT)
-        bitmap_intersect(table->read_set,&m_cols);
+  @return MAX_KEY if no key is suitable according to the specified
+          key_type. Returns the key number otherwise.
 
-    bitmap_set_all(table->write_set);
-    if (!get_flags(COMPLETE_ROWS_F))
-    {
-      if (get_type_code() == UPDATE_ROWS_EVENT)
-        bitmap_intersect(table->write_set,&m_cols_ai);
-      else /* WRITE ROWS EVENTS store the bitmap in m_cols instead of m_cols_ai */
-        bitmap_intersect(table->write_set,&m_cols);
-    }
+*/
+static
+uint
+search_key_in_table(TABLE *table, MY_BITMAP *bi_cols, uint key_type)
+{
+  DBUG_ENTER("search_key_in_table");
 
-    this->slave_exec_mode= slave_exec_mode_options; // fix the mode
+  KEY *keyinfo;
+  uint res= MAX_KEY;
+  uint key;
 
-    // Do event specific preparations 
-    error= do_before_row_operations(rli);
+  if (key_type & PRI_KEY_FLAG &&
+      (table->s->primary_key < MAX_KEY))
+  {
+    DBUG_PRINT("debug", ("Searching for PK"));
+    keyinfo= table->s->key_info + (uint) table->s->primary_key;
+    if (are_all_columns_signaled_for_key(keyinfo, bi_cols))
+      DBUG_RETURN(table->s->primary_key);
+  }
 
-    /*
-      Bug#56662 Assertion failed: next_insert_id == 0, file handler.cc
-      Don't allow generation of auto_increment value when processing
-      rows event by setting 'MODE_NO_AUTO_VALUE_ON_ZERO'.
-    */
-    ulong saved_sql_mode= thd->variables.sql_mode;
-    thd->variables.sql_mode= MODE_NO_AUTO_VALUE_ON_ZERO;
+  DBUG_PRINT("debug", ("Unique keys count: %u", table->s->uniques));
 
-    // row processing loop
+  if (key_type & UNIQUE_KEY_FLAG && table->s->uniques)
+  {
+    DBUG_PRINT("debug", ("Searching for UK"));
+    for (key=0,keyinfo= table->key_info ;
+         (key < table->s->keys) && (res == MAX_KEY);
+         key++,keyinfo++)
+    {
+      /*
+        - Unique keys cannot be disabled, hence we skip that check.
+        - Skip unique keys with nullable parts
+        - Skip primary keys
+      */
+      if (!((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME) ||
+          (key == table->s->primary_key))
+        continue;
+      res= are_all_columns_signaled_for_key(keyinfo, bi_cols) ?
+           key : MAX_KEY;
 
-    /* 
-      set the initial time of this ROWS statement if it was not done
-      before in some other ROWS event. 
-     */
-    const_cast<Relay_log_info*>(rli)->set_row_stmt_start_timestamp();
+      if (res < MAX_KEY)
+        DBUG_RETURN(res);
+    }
+    DBUG_PRINT("debug", ("UK has NULLABLE parts or not all columns signaled."));
+  }
 
-    const uchar *saved_m_curr_row= m_curr_row;
-    while (error == 0)
+  if (key_type & MULTIPLE_KEY_FLAG && table->s->keys)
+  {
+    DBUG_PRINT("debug", ("Searching for K."));
+    for (key=0,keyinfo= table->key_info ;
+         (key < table->s->keys) && (res == MAX_KEY);
+         key++,keyinfo++)
     {
-      /* in_use can have been set to NULL in close_tables_for_reopen */
-      THD* old_thd= table->in_use;
-      if (!table->in_use)
-        table->in_use= thd;
+      /*
+        - Skip inactive keys
+        - Skip unique keys without nullable parts
+        - Skip primary keys
+      */
+      if (!(table->s->keys_in_use.is_set(key)) ||
+          ((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME) ||
+          (key == table->s->primary_key))
+        continue;
 
-      error= do_exec_row(rli);
+      res= are_all_columns_signaled_for_key(keyinfo, bi_cols) ?
+           key : MAX_KEY;
 
-      if (error)
-        DBUG_PRINT("info", ("error: %s", HA_ERR(error)));
-      DBUG_ASSERT(error != HA_ERR_RECORD_DELETED);
+      if (res < MAX_KEY)
+        DBUG_RETURN(res);
+    }
+    DBUG_PRINT("debug", ("Not all columns signaled for K."));
+  }
 
-      table->in_use = old_thd;
+  DBUG_RETURN(res);
+}
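
A condensed usage sketch, mirroring the two calls made below in
decide_row_lookup_algorithm_and_key(), only to make the precedence concrete:

    /* prefer PK, then UK; only then consider non-unique keys */
    uint key= search_key_in_table(table, &m_cols,
                                  PRI_KEY_FLAG | UNIQUE_KEY_FLAG);
    if (key == MAX_KEY)
      key= search_key_in_table(table, &m_cols,
                               PRI_KEY_FLAG | UNIQUE_KEY_FLAG |
                               MULTIPLE_KEY_FLAG);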
 
-      if (error)
-      {
-        int actual_error= convert_handler_error(error, thd, table);
-        bool idempotent_error= (idempotent_error_code(error) &&
-                                slave_exec_mode == SLAVE_EXEC_MODE_IDEMPOTENT);
-        bool ignored_error= (idempotent_error == 0 ?
-                             ignored_error_code(actual_error) : 0);
+void
+Rows_log_event::decide_row_lookup_algorithm_and_key()
+{
 
-        if (idempotent_error || ignored_error)
-        {
-          if (log_warnings)
-            slave_rows_error_report(WARNING_LEVEL, error, rli, thd, table,
-                                    get_type_str(),
-                                    const_cast<Relay_log_info*>(rli)->get_rpl_log_name(),
-                                    (ulong) log_pos);
-          clear_all_errors(thd, const_cast<Relay_log_info*>(rli));
-          error= 0;
-          if (idempotent_error == 0)
-            break;
-        }
-      }
+  DBUG_ENTER("decide_row_lookup_algorithm_and_key");
 
-      DBUG_PRINT("info", ("curr_row: 0x%lu; curr_row_end: 0x%lu; rows_end: 0x%lu",
-                          (ulong) m_curr_row, (ulong) m_curr_row_end, (ulong) m_rows_end));
+  /*
+    Decision table:
+    - I  --> Index scan / search
+    - T  --> Table scan
+    - Hi --> Hash over index
+    - Ht --> Hash over the entire table
+
+    |--------------+-----------+------+------+------|
+    | Index\Option | I , T , H | I, T | I, H | T, H |
+    |--------------+-----------+------+------+------|
+    | PK / UK      | I         | I    | I    | Hi   |
+    | K            | Hi        | I    | Hi   | Hi   |
+    | No Index     | Ht        | T    | Ht   | Ht   |
+    |--------------+-----------+------+------+------|
 
-      if (!error)
-      {
-        /*
-          If m_curr_row_end  was not set during event execution (e.g., because
-          of errors) we can't proceed to the next row. If the error is transient
-          (i.e., error == 0 at this point) we must call unpack_current_row() to
-          set m_curr_row_end.
-        */ 
-        if (!m_curr_row_end)
-          error= unpack_current_row(rli, &m_cols);
-  
-        m_curr_row= m_curr_row_end;
-      }
+  */
 
-      // at this moment m_curr_row_end should be set
-      DBUG_ASSERT(error || m_curr_row_end != NULL); 
-      DBUG_ASSERT(error || m_curr_row <= m_curr_row_end);
-      DBUG_ASSERT(error || m_curr_row_end <= m_rows_end);
- 
-      if (m_curr_row == m_rows_end)
-        break;
-    } // row processing loop
+  TABLE *table= this->m_table;
+  uint event_type= this->get_type_code();
+  MY_BITMAP *cols= &this->m_cols;
+  this->m_rows_lookup_algorithm= ROW_LOOKUP_NOT_NEEDED;
+  this->m_key_index= MAX_KEY;
 
-    if (saved_m_curr_row != m_curr_row && !table->file->has_transactions())
-    {
-      /*
-        Usually, the trans_commit_stmt() propagates unsafe_rollback_flags
-        from statement to transaction level. However, we cannot rely on
-        this when row format is in use as several events can be processed
-        before calling this function. This happens because it is called
-        only when the latest event generated by a statement is processed.
+  if (event_type == WRITE_ROWS_EVENT)  // row lookup not needed
+    DBUG_VOID_RETURN;
 
-        There are however upper level functions that execute per event
-        and check transaction's status. So if the unsafe_rollback_flags
-        are not propagated here, this can lead to errors.
+  if (!(slave_rows_search_algorithms_options & SLAVE_ROWS_INDEX_SCAN))
+    goto TABLE_OR_INDEX_HASH_SCAN;
 
-        For example, a transaction that updates non-transactional tables
-        may be stopped in the middle thus leading to inconsistencies
-        after a restart.
-      */
-      thd->transaction.stmt.mark_modified_non_trans_table();
-      thd->transaction.merge_unsafe_rollback_flags();
-    }
+  /* PK or UK => use LOOKUP_INDEX_SCAN */
+  this->m_key_index= search_key_in_table(table, cols, (PRI_KEY_FLAG | UNIQUE_KEY_FLAG));
+  if (this->m_key_index != MAX_KEY)
+  {
+    DBUG_PRINT("info", ("decide_row_lookup_algorithm_and_key: decided - INDEX_SCAN"));
+    this->m_rows_lookup_algorithm= ROW_LOOKUP_INDEX_SCAN;
+    goto end;
+  }
 
-    /*
-      Restore the sql_mode after the rows event is processed.
-    */
-    thd->variables.sql_mode= saved_sql_mode;
+TABLE_OR_INDEX_HASH_SCAN:
 
-    {/**
-         The following failure injecion works in cooperation with tests 
-         setting @@global.debug= 'd,stop_slave_middle_group'.
-         The sql thread receives the killed status and will proceed 
-         to shutdown trying to finish incomplete events group.
-     */
-      DBUG_EXECUTE_IF("stop_slave_middle_group",
-                      if (thd->transaction.all.cannot_safely_rollback())
-                        const_cast<Relay_log_info*>(rli)->abort_slave= 1;);
-    }
+  /*
+     NOTE: Engines like Blackhole cannot use HASH_SCAN, because
+           they do not synchronize reads.
+   */
+  if (!(slave_rows_search_algorithms_options & SLAVE_ROWS_HASH_SCAN) ||
+      (table->file->ha_table_flags() & HA_READ_OUT_OF_SYNC))
+    goto TABLE_OR_INDEX_FULL_SCAN;
 
-    if ((error= do_after_row_operations(rli, error)) &&
-        ignored_error_code(convert_handler_error(error, thd, table)))
-    {
+  /* search for a key to see if we can narrow the lookup domain further. */
+  this->m_key_index= search_key_in_table(table, cols, (PRI_KEY_FLAG | UNIQUE_KEY_FLAG | MULTIPLE_KEY_FLAG));
+  this->m_rows_lookup_algorithm= ROW_LOOKUP_HASH_SCAN;
+  DBUG_PRINT("info", ("decide_row_lookup_algorithm_and_key: decided - HASH_SCAN"));
+  goto end;
 
-      if (log_warnings)
-        slave_rows_error_report(WARNING_LEVEL, error, rli, thd, table,
-                                get_type_str(),
-                                const_cast<Relay_log_info*>(rli)->get_rpl_log_name(),
-                                (ulong) log_pos);
-      clear_all_errors(thd, const_cast<Relay_log_info*>(rli));
-      error= 0;
-    }
-  } // if (table)
+TABLE_OR_INDEX_FULL_SCAN:
 
-  /* reset OPTION_ALLOW_BATCH as not affect later events */
-  thd->variables.option_bits&= ~OPTION_ALLOW_BATCH;
+  this->m_key_index= MAX_KEY;
 
-  if (error)
-  {
-    slave_rows_error_report(ERROR_LEVEL, error, rli, thd, table,
-                             get_type_str(),
-                             const_cast<Relay_log_info*>(rli)->get_rpl_log_name(),
-                             (ulong) log_pos);
-    /*
-      @todo We should probably not call
-      reset_current_stmt_binlog_format_row() from here.
+  /* If we can use an index, try to narrow the scan a bit further. */
+  if (slave_rows_search_algorithms_options & SLAVE_ROWS_INDEX_SCAN)
+    this->m_key_index= search_key_in_table(table, cols, (PRI_KEY_FLAG | UNIQUE_KEY_FLAG | MULTIPLE_KEY_FLAG));
 
-      Note: this applies to log_event_old.cc too.
-      /Sven
-    */
-    thd->reset_current_stmt_binlog_format_row();
-    thd->is_slave_error= 1;
-    DBUG_RETURN(error);
+  if (this->m_key_index != MAX_KEY)
+  {
+    DBUG_PRINT("info", ("decide_row_lookup_algorithm_and_key: decided - INDEX_SCAN"));
+    this->m_rows_lookup_algorithm= ROW_LOOKUP_INDEX_SCAN;
+  }
+  else
+  {
+    DBUG_PRINT("info", ("decide_row_lookup_algorithm_and_key: decided - TABLE_SCAN"));
+    this->m_rows_lookup_algorithm= ROW_LOOKUP_TABLE_SCAN;
   }
 
-  if (get_flags(STMT_END_F) && (error= rows_event_stmt_cleanup(rli, thd)))
-    slave_rows_error_report(ERROR_LEVEL,
-                            thd->is_error() ? 0 : error,
-                            rli, thd, table,
-                            get_type_str(),
-                            const_cast<Relay_log_info*>(rli)->get_rpl_log_name(),
-                            (ulong) log_pos);
-  DBUG_RETURN(error);
+end:
+#ifndef DBUG_OFF
+  const char* s= ((m_rows_lookup_algorithm == Rows_log_event::ROW_LOOKUP_TABLE_SCAN) ? "TABLE_SCAN" :
+                  ((m_rows_lookup_algorithm == Rows_log_event::ROW_LOOKUP_HASH_SCAN) ? "HASH_SCAN" :
+                   "INDEX_SCAN"));
+
+  // only for testing purposes
+  slave_rows_last_search_algorithm_used= m_rows_lookup_algorithm;
+  DBUG_PRINT("debug", ("Row lookup method: %s", s));
+#endif
+
+  DBUG_VOID_RETURN;
 }
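
The decision table above reads as a small pure function. The sketch below is
an illustrative restatement, not the actual implementation (which, as the
code above shows, also falls through label by label):

    static uint decide(bool has_pk_or_uk, bool has_any_key, ulonglong opts,
                       bool engine_reads_out_of_sync)
    {
      if ((opts & SLAVE_ROWS_INDEX_SCAN) && has_pk_or_uk)
        return ROW_LOOKUP_INDEX_SCAN;                  /* I        */
      if ((opts & SLAVE_ROWS_HASH_SCAN) && !engine_reads_out_of_sync)
        return ROW_LOOKUP_HASH_SCAN;                   /* Hi or Ht */
      if ((opts & SLAVE_ROWS_INDEX_SCAN) && has_any_key)
        return ROW_LOOKUP_INDEX_SCAN;                  /* I        */
      return ROW_LOOKUP_TABLE_SCAN;                    /* T        */
    }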
 
-Log_event::enum_skip_reason
-Rows_log_event::do_shall_skip(Relay_log_info *rli)
+/*
+  Encapsulates the operations to be done before applying
+  row events for update and delete.
+
+  @return 0 on success,
+          error code otherwise.
+*/
+int
+Rows_log_event::row_operations_scan_and_key_setup()
 {
+  int error= 0;
+  DBUG_ENTER("Row_log_event::row_operations_scan_and_key_setup");
+
   /*
-    If the slave skip counter is 1 and this event does not end a
-    statement, then we should not start executing on the next event.
-    Otherwise, we defer the decision to the normal skipping logic.
-  */
-  if (rli->slave_skip_counter == 1 && !get_flags(STMT_END_F))
-    return Log_event::EVENT_SKIP_IGNORE;
-  else
-    return Log_event::do_shall_skip(rli);
+     Prepare memory structures for search operations. If
+     search is performed:
+
+     1. using hash search => initialize the hash
+     2. using key => decide on key to use and allocate mem structures
+     3. using table scan => do nothing
+   */
+  decide_row_lookup_algorithm_and_key();
+
+  switch (m_rows_lookup_algorithm)
+  {
+  case ROW_LOOKUP_HASH_SCAN:
+    {
+      if (m_hash.init())
+        error= HA_ERR_OUT_OF_MEM;
+      goto err;
+    }
+  case ROW_LOOKUP_INDEX_SCAN:
+    {
+      DBUG_ASSERT (m_key_index < MAX_KEY);
+      // Allocate buffer for key searches
+      m_key= (uchar*)my_malloc(MAX_KEY_LENGTH, MYF(MY_WME));
+      if (!m_key)
+        error= HA_ERR_OUT_OF_MEM;
+      goto err;
+    }
+  case ROW_LOOKUP_TABLE_SCAN:
+  default: break;
+  }
+err:
+  DBUG_RETURN(error);
 }
 
-/**
-   The function is called at Rows_log_event statement commit time,
-   normally from Rows_log_event::do_update_pos() and possibly from
-   Query_log_event::do_apply_event() of the COMMIT.
-   The function commits the last statement for engines, binlog and
-   releases resources have been allocated for the statement.
-  
-   @retval  0         Ok.
-   @retval  non-zero  Error at the commit.
- */
+/*
+  Encapsulates the operations to be done after applying
+  row events for update and delete.
 
-static int rows_event_stmt_cleanup(Relay_log_info const *rli, THD * thd)
+  @return 0 on success,
+          error code otherwise.
+*/
+
+int
+Rows_log_event::row_operations_scan_and_key_teardown(int error)
 {
-  int error;
+  DBUG_ENTER("Rows_log_event::row_operations_scan_and_key_teardown");
+
+  DBUG_ASSERT(!m_table->file->inited);
+  switch (m_rows_lookup_algorithm)
   {
-    /*
-      This is the end of a statement or transaction, so close (and
-      unlock) the tables we opened when processing the
-      Table_map_log_event starting the statement.
+  case ROW_LOOKUP_HASH_SCAN:
+    {
+      m_hash.deinit(); // we don't need the hash anymore.
+      goto err;
+    }
 
-      OBSERVER.  This will clear *all* mappings, not only those that
-      are open for the table. There is not good handle for on-close
-      actions for tables.
+  case ROW_LOOKUP_INDEX_SCAN:
+    {
+      if (m_table->s->keys > 0)
+      {
+        my_free(m_key); // free the buffer allocated for key searches
+        m_key= NULL;
+        m_key_index= MAX_KEY;
+      }
+      goto err;
+    }
 
-      NOTE. Even if we have no table ('table' == 0) we still need to be
-      here, so that we increase the group relay log position. If we didn't, we
-      could have a group relay log position which lags behind "forever"
-      (assume the last master's transaction is ignored by the slave because of
-      replicate-ignore rules).
-    */
-    error= thd->binlog_flush_pending_rows_event(TRUE);
+  case ROW_LOOKUP_TABLE_SCAN:
+  default: break;
+  }
 
-    /*
-      If this event is not in a transaction, the call below will, if some
-      transactional storage engines are involved, commit the statement into
-      them and flush the pending event to binlog.
-      If this event is in a transaction, the call will do nothing, but a
-      Xid_log_event will come next which will, if some transactional engines
-      are involved, commit the transaction and flush the pending event to the
-      binlog.
-    */
-    error|= (error ? trans_rollback_stmt(thd) : trans_commit_stmt(thd));
+err:
+  m_rows_lookup_algorithm= ROW_LOOKUP_UNDEFINED;
+  DBUG_RETURN(error);
+}
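
The setup/teardown pair is meant to bracket the per-event row loop. An
illustrative call sequence, assuming an update or delete event (the actual
caller lives elsewhere in this patch):

    if ((error= row_operations_scan_and_key_setup()))
      return error;
    /* ... locate and apply each row carried by the event ... */
    error= row_operations_scan_and_key_teardown(error);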
 
-    /*
-      Now what if this is not a transactional engine? we still need to
-      flush the pending event to the binlog; we did it with
-      thd->binlog_flush_pending_rows_event(). Note that we imitate
-      what is done for real queries: a call to
-      ha_autocommit_or_rollback() (sometimes only if involves a
-      transactional engine), and a call to be sure to have the pending
-      event flushed.
-    */
+/*
+  Compares table->record[0] and table->record[1]
 
-    /*
-      @todo We should probably not call
-      reset_current_stmt_binlog_format_row() from here.
+  Returns TRUE if different.
+*/
+static bool record_compare(TABLE *table, MY_BITMAP *cols)
+{
+  DBUG_ENTER("record_compare");
 
-      Note: this applies to log_event_old.cc too
+  /*
+    Need to set the X bit and the filler bits in both records since
+    there are engines that do not set them correctly.
 
-      Btw, the previous comment about transactional engines does not
-      seem related to anything that happens here.
-      /Sven
-    */
-    thd->reset_current_stmt_binlog_format_row();
+    In addition, since MyISAM checks that one hasn't tampered with the
+    record, it is necessary to restore the old bytes into the record
+    after doing the comparison.
 
-    const_cast<Relay_log_info*>(rli)->cleanup_context(thd, 0);
-  }
-  return error;
-}
+    TODO[record format ndb]: Remove it once NDB returns correct
+    records. Check that the other engines also return correct records.
+   */
 
-/**
-   The method either increments the relay log position or
-   commits the current statement and increments the master group 
-   possition if the event is STMT_END_F flagged and
-   the statement corresponds to the autocommit query (i.e replicated
-   without wrapping in BEGIN/COMMIT)
+  DBUG_DUMP("record[0]", table->record[0], table->s->reclength);
+  DBUG_DUMP("record[1]", table->record[1], table->s->reclength);
 
-   @retval 0         Success
-   @retval non-zero  Error in the statement commit
- */
-int
-Rows_log_event::do_update_pos(Relay_log_info *rli)
-{
-  DBUG_ENTER("Rows_log_event::do_update_pos");
-  int error= 0;
+  bool result= FALSE;
+  uchar saved_x[2]= {0, 0}, saved_filler[2]= {0, 0};
 
-  DBUG_PRINT("info", ("flags: %s",
-                      get_flags(STMT_END_F) ? "STMT_END_F " : ""));
+  if (table->s->null_bytes > 0)
+  {
+    for (int i = 0 ; i < 2 ; ++i)
+    {
+      /*
+        If we have an X bit then we need to take care of it.
+      */
+      if (!(table->s->db_options_in_use & HA_OPTION_PACK_RECORD))
+      {
+        saved_x[i]= table->record[i][0];
+        table->record[i][0]|= 1U;
+      }
 
-  /* Worker does not execute binlog update position logics */
-  DBUG_ASSERT(!is_mts_worker(rli->info_thd));
+      /*
+         If (last_null_bit_pos == 0 && null_bytes > 1), then:
 
-  if (get_flags(STMT_END_F))
+         X bit (if any) + N nullable fields + M Field_bit fields = 8 bits
+
+         I.e., the entire byte is used.
+      */
+      if (table->s->last_null_bit_pos > 0)
+      {
+        saved_filler[i]= table->record[i][table->s->null_bytes - 1];
+        table->record[i][table->s->null_bytes - 1]|=
+          256U - (1U << table->s->last_null_bit_pos);
+      }
+    }
+  }
+
+  if (table->s->blob_fields + table->s->varchar_fields == 0 &&
+      bitmap_is_set_all(cols))
   {
-    /*
-      Indicate that a statement is finished.
-      Step the group log position if we are not in a transaction,
-      otherwise increase the event log position.
-    */
-    error= rli->stmt_done(log_pos);
+    result= cmp_record(table,record[1]);
+    goto record_compare_exit;
   }
-  else
+
+  /* Compare null bits */
+  if (bitmap_is_set_all(cols) &&
+      memcmp(table->null_flags,
+       table->null_flags+table->s->rec_buff_length,
+       table->s->null_bytes))
   {
-    rli->inc_event_relay_log_pos();
+    result= TRUE;       // Diff in NULL value
+    goto record_compare_exit;
   }
 
-  DBUG_RETURN(error);
-}
+  /* Compare updated fields */
+  for (Field **ptr= table->field ;
+       *ptr && ((*ptr)->field_index < cols->n_bits);
+       ptr++)
+  {
+    if (bitmap_is_set(cols, (*ptr)->field_index))
+    {
+      if ((*ptr)->cmp_binary_offset(table->s->rec_buff_length))
+      {
+        result= TRUE;
+        goto record_compare_exit;
+      }
+    }
+  }
 
-#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */
+record_compare_exit:
+  /*
+    Restore the saved bytes.
 
-#ifndef MYSQL_CLIENT
-bool Rows_log_event::write_data_header(IO_CACHE *file)
-{
-  uchar buf[ROWS_HEADER_LEN];	// No need to init the buffer
-  DBUG_ASSERT(m_table_id != ~0UL);
-  DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master",
-                  {
-                    int4store(buf + 0, m_table_id);
-                    int2store(buf + 4, m_flags);
-                    return (wrapper_my_b_safe_write(file, buf, 6));
-                  });
-  int6store(buf + RW_MAPID_OFFSET, (ulonglong)m_table_id);
-  int2store(buf + RW_FLAGS_OFFSET, m_flags);
-  return (wrapper_my_b_safe_write(file, buf, ROWS_HEADER_LEN));
+    TODO[record format ndb]: Remove this code once NDB returns the
+    correct record format.
+  */
+  if (table->s->null_bytes > 0)
+  {
+    for (int i = 0 ; i < 2 ; ++i)
+    {
+      if (!(table->s->db_options_in_use & HA_OPTION_PACK_RECORD))
+        table->record[i][0]= saved_x[i];
+
+      if (table->s->last_null_bit_pos)
+        table->record[i][table->s->null_bytes - 1]= saved_filler[i];
+    }
+  }
+
+  DBUG_RETURN(result);
 }
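
The filler-mask arithmetic used above (and again in next_record_scan() below)
is easy to verify in isolation; a standalone check, with last_null_bit_pos == 3
chosen purely as an example:

    #include <assert.h>
    int main(void)
    {
      unsigned last_null_bit_pos= 3;                   /* example value */
      unsigned mask= 256U - (1U << last_null_bit_pos); /* == 0xF8 */
      /* ORing this in forces the unused filler bits 3..7 of the last
         null byte to 1 while leaving the meaningful bits 0..2 alone */
      assert(mask == 0xF8);
      return 0;
    }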
 
-bool Rows_log_event::write_data_body(IO_CACHE*file)
+void Rows_log_event::do_post_row_operations(Relay_log_info const *rli, int error)
 {
+
   /*
-     Note that this should be the number of *bits*, not the number of
-     bytes.
+    If m_curr_row_end was not set during event execution (e.g., because
+    of errors) we can't proceed to the next row. If the error is transient
+    (i.e., error==0 at this point) we must call unpack_current_row() to set
+    m_curr_row_end.
   */
-  uchar sbuf[sizeof(m_width) + 1];
-  my_ptrdiff_t const data_size= m_rows_cur - m_rows_buf;
-  bool res= false;
-  uchar *const sbuf_end= net_store_length(sbuf, (size_t) m_width);
-  DBUG_ASSERT(static_cast<size_t>(sbuf_end - sbuf) <= sizeof(sbuf));
 
-  DBUG_DUMP("m_width", sbuf, (size_t) (sbuf_end - sbuf));
-  res= res || wrapper_my_b_safe_write(file, sbuf, (size_t) (sbuf_end - sbuf));
+  DBUG_PRINT("info", ("curr_row: 0x%lu; curr_row_end: 0x%lu; rows_end: 0x%lu",
+                      (ulong) m_curr_row, (ulong) m_curr_row_end, (ulong) m_rows_end));
 
-  DBUG_DUMP("m_cols", (uchar*) m_cols.bitmap, no_bytes_in_map(&m_cols));
-  res= res || wrapper_my_b_safe_write(file, (uchar*) m_cols.bitmap,
-                              no_bytes_in_map(&m_cols));
-  /*
-    TODO[refactor write]: Remove the "down cast" here (and elsewhere).
-   */
-  if (get_type_code() == UPDATE_ROWS_EVENT)
+  if (!m_curr_row_end && !error)
   {
-    DBUG_DUMP("m_cols_ai", (uchar*) m_cols_ai.bitmap,
-              no_bytes_in_map(&m_cols_ai));
-    res= res || wrapper_my_b_safe_write(file, (uchar*) m_cols_ai.bitmap,
-                                no_bytes_in_map(&m_cols_ai));
+    error= unpack_current_row(rli, &m_cols);
   }
-  DBUG_DUMP("rows", m_rows_buf, data_size);
-  res= res || wrapper_my_b_safe_write(file, m_rows_buf, (size_t) data_size);
 
-  return res;
+  // at this moment m_curr_row_end should be set
+  DBUG_ASSERT(error || m_curr_row_end != NULL);
+  DBUG_ASSERT(error || m_curr_row <= m_curr_row_end);
+  DBUG_ASSERT(error || m_curr_row_end <= m_rows_end);
+
+  m_curr_row= m_curr_row_end;
 
+  if (error == 0 && !m_table->file->has_transactions())
+  {
+    thd->transaction.all.set_unsafe_rollback_flags(TRUE);
+    thd->transaction.stmt.set_unsafe_rollback_flags(TRUE);
+  }
 }
-#endif
 
-#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
-int Rows_log_event::pack_info(Protocol *protocol)
+int Rows_log_event::handle_idempotent_errors(Relay_log_info const *rli, int *err)
 {
-  char buf[256];
-  char const *const flagstr=
-    get_flags(STMT_END_F) ? " flags: STMT_END_F" : "";
-  size_t bytes= my_snprintf(buf, sizeof(buf),
-                               "table_id: %lu%s", m_table_id, flagstr);
-  protocol->store(buf, bytes, &my_charset_bin);
-  return 0;
+  int error= *err;
+  if (error)
+  {
+    int actual_error= convert_handler_error(error, thd, m_table);
+    bool idempotent_error= (idempotent_error_code(error) &&
+                           (slave_exec_mode == SLAVE_EXEC_MODE_IDEMPOTENT));
+    bool ignored_error= (idempotent_error == 0 ?
+                         ignored_error_code(actual_error) : 0);
+
+    if (idempotent_error || ignored_error)
+    {
+      if (log_warnings)
+        slave_rows_error_report(WARNING_LEVEL, error, rli, thd, m_table,
+                                get_type_str(),
+                                const_cast<Relay_log_info*>(rli)->get_rpl_log_name(),
+                                (ulong) log_pos);
+      clear_all_errors(thd, const_cast<Relay_log_info*>(rli));
+      *err= 0;
+      if (idempotent_error == 0)
+        return ignored_error;
+    }
+  }
+
+  return *err;
 }
-#endif
 
-#ifdef MYSQL_CLIENT
-void Rows_log_event::print_helper(FILE *file,
-                                  PRINT_EVENT_INFO *print_event_info,
-                                  char const *const name)
+int Rows_log_event::do_apply_row(Relay_log_info const *rli)
 {
-  IO_CACHE *const head= &print_event_info->head_cache;
-  IO_CACHE *const body= &print_event_info->body_cache;
-  if (!print_event_info->short_form)
+  DBUG_ENTER("Rows_log_event::do_apply_row");
+
+  int error= 0;
+
+  /* in_use may have been set to NULL in close_tables_for_reopen */
+  THD* old_thd= m_table->in_use;
+  if (!m_table->in_use)
+    m_table->in_use= thd;
+
+  error= do_exec_row(rli);
+
+  if(error)
   {
-    bool const last_stmt_event= get_flags(STMT_END_F);
-    print_header(head, print_event_info, !last_stmt_event);
-    my_b_printf(head, "\t%s: table id %lu%s\n",
-                name, m_table_id,
-                last_stmt_event ? " flags: STMT_END_F" : "");
-    print_base64(body, print_event_info, !last_stmt_event);
+    DBUG_PRINT("info", ("error: %s", HA_ERR(error)));
+    DBUG_ASSERT(error != HA_ERR_RECORD_DELETED);
   }
-}
-#endif
 
-/**************************************************************************
-	Table_map_log_event member functions and support functions
-**************************************************************************/
+  m_table->in_use = old_thd;
+
+  DBUG_RETURN(error);
+}
 
 /**
-  @page How replication of field metadata works.
-  
-  When a table map is created, the master first calls 
-  Table_map_log_event::save_field_metadata() which calculates how many 
-  values will be in the field metadata. Only those fields that require the 
-  extra data are added. The method also loops through all of the fields in 
-  the table calling the method Field::save_field_metadata() which returns the
-  values for the field that will be saved in the metadata and replicated to
-  the slave. Once all fields have been processed, the table map is written to
-  the binlog adding the size of the field metadata and the field metadata to
-  the end of the body of the table map.
+   Does the cleanup:
+     - deallocates all the elements in m_distinct_key_list, if any
+     - closes the index if opened by open_record_scan
+     - closes the table if opened for scanning.
+*/
+int
+Rows_log_event::close_record_scan()
+{
+  DBUG_ENTER("Rows_log_event::close_record_scan");
+  int error= 0;
 
-  When a table map is read on the slave, the field metadata is read from the 
-  table map and passed to the table_def class constructor which saves the 
-  field metadata from the table map into an array based on the type of the 
-  field. Field metadata values not present (those fields that do not use extra 
-  data) in the table map are initialized as zero (0). The array size is the 
-  same as the columns for the table on the slave.
+  // if there is something to actually close
+  if (m_key_index < MAX_KEY)
+  {
+    if (m_table->file->inited)
+      error= m_table->file->ha_index_end();
 
-  Additionally, values saved for field metadata on the master are saved as a 
-  string of bytes (uchar) in the binlog. A field may require 1 or more bytes
-  to store the information. In cases where values require multiple bytes 
-  (e.g. values > 255), the endian-safe methods are used to properly encode 
-  the values on the master and decode them on the slave. When the field
-  metadata values are captured on the slave, they are stored in an array of
-  type uint16. This allows the least number of casts to prevent casting bugs
-  when the field metadata is used in comparisons of field attributes. When
-  the field metadata is used for calculating addresses in pointer math, the
-  type used is uint32. 
-*/
+    if(m_rows_lookup_algorithm == ROW_LOOKUP_HASH_SCAN)
+    {
+      uchar *key_val;
+      /* free the memory allocated for each key value */
+      List_iterator_fast<uchar> it(m_distinct_key_list);
+      while((key_val= it++) && (key_val))
+        my_free(key_val);
+    }
+  }
+  else if (m_table->file->inited)
+    error= m_table->file->ha_rnd_end();
 
-#if !defined(MYSQL_CLIENT)
-/**
-  Save the field metadata based on the real_type of the field.
-  The metadata saved depends on the type of the field. Some fields
-  store a single byte for pack_length() while others store two bytes
-  for field_length (max length).
-  
-  @retval  0  Ok.
+  DBUG_RETURN(error);
+}
 
-  @todo
-  We may want to consider changing the encoding of the information.
-  Currently, the code attempts to minimize the number of bytes written to 
-  the tablemap. There are at least two other alternatives; 1) using 
-  net_store_length() to store the data allowing it to choose the number of
-  bytes that are appropriate thereby making the code much easier to 
-  maintain (only 1 place to change the encoding), or 2) use a fixed number
-  of bytes for each field. The problem with option 1 is that net_store_length()
-  will use one byte if the value < 251, but 3 bytes if it is > 250. Thus,
-  for fields like CHAR which can be no larger than 255 characters, the method
-  will use 3 bytes when the value is > 250. Further, every value that is
-  encoded using 2 parts (e.g., pack_length, field_length) will be numerically
-  > 250 therefore will use 3 bytes for eah value. The problem with option 2
-  is less wasteful for space but does waste 1 byte for every field that does
-  not encode 2 parts. 
+/**
+  Fetches the next row. If it is a HASH_SCAN over an index, it populates
+  table->record[0] with the next row matching the index. If the index
+  values are in non-contiguous ranges, it fetches the record corresponding
+  to the key value in the next range.
+
+  @param first_read  signifies if this is the first time we are reading
+                     a row over an index.
+  @return -  error code when there are no more records to be fetched or
+             some other error occurred,
+          -  0 otherwise.
 */
-int Table_map_log_event::save_field_metadata()
+int
+Rows_log_event::next_record_scan(bool first_read)
 {
-  DBUG_ENTER("Table_map_log_event::save_field_metadata");
-  int index= 0;
-  for (unsigned int i= 0 ; i < m_table->s->fields ; i++)
+  DBUG_ENTER("Rows_log_event::next_record_scan");
+  DBUG_ASSERT(m_table->file->inited);
+  TABLE *table= m_table;
+  int error= 0;
+
+  if (m_key_index >= MAX_KEY)
+    error= table->file->ha_rnd_next(table->record[0]);
+  else
   {
-    DBUG_PRINT("debug", ("field_type: %d", m_coltype[i]));
-    index+= m_table->s->field[i]->save_field_metadata(&m_field_metadata[index]);
+    KEY *keyinfo= m_table->key_info + m_key_index;
+    /*
+      We need to set the null bytes to ensure that the filler bits are
+      all set when returning.  There are storage engines that just set
+      the necessary bits on the bytes and don't set the filler bits
+      correctly.
+    */
+    if (table->s->null_bytes > 0)
+      table->record[0][table->s->null_bytes - 1]|=
+        256U - (1U << table->s->last_null_bit_pos);
+
+    if (!first_read)
+    {
+      /*
+        if we fail to fetch the next record corresponding to an index
+        value, we move to the next key value. If we are out of key values
+        as well, an error will be returned.
+       */
+      error= table->file->ha_index_next(table->record[0]);
+      if(m_rows_lookup_algorithm == ROW_LOOKUP_HASH_SCAN)
+        /*
+          if we are out of rows for this particular key value
+          or we have jumped to the next key value, we reposition the
+          marker according to the next key value that we have in the
+          list.
+         */
+        if ((error) ||
+            (key_cmp(keyinfo->key_part, m_key, keyinfo->key_length) != 0))
+        {
+          if ((m_key= m_itr++))
+            first_read= true;
+          else
+            error= HA_ERR_KEY_NOT_FOUND;
+        }
+    }
+
+    if (first_read)
+      if ((error= table->file->ha_index_read_map(table->record[0], m_key,
+                                                 HA_WHOLE_KEY,
+                                                 HA_READ_KEY_EXACT)))
+      {
+        DBUG_PRINT("info",("no record matching the key found in the table"));
+        if (error == HA_ERR_RECORD_DELETED)
+          error= HA_ERR_KEY_NOT_FOUND;
+      }
   }
-  DBUG_RETURN(index);
+
+  DBUG_RETURN(error);
 }
-#endif /* !defined(MYSQL_CLIENT) */
 
-/*
-  Constructor used to build an event for writing to the binary log.
-  Mats says tbl->s lives longer than this event so it's ok to copy pointers
-  (tbl->s->db etc) and not pointer content.
- */
-#if !defined(MYSQL_CLIENT)
-Table_map_log_event::Table_map_log_event(THD *thd, TABLE *tbl, ulong tid,
-                                         bool using_trans)
-  : Log_event(thd, 0,
-              using_trans ? Log_event::EVENT_TRANSACTIONAL_CACHE :
-                            Log_event::EVENT_STMT_CACHE,
-              Log_event::EVENT_NORMAL_LOGGING),
-    m_table(tbl),
-    m_dbnam(tbl->s->db.str),
-    m_dblen(m_dbnam ? tbl->s->db.length : 0),
-    m_tblnam(tbl->s->table_name.str),
-    m_tbllen(tbl->s->table_name.length),
-    m_colcnt(tbl->s->fields),
-    m_memory(NULL),
-    m_table_id(tid),
-    m_flags(TM_BIT_LEN_EXACT_F),
-    m_data_size(0),
-    m_field_metadata(0),
-    m_field_metadata_size(0),
-    m_null_bits(0),
-    m_meta_memory(NULL)
+/**
+  Initializes the scanning of rows. When scanning over an index it opens
+  the index and, for a HASH_SCAN, initializes an iterator over the list
+  of distinct keys (m_distinct_key_list); when scanning over the table
+  it opens the table itself.
+*/
+int
+Rows_log_event::open_record_scan()
 {
-  uchar cbuf[sizeof(m_colcnt) + 1];
-  uchar *cbuf_end;
-  DBUG_ASSERT(m_table_id != ~0UL);
-  /*
-    In TABLE_SHARE, "db" and "table_name" are 0-terminated (see this comment in
-    table.cc / alloc_table_share():
-      Use the fact the key is db/0/table_name/0
-    As we rely on this let's assert it.
-  */
-  DBUG_ASSERT((tbl->s->db.str == 0) ||
-              (tbl->s->db.str[tbl->s->db.length] == 0));
-  DBUG_ASSERT(tbl->s->table_name.str[tbl->s->table_name.length] == 0);
-
-
-  m_data_size=  TABLE_MAP_HEADER_LEN;
-  DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master", m_data_size= 6;);
-  m_data_size+= m_dblen + 2;	// Include length and terminating \0
-  m_data_size+= m_tbllen + 2;	// Include length and terminating \0
-  cbuf_end= net_store_length(cbuf, (size_t) m_colcnt);
-  DBUG_ASSERT(static_cast<size_t>(cbuf_end - cbuf) <= sizeof(cbuf));
-  m_data_size+= (cbuf_end - cbuf) + m_colcnt;	// COLCNT and column types
+  int error= 0;
+  TABLE *table= m_table;
+  DBUG_ENTER("Rows_log_event::open_record_scan");
 
-  /* If malloc fails, caught in is_valid() */
-  if ((m_memory= (uchar*) my_malloc(m_colcnt, MYF(MY_WME))))
+  if (m_key_index < MAX_KEY)
   {
-    m_coltype= reinterpret_cast<uchar*>(m_memory);
-    for (unsigned int i= 0 ; i < m_table->s->fields ; ++i)
-      m_coltype[i]= m_table->field[i]->binlog_type();
-  }
+    KEY *keyinfo= m_table->key_info + m_key_index;
+    if (m_rows_lookup_algorithm == ROW_LOOKUP_HASH_SCAN)
+    {
+      /* initialize the iterator over the list of distinct keys that we have */
+      m_itr.init(m_distinct_key_list);
 
-  /*
-    Calculate a bitmap for the results of maybe_null() for all columns.
-    The bitmap is used to determine when there is a column from the master
-    that is not on the slave and is null and thus not in the row data during
-    replication.
-  */
-  uint num_null_bytes= (m_table->s->fields + 7) / 8;
-  m_data_size+= num_null_bytes;
-  m_meta_memory= (uchar *)my_multi_malloc(MYF(MY_WME),
-                                 &m_null_bits, num_null_bytes,
-                                 &m_field_metadata, (m_colcnt * 2),
-                                 NULL);
+      /* get the first element from the list of keys and increment the
+         iterator
+       */
+      m_key= m_itr++;
+    }
+    else
+    {
+      /* This is an INDEX_SCAN; we need to store the key in m_key. */
+      DBUG_ASSERT((m_rows_lookup_algorithm == ROW_LOOKUP_INDEX_SCAN) && m_key);
+      key_copy(m_key, m_table->record[0], keyinfo, 0);
+    }
 
-  memset(m_field_metadata, 0, (m_colcnt * 2));
+    /*
+      Save copy of the record in table->record[1]. It might be needed
+      later if linear search is used to find exact match.
+     */
+    store_record(table,record[1]);
 
-  /*
-    Create an array for the field metadata and store it.
-  */
-  m_field_metadata_size= save_field_metadata();
-  DBUG_ASSERT(m_field_metadata_size <= (m_colcnt * 2));
+    DBUG_PRINT("info",("locating record using a key (index_read)"));
 
-  /*
-    Now set the size of the data to the size of the field metadata array
-    plus one or three bytes (see pack.c:net_store_length) for number of 
-    elements in the field metadata array.
-  */
-  if (m_field_metadata_size < 251)
-    m_data_size+= m_field_metadata_size + 1; 
+    /* The m_key_index'th key is active and usable: search the table using the index */
+    if (!table->file->inited && (error= table->file->ha_index_init(m_key_index, FALSE)))
+    {
+      DBUG_PRINT("info",("ha_index_init returns error %d",error));
+      goto end;
+    }
+
+    /*
+      Don't print debug messages when running valgrind since they can
+      trigger false warnings.
+     */
+#ifndef HAVE_purify
+    DBUG_DUMP("key data", m_key, keyinfo->key_length);
+#endif
+  }
   else
-    m_data_size+= m_field_metadata_size + 3; 
+  {
+    if ((error= table->file->ha_rnd_init(1)))
+    {
+      DBUG_PRINT("info",("error initializing table scan"
+          " (ha_rnd_init returns %d)",error));
+      table->file->print_error(error, MYF(0));
+    }
+  }
 
-  memset(m_null_bits, 0, num_null_bytes);
-  for (unsigned int i= 0 ; i < m_table->s->fields ; ++i)
-    if (m_table->field[i]->maybe_null())
-      m_null_bits[(i / 8)]+= 1 << (i % 8);
+end:
+  DBUG_RETURN(error);
+}
 
+/**
+  Populates m_distinct_key_list with the distinct key values of the
+  rows that are to be modified during a HASH_SCAN over an index.
+
+  @return  - 0 on success,
+           - error code otherwise.
+*/
+int
+Rows_log_event::add_key_to_distinct_keyset()
+{
+  int error= 0;
+  bool distinct= true;
+  DBUG_ENTER("Rows_log_event::add_key_to_distinct_keyset");
+  DBUG_ASSERT(m_key_index < MAX_KEY);
+  KEY *cur_key_info= m_table->key_info + m_key_index;
+
+  if (last_hashed_key)
+    distinct= key_cmp(cur_key_info->key_part, last_hashed_key,
+                      cur_key_info->key_length);
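+  /*
+    Note that key_cmp() returns zero for equal keys, so only a key equal
+    to the most recently hashed one is treated as a duplicate here; the
+    keys arrive in the order the rows appear in the event.
+  */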
+
+  if (distinct)
+  {
+    uchar *cur_key= (uchar *)my_malloc(cur_key_info->key_length,
+                                       MYF(MY_WME));
+    if (!cur_key)
+    {
+      error= HA_ERR_OUT_OF_MEM;
+      goto err;
+    }
+    m_distinct_key_list.push_back(cur_key);
+    last_hashed_key= cur_key;
+    key_copy(cur_key, m_table->record[0], cur_key_info, 0);
+  }
+
+err:
+  DBUG_RETURN(error);
 }
-#endif /* !defined(MYSQL_CLIENT) */
 
-/*
-  Constructor used by slave to read the event from the binary log.
- */
-#if defined(HAVE_REPLICATION)
-Table_map_log_event::Table_map_log_event(const char *buf, uint event_len,
-                                         const Format_description_log_event
-                                         *description_event)
 
-  : Log_event(buf, description_event),
-#ifndef MYSQL_CLIENT
-    m_table(NULL),
-#endif
-    m_dbnam(NULL), m_dblen(0), m_tblnam(NULL), m_tbllen(0),
-    m_colcnt(0), m_coltype(0),
-    m_memory(NULL), m_table_id(ULONG_MAX), m_flags(0),
-    m_data_size(0), m_field_metadata(0), m_field_metadata_size(0),
-    m_null_bits(0), m_meta_memory(NULL)
+int Rows_log_event::do_index_scan_and_update(Relay_log_info const *rli)
 {
-  unsigned int bytes_read= 0;
-  DBUG_ENTER("Table_map_log_event::Table_map_log_event(const char*,uint,...)");
+  DBUG_ENTER("Rows_log_event::do_index_scan_and_update");
+  DBUG_ASSERT(m_table && m_table->in_use != NULL);
 
-  uint8 common_header_len= description_event->common_header_len;
-  uint8 post_header_len= description_event->post_header_len[TABLE_MAP_EVENT-1];
-  DBUG_PRINT("info",("event_len: %u  common_header_len: %d  post_header_len: %d",
-                     event_len, common_header_len, post_header_len));
+  KEY *keyinfo= NULL;
+  TABLE *table= m_table;
+  int error= 0;
+  const uchar *saved_m_curr_row= m_curr_row;
 
   /*
-    Don't print debug messages when running valgrind since they can
-    trigger false warnings.
-   */
-#ifndef HAVE_purify
-  DBUG_DUMP("event buffer", (uchar*) buf, event_len);
-#endif
+    rpl_row_tabledefs.test specifies that it is okay for Delete and
+    Update events if an extra field on the slave does not have a
+    default value.
+    Todo: fix the WL#3228 HLD that requires defaults for all types of events.
+  */
 
-  /* Read the post-header */
-  const char *post_start= buf + common_header_len;
+  prepare_record(table, &m_cols, FALSE);
+  if ((error= unpack_current_row(rli, &m_cols)))
+    goto end;
 
-  post_start+= TM_MAPID_OFFSET;
-  if (post_header_len == 6)
+  // Temporary fix to find out why it fails [/Matz]
+  memcpy(m_table->read_set->bitmap, m_cols.bitmap, (m_table->read_set->n_bits + 7) / 8);
+
+  /*
+    Trying to do an index scan without a usable key: this is a valid
+    state, because we allow the user to set
+    slave_rows_search_algorithms= 'INDEX_SCAN'.
+
+    Therefore on tables with no indexes we will end
+    up here.
+   */
+  if (m_key_index >= MAX_KEY)
   {
-    /* Master is of an intermediate source tree before 5.1.4. Id is 4 bytes */
-    m_table_id= uint4korr(post_start);
-    post_start+= 4;
-  }
-  else
-  {
-    DBUG_ASSERT(post_header_len == TABLE_MAP_HEADER_LEN);
-    m_table_id= (ulong) uint6korr(post_start);
-    post_start+= TM_FLAGS_OFFSET;
+    error= HA_ERR_END_OF_FILE;
+    goto end;
   }
 
-  DBUG_ASSERT(m_table_id != ~0UL);
+#ifndef DBUG_OFF
+  DBUG_PRINT("info",("looking for the following record"));
+  DBUG_DUMP("record[0]", table->record[0], table->s->reclength);
+#endif
 
-  m_flags= uint2korr(post_start);
+  if (m_key_index != m_table->s->primary_key)
+    /* we don't have a PK, or the PK is not usable */
+    goto INDEX_SCAN;
 
-  /* Read the variable part of the event */
-  const char *const vpart= buf + common_header_len + post_header_len;
+  if ((table->file->ha_table_flags() & HA_PRIMARY_KEY_REQUIRED_FOR_POSITION))
+  {
+    /*
+      Use a more efficient method to fetch the record given by
+      table->record[0] if the engine allows it.  We first compute a
+      row reference using the position() member function (it will be
+      stored in table->file->ref) and then use rnd_pos() to position
+      the "cursor" (i.e., record[0] in this case) at the correct row.
 
-  /* Extract the length of the various parts from the buffer */
-  uchar const *const ptr_dblen= (uchar const*)vpart + 0;
-  m_dblen= *(uchar*) ptr_dblen;
+      TODO: Check that the correct record has been fetched by
+      comparing it with the original record. Take into account that the
+      record on the master and slave can be of different
+      length. Something along these lines should work:
 
-  /* Length of database name + counter + terminating null */
-  uchar const *const ptr_tbllen= ptr_dblen + m_dblen + 2;
-  m_tbllen= *(uchar*) ptr_tbllen;
+      ADD>>>  store_record(table,record[1]);
+              int error= table->file->rnd_pos(table->record[0], table->file->ref);
+      ADD>>>  DBUG_ASSERT(memcmp(table->record[1], table->record[0],
+                                 table->s->reclength) == 0);
 
-  /* Length of table name + counter + terminating null */
-  uchar const *const ptr_colcnt= ptr_tbllen + m_tbllen + 2;
-  uchar *ptr_after_colcnt= (uchar*) ptr_colcnt;
-  m_colcnt= net_field_length(&ptr_after_colcnt);
+    */
 
-  DBUG_PRINT("info",("m_dblen: %lu  off: %ld  m_tbllen: %lu  off: %ld  m_colcnt: %lu  off: %ld",
-                     (ulong) m_dblen, (long) (ptr_dblen-(const uchar*)vpart), 
-                     (ulong) m_tbllen, (long) (ptr_tbllen-(const uchar*)vpart),
-                     m_colcnt, (long) (ptr_colcnt-(const uchar*)vpart)));
+    DBUG_PRINT("info",("locating record using primary key (position)"));
+    if (table->file->inited && (error= table->file->ha_index_end()))
+      goto end;
 
-  /* Allocate mem for all fields in one go. If fails, caught in is_valid() */
-  m_memory= (uchar*) my_multi_malloc(MYF(MY_WME),
-                                     &m_dbnam, (uint) m_dblen + 1,
-                                     &m_tblnam, (uint) m_tbllen + 1,
-                                     &m_coltype, (uint) m_colcnt,
-                                     NullS);
+    if ((error= table->file->ha_rnd_init(FALSE)))
+      goto end;
 
-  if (m_memory)
-  {
-    /* Copy the different parts into their memory */
-    strncpy(const_cast<char*>(m_dbnam), (const char*)ptr_dblen  + 1, m_dblen + 1);
-    strncpy(const_cast<char*>(m_tblnam), (const char*)ptr_tbllen + 1, m_tbllen + 1);
-    memcpy(m_coltype, ptr_after_colcnt, m_colcnt);
+    error= table->file->rnd_pos_by_record(table->record[0]);
 
-    ptr_after_colcnt= ptr_after_colcnt + m_colcnt;
-    bytes_read= (uint) (ptr_after_colcnt - (uchar *)buf);
-    DBUG_PRINT("info", ("Bytes read: %d.\n", bytes_read));
-    if (bytes_read < event_len)
+    table->file->ha_rnd_end();
+    if (error)
     {
-      m_field_metadata_size= net_field_length(&ptr_after_colcnt);
-      DBUG_ASSERT(m_field_metadata_size <= (m_colcnt * 2));
-      uint num_null_bytes= (m_colcnt + 7) / 8;
-      m_meta_memory= (uchar *)my_multi_malloc(MYF(MY_WME),
-                                     &m_null_bits, num_null_bytes,
-                                     &m_field_metadata, m_field_metadata_size,
-                                     NULL);
-      memcpy(m_field_metadata, ptr_after_colcnt, m_field_metadata_size);
-      ptr_after_colcnt= (uchar*)ptr_after_colcnt + m_field_metadata_size;
-      memcpy(m_null_bits, ptr_after_colcnt, num_null_bytes);
+      DBUG_PRINT("info",("rnd_pos returns error %d",error));
+      if (error == HA_ERR_RECORD_DELETED)
+        error= HA_ERR_KEY_NOT_FOUND;
     }
-  }
-
-  DBUG_VOID_RETURN;
-}
-#endif
-
-Table_map_log_event::~Table_map_log_event()
-{
-  my_free(m_meta_memory);
-  my_free(m_memory);
-}
 
-/*
-  Return value is an error code, one of:
+    goto end;
+  }
 
-      -1     Failure to open table   [from open_tables()]
-       0     Success
-       1     No room for more tables [from set_table()]
-       2     Out of memory           [from set_table()]
-       3     Wrong table definition
-       4     Daisy-chaining RBR with SBR not possible
- */
+  // We can't use position() - try other methods.
 
-#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+INDEX_SCAN:
 
-enum enum_tbl_map_status
-{
-  /* no duplicate identifier found */
-  OK_TO_PROCESS= 0,
+  /* Use the m_key_index'th key */
+  keyinfo= table->key_info + m_key_index;
 
-  /* this table map must be filtered out */
-  FILTERED_OUT= 1,
+  if ((error= open_record_scan()))
+    goto end;
 
-  /* identifier mapping table with different properties */
-  SAME_ID_MAPPING_DIFFERENT_TABLE= 2,
-  
-  /* a duplicate identifier was found mapping the same table */
-  SAME_ID_MAPPING_SAME_TABLE= 3
-};
+  error= next_record_scan(true);
+  if (error)
+  {
+    DBUG_PRINT("info",("no record matching the key found in the table"));
+    if (error == HA_ERR_RECORD_DELETED)
+      error= HA_ERR_KEY_NOT_FOUND;
+    goto end;
+  }
 
-/*
-  Checks if this table map event should be processed or not. First
-  it checks the filtering rules, and then looks for duplicate identifiers
-  in the existing list of rli->tables_to_lock.
 
-  It checks that there hasn't been any corruption by verifying that there
-  are no duplicate entries with different properties.
+  /*
+    Don't print debug messages when running valgrind since they can
+    trigger false warnings.
+   */
+#ifndef HAVE_purify
+  DBUG_PRINT("info",("found first matching record"));
+  DBUG_DUMP("record[0]", table->record[0], table->s->reclength);
+#endif
+  /*
+    Below is a minor "optimization".  If the key (i.e., the
+    m_key_index'th key) has the HA_NOSAME flag set, we know that we
+    have found the correct record (since there can be no duplicates);
+    otherwise, we have to compare the record with the one found to
+    see if it is the correct one.
+
+    CAVEAT! This behaviour is essential for the replication of,
+    e.g., the mysql.proc table since the correct record *shall* be
+    found using the primary key *only*.  There shall be no
+    comparison of non-PK columns to decide if the correct record is
+    found.  I can see no scenario where it would be incorrect to
+    choose the row to change using only a PK or a UNIQUE NOT NULL
+    index (UNNI).
+  */
+  if (keyinfo->flags & HA_NOSAME || m_key_index == table->s->primary_key)
+  {
+    /* The unique key does not have a nullable part */
+    if (!(keyinfo->flags & (HA_NULL_PART_KEY)))
+      goto end;  // record found
+    else
+    {
+      /*
+        The unique key has a nullable part. We need to check whether any
+        field in the BI image that is part of the UNNI is null.
+      */
+      bool null_found= FALSE;
+      for (uint i=0; i < keyinfo->key_parts && !null_found; i++)
+      {
+        uint fieldnr= keyinfo->key_part[i].fieldnr - 1;
+        Field **f= table->field+fieldnr;
+        null_found= (*f)->is_null();
+      }
 
-  In some cases, some binary logs could get corrupted, showing several
-  tables mapped to the same table_id, 0 (see: BUG#56226). Thus we do this
-  early sanity check for such cases and avoid that the server crashes 
-  later.
+      if (!null_found)
+        goto end;           // record found
 
-  In some corner cases, the master logs duplicate table map events, i.e.,
-  same id, same database name, same table name (see: BUG#37137). This is
-  different from the above as it's the same table that is mapped again 
-  to the same identifier. Thus we cannot just check for same ids and 
-  assume that the event is corrupted we need to check every property. 
+      /* else fall through to index scan */
+    }
+  }
 
-  NOTE: in the event that BUG#37137 ever gets fixed, this extra check 
-        will still be valid because we would need to support old binary 
-        logs anyway.
+  /*
+    In case key is not unique, we still have to iterate over records found
+    and find the one which is identical to the row given. A copy of the
+    record we are looking for is stored in record[1].
+   */
+  DBUG_PRINT("info",("non-unique index, scanning it to find matching record"));
 
-  @param rli The relay log info reference.
-  @param table_list A list element containing the table to check against.
-  @return OK_TO_PROCESS 
-            if there was no identifier already in rli->tables_to_lock 
-            
-          FILTERED_OUT
-            if the event is filtered according to the filtering rules
+  while (record_compare(table, &m_cols))
+  {
+    while ((error= next_record_scan(false)))
+    {
+      /* We just skip records that have already been deleted */
+      if (error == HA_ERR_RECORD_DELETED)
+        continue;
+      DBUG_PRINT("info",("no record matching the given row found"));
+      goto end;
+    }
+  }
 
-          SAME_ID_MAPPING_DIFFERENT_TABLE 
-            if the same identifier already maps a different table in 
-            rli->tables_to_lock
+end:
 
-          SAME_ID_MAPPING_SAME_TABLE 
-            if the same identifier already maps the same table in 
-            rli->tables_to_lock.
-*/
-static enum_tbl_map_status
-check_table_map(Relay_log_info const *rli, RPL_TABLE_LIST *table_list)
-{
-  DBUG_ENTER("check_table_map");
-  enum_tbl_map_status res= OK_TO_PROCESS;
+  DBUG_ASSERT(error != HA_ERR_RECORD_DELETED);
 
-  if (rli->info_thd->slave_thread /* filtering is for slave only */ &&
-      (!rpl_filter->db_ok(table_list->db) ||
-       (rpl_filter->is_on() && !rpl_filter->tables_ok("", table_list))))
-    res= FILTERED_OUT;
+  if (error && error != HA_ERR_RECORD_DELETED)
+    table->file->print_error(error, MYF(0));
   else
-  {
-    for(RPL_TABLE_LIST *ptr= static_cast<RPL_TABLE_LIST*>(rli->tables_to_lock);
-        ptr; 
-        ptr= static_cast<RPL_TABLE_LIST*>(ptr->next_local))
-    {
-      if (ptr->table_id == table_list->table_id)
-      {
+    error= do_apply_row(rli);
 
-        if (strcmp(ptr->db, table_list->db) || 
-            strcmp(ptr->alias, table_list->table_name) || 
-            ptr->lock_type != TL_WRITE) // the ::do_apply_event always sets TL_WRITE
-          res= SAME_ID_MAPPING_DIFFERENT_TABLE;
-        else
-          res= SAME_ID_MAPPING_SAME_TABLE;
+  if (!error)
+    error= close_record_scan();
+  else
+    /*
+      We already have an error; keep its code and
+      try to close the scan anyway.
+    */
+    (void) close_record_scan();
 
-        break;
-      }
-    }
+  if ((get_type_code() == UPDATE_ROWS_EVENT) &&
+      (saved_m_curr_row == m_curr_row))
+  {
+    /* we need to unpack the AI so that positions get updated */
+    m_curr_row= m_curr_row_end;
+    unpack_current_row(rli, &m_cols);
   }
+  table->default_column_bitmaps();
+  DBUG_RETURN(error);
 
-  DBUG_PRINT("debug", ("check of table map ended up with: %u", res));
-
-  DBUG_RETURN(res);
 }
 
-int Table_map_log_event::do_apply_event(Relay_log_info const *rli)
+
+int Rows_log_event::do_hash_scan_and_update(Relay_log_info const *rli)
 {
-  RPL_TABLE_LIST *table_list;
-  char *db_mem, *tname_mem, *ptr;
-  size_t dummy_len;
-  void *memory;
-  DBUG_ENTER("Table_map_log_event::do_apply_event(Relay_log_info*)");
-  DBUG_ASSERT(rli->info_thd == thd);
+  DBUG_ASSERT(m_table && m_table->in_use != NULL);
+  TABLE *table= m_table;
 
-  /* Step the query id to mark what columns that are actually used. */
-  thd->set_query_id(next_query_id());
+  int error= 0;
+  const uchar *saved_last_m_curr_row= NULL;
+  const uchar *bi_start= NULL;
+  const uchar *bi_ends= NULL;
+  const uchar *ai_start= NULL;
+  const uchar *ai_ends= NULL;
+  HASH_ROW_ENTRY* entry;
+  int idempotent_errors= 0;
 
-  if (!(memory= my_multi_malloc(MYF(MY_WME),
-                                &table_list, (uint) sizeof(RPL_TABLE_LIST),
-                                &db_mem, (uint) NAME_LEN + 1,
-                                &tname_mem, (uint) NAME_LEN + 1,
-                                NullS)))
-    DBUG_RETURN(HA_ERR_OUT_OF_MEM);
+  DBUG_ENTER("Rows_log_event::do_hash_scan_and_update");
 
-  strmov(db_mem, m_dbnam);
-  strmov(tname_mem, m_tblnam);
+  bi_start= m_curr_row;
+  if ((error= unpack_current_row(rli, &m_cols)))
+    goto err;
+  bi_ends= m_curr_row_end;
 
-  if (lower_case_table_names == 1)
+  store_record(m_table, record[1]);
+
+  if (get_type_code() == UPDATE_ROWS_EVENT)
   {
-    my_casedn_str(system_charset_info, db_mem);
-    my_casedn_str(system_charset_info, tname_mem);
+    /*
+      This is the situation after hashing the BI:
+
+      ===|=== before image ====|=== after image ===|===
+         ^                     ^
+         m_curr_row            m_curr_row_end
+
+      We need to skip the AI as well, before moving on to the
+      next row.
+    */
+    ai_start= m_curr_row= m_curr_row_end;
+    error= unpack_current_row(rli, &m_cols_ai);
+    ai_ends= m_curr_row_end;
   }
 
-  /* rewrite rules changed the database */
-  if (((ptr= (char*) rpl_filter->get_rewrite_db(db_mem, &dummy_len)) != db_mem))
-    strmov(db_mem, ptr);
+  /* Put the before image back into table->record[0] */
+  memcpy(m_table->record[0], m_table->record[1], m_table->s->reclength);
 
-  table_list->init_one_table(db_mem, strlen(db_mem),
-                             tname_mem, strlen(tname_mem),
-                             tname_mem, TL_WRITE);
+  /* create an entry to add to the hash table */
+  entry= m_hash.make_entry(bi_start, bi_ends, ai_start, ai_ends);
 
-  table_list->table_id= DBUG_EVALUATE_IF("inject_tblmap_same_id_maps_diff_table", 0, m_table_id);
-  table_list->updating= 1;
-  DBUG_PRINT("debug", ("table: %s is mapped to %u", table_list->table_name, table_list->table_id));
-  enum_tbl_map_status tblmap_status= check_table_map(rli, table_list);
-  if (tblmap_status == OK_TO_PROCESS)
-  {
-    DBUG_ASSERT(thd->lex->query_tables != table_list);
+  /* add it to the hash table */
+  m_hash.put(m_table, &m_cols, entry);
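+  /*
+    For a HASH_SCAN over an index we also remember the key of this row,
+    so that the scan phase can later seek directly to each distinct key
+    value instead of reading the whole table.
+  */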
+  if (m_key_index < MAX_KEY)
+    add_key_to_distinct_keyset();
 
-    /*
-      Use placement new to construct the table_def instance in the
-      memory allocated for it inside table_list.
+  /*
+    If this was the last (pair of) row(s) in the event, all rows have
+    now been hashed: do the scan and match the rows found against the
+    entries in the hash table.
+   */
+  if (m_curr_row_end == m_rows_end)
+  {
+    saved_last_m_curr_row= m_curr_row;
 
-      The memory allocated by the table_def structure (i.e., not the
-      memory allocated *for* the table_def structure) is released
-      inside Relay_log_info::clear_tables_to_lock() by calling the
-      table_def destructor explicitly.
-    */
-    new (&table_list->m_tabledef)
-      table_def(m_coltype, m_colcnt,
-                m_field_metadata, m_field_metadata_size,
-                m_null_bits, m_flags);
-    table_list->m_tabledef_valid= TRUE;
-    table_list->m_conv_table= NULL;
-    table_list->open_type= OT_BASE_ONLY;
+    DBUG_PRINT("info",("Hash was populated with %d records!", m_hash.size()));
+
+    /* Open the table or the index, depending on whether m_key_index refers to a usable key. */
+    if ((error= open_record_scan()))
+      goto err;
 
     /*
-      We record in the slave's information that the table should be
-      locked by linking the table into the list of tables to lock.
+       Scan the table only once and compare against entries in hash.
+       When a match is found, apply the changes.
+     */
+    int i= 0;
+    do
+    {
+      /* get the next record from the table */
+      error= next_record_scan(i == 0);
+      i++;
+
+      if (error)
+        DBUG_PRINT("info", ("error: %s", HA_ERR(error)));
+      switch (error) {
+        case 0:
+        {
+          entry= m_hash.get(table, &m_cols);
+          store_record(table, record[1]);
+
+          /**
+             If there are collisions we need to be sure that this is
+             indeed the record we want.  Loop through all records for
+             the given key and explicitly compare them against the
+             record we got from the storage engine.
+           */
+          while (entry)
+          {
+            m_curr_row= entry->positions->bi_start;
+            m_curr_row_end= entry->positions->bi_ends;
+
+            if ((error= unpack_current_row(rli, &m_cols)))
+              goto close_table;
+
+            if (record_compare(m_table, &m_cols))
+              m_hash.next(&entry);
+            else
+              break;   // we found a match
+          }
+
+          /**
+             We found the entry we needed, just apply the changes.
+           */
+          if (entry)
+          {
+            // just to be safe, copy the record from the SE to table->record[0]
+            memcpy(table->record[0], table->record[1], table->s->reclength);
+
+            /**
+               At this point, both table->record[0] and
+               table->record[1] have the SE row that matched the one
+               in the hash table.
+
+               Hence, if this is a DELETE we would not need to mess
+               around with positions anymore, but since this can be an
+               UPDATE, we need to provide positions so that the AI is
+               unpacked correctly to table->record[0] in the UPDATE
+               implementation of do_exec_row().
+            */
+            m_curr_row= entry->positions->bi_start;
+            m_curr_row_end= entry->positions->bi_ends;
+
+            /* we don't need this entry anymore, just delete it */
+            if ((error= m_hash.del(entry)))
+              goto err;
+
+            if ((error= do_apply_row(rli)))
+            {
+              if (handle_idempotent_errors(rli, &error))
+                goto close_table;
+
+              do_post_row_operations(rli, error);
+            }
+          }
+        }
+        break;
+
+        case HA_ERR_RECORD_DELETED:
+          // get next
+          continue;
+
+        case HA_ERR_KEY_NOT_FOUND:
+          /* If the slave exec mode is idempotent don't break */
+          if (handle_idempotent_errors(rli, &error))
+            goto close_table;
+          idempotent_errors++;
+          continue;
+
+        case HA_ERR_END_OF_FILE:
+        default:
+          // exception (hash is not empty and we have reached EOF or
+          // other error happened)
+          goto close_table;
+      }
+    }
+    /*
+      If the slave_exec_mode is set to IDEMPOTENT, we cannot expect the
+      hash to be empty. In such cases we count the number of idempotent
+      errors and check whether it is equal to or greater than the number
+      of rows left in the hash.
     */
-    table_list->next_global= table_list->next_local= rli->tables_to_lock;
-    const_cast<Relay_log_info*>(rli)->tables_to_lock= table_list;
-    const_cast<Relay_log_info*>(rli)->tables_to_lock_count++;
-    /* 'memory' is freed in clear_tables_to_lock */
-  }
-  else  // FILTERED_OUT, SAME_ID_MAPPING_*
-  {
-    /*
-      If mapped already but with different properties, we raise an
-      error.
-      If mapped already but with same properties we skip the event.
-      If filtered out we skip the event.
+    while (((idempotent_errors < m_hash.size()) && !m_hash.is_empty()) &&
+           (!error || (error == HA_ERR_RECORD_DELETED)));
 
-      In all three cases, we need to free the memory previously 
-      allocated.
-     */
-    if (tblmap_status == SAME_ID_MAPPING_DIFFERENT_TABLE)
+close_table:
+    if (error)
     {
-      /*
-        Something bad has happened. We need to stop the slave as strange things
-        could happen if we proceed: slave crash, wrong table being updated, ...
-        As a consequence we push an error in this case.
-       */
+      m_table->file->print_error(error, MYF(0));
+      DBUG_PRINT("info", ("Failed to get next record"
+                          " (ha_rnd_next returns %d)",error));
+    }
 
-      char buf[256];
+    if (!error)
+      error= close_record_scan();
+    else
+      /*
+        We already have an error; keep its code and
+        try to close the scan anyway.
+      */
+      (void) close_record_scan();
 
-      my_snprintf(buf, sizeof(buf), 
-                  "Found table map event mapping table id %u which "
-                  "was already mapped but with different settings.",
-                  table_list->table_id);
+    if (error == HA_ERR_RECORD_DELETED)
+      error= 0;
 
-      if (thd->slave_thread)
-        rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, 
-                    ER(ER_SLAVE_FATAL_ERROR), buf);
-      else
-        /* 
-          For the cases in which a 'BINLOG' statement is set to 
-          execute in a user session 
-         */
-        my_printf_error(ER_SLAVE_FATAL_ERROR, ER(ER_SLAVE_FATAL_ERROR), 
-                        MYF(0), buf);
-    } 
-    
-    my_free(memory);
+    DBUG_ASSERT((m_hash.is_empty() && !error) ||
+                (!m_hash.is_empty() &&
+                 ((error) || (idempotent_errors >= m_hash.size()))));
   }
 
-  DBUG_RETURN(tblmap_status == SAME_ID_MAPPING_DIFFERENT_TABLE);
-}
+err:
 
-Log_event::enum_skip_reason
-Table_map_log_event::do_shall_skip(Relay_log_info *rli)
-{
-  /*
-    If the slave skip counter is 1, then we should not start executing
-    on the next event.
-  */
-  return continue_group(rli);
-}
+  if ((m_hash.is_empty() && !error) || (idempotent_errors >= m_hash.size()))
+  {
+    /**
+       Reset the last positions, because the positions are lost while
+       handling entries in the hash.
+     */
+    m_curr_row= saved_last_m_curr_row;
+    m_curr_row_end= m_rows_end;
+  }
 
-int Table_map_log_event::do_update_pos(Relay_log_info *rli)
-{
-  rli->inc_event_relay_log_pos();
-  return 0;
+  DBUG_RETURN(error);
 }
 
-#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */
-
-#ifndef MYSQL_CLIENT
-bool Table_map_log_event::write_data_header(IO_CACHE *file)
+int Rows_log_event::do_table_scan_and_update(Relay_log_info const *rli)
 {
-  DBUG_ASSERT(m_table_id != ~0UL);
-  uchar buf[TABLE_MAP_HEADER_LEN];
-  DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master",
-                  {
-                    int4store(buf + 0, m_table_id);
-                    int2store(buf + 4, m_flags);
-                    return (wrapper_my_b_safe_write(file, buf, 6));
-                  });
-  int6store(buf + TM_MAPID_OFFSET, (ulonglong)m_table_id);
-  int2store(buf + TM_FLAGS_OFFSET, m_flags);
-  return (wrapper_my_b_safe_write(file, buf, TABLE_MAP_HEADER_LEN));
-}
+  int error= 0;
+  const uchar* saved_m_curr_row= m_curr_row;
+  TABLE* table= m_table;
 
-bool Table_map_log_event::write_data_body(IO_CACHE *file)
-{
-  DBUG_ASSERT(m_dbnam != NULL);
-  DBUG_ASSERT(m_tblnam != NULL);
-  /* We use only one byte per length for storage in event: */
-  DBUG_ASSERT(m_dblen < 128);
-  DBUG_ASSERT(m_tbllen < 128);
+  DBUG_ENTER("Rows_log_event::do_table_scan_and_update");
+  DBUG_ASSERT(m_curr_row != m_rows_end);
+  DBUG_PRINT("info",("locating record using table scan (ha_rnd_next)"));
 
-  uchar const dbuf[]= { (uchar) m_dblen };
-  uchar const tbuf[]= { (uchar) m_tbllen };
+  saved_m_curr_row= m_curr_row;
 
-  uchar cbuf[sizeof(m_colcnt) + 1];
-  uchar *const cbuf_end= net_store_length(cbuf, (size_t) m_colcnt);
-  DBUG_ASSERT(static_cast<size_t>(cbuf_end - cbuf) <= sizeof(cbuf));
+  /** unpack the before image */
+  prepare_record(table, &m_cols, FALSE);
+  if (!(error= unpack_current_row(rli, &m_cols)))
+  {
+    // Temporary fix to find out why it fails [/Matz]
+    memcpy(m_table->read_set->bitmap, m_cols.bitmap, (m_table->read_set->n_bits + 7) / 8);
 
-  /*
-    Store the size of the field metadata.
-  */
-  uchar mbuf[sizeof(m_field_metadata_size)];
-  uchar *const mbuf_end= net_store_length(mbuf, m_field_metadata_size);
+    /** save a copy so that we can compare against it later */
+    store_record(m_table, record[1]);
 
-  return (wrapper_my_b_safe_write(file, dbuf,      sizeof(dbuf)) ||
-          wrapper_my_b_safe_write(file, (const uchar*)m_dbnam,   m_dblen+1) ||
-          wrapper_my_b_safe_write(file, tbuf,      sizeof(tbuf)) ||
-          wrapper_my_b_safe_write(file, (const uchar*)m_tblnam,  m_tbllen+1) ||
-          wrapper_my_b_safe_write(file, cbuf, (size_t) (cbuf_end - cbuf)) ||
-          wrapper_my_b_safe_write(file, m_coltype, m_colcnt) ||
-          wrapper_my_b_safe_write(file, mbuf, (size_t) (mbuf_end - mbuf)) ||
-          wrapper_my_b_safe_write(file, m_field_metadata, m_field_metadata_size),
-          wrapper_my_b_safe_write(file, m_null_bits, (m_colcnt + 7) / 8));
- }
-#endif
+    int restart_count= 0; // Number of times scanning has restarted from top
 
-#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
+    if ((error= m_table->file->ha_rnd_init(1)))
+    {
+      DBUG_PRINT("info",("error initializing table scan"
+                         " (ha_rnd_init returns %d)",error));
+      goto end;
+    }
 
-/*
-  Print some useful information for the SHOW BINARY LOG information
-  field.
- */
+    /* Continue until we find the right record or have made a full loop */
+    do
+    {
+      error= m_table->file->ha_rnd_next(m_table->record[0]);
+      if (error)
+        DBUG_PRINT("info", ("error: %s", HA_ERR(error)));
+      switch (error) {
+      case HA_ERR_END_OF_FILE:
+        // restart scan from top
+        if (++restart_count < 2)
+          error= m_table->file->ha_rnd_init(1);
+        break;
 
-#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
-int Table_map_log_event::pack_info(Protocol *protocol)
-{
-  char buf[256];
-  size_t bytes= my_snprintf(buf, sizeof(buf),
-                            "table_id: %lu (%s.%s)",
-                            m_table_id, m_dbnam, m_tblnam);
-  protocol->store(buf, bytes, &my_charset_bin);
-  return 0;
-}
-#endif
+      case HA_ERR_RECORD_DELETED:
+        // fetch next
+      case 0:
+        // we're good, check if record matches
+        break;
 
+      default:
+        // exception
+        goto end;
+      }
+    }
+    while ((error == HA_ERR_END_OF_FILE && restart_count < 2) ||
+           (error == HA_ERR_RECORD_DELETED) ||
+           (!error && record_compare(m_table, &m_cols)));
+  }
 
-#endif
+end:
 
+  DBUG_ASSERT(error != HA_ERR_RECORD_DELETED);
 
-#ifdef MYSQL_CLIENT
-void Table_map_log_event::print(FILE *, PRINT_EVENT_INFO *print_event_info)
-{
-  if (!print_event_info->short_form)
+  /* either we report error or apply the changes */
+  if (error && error != HA_ERR_RECORD_DELETED)
   {
-    print_header(&print_event_info->head_cache, print_event_info, TRUE);
-    my_b_printf(&print_event_info->head_cache,
-                "\tTable_map: `%s`.`%s` mapped to number %lu\n",
-                m_dbnam, m_tblnam, m_table_id);
-    print_base64(&print_event_info->body_cache, print_event_info, TRUE);
+    DBUG_PRINT("info", ("Failed to get next record"
+                        " (ha_rnd_next returns %d)",error));
+    m_table->file->print_error(error, MYF(0));
   }
-}
-#endif
+  else
+    error= do_apply_row(rli);
 
-/**************************************************************************
-	Write_rows_log_event member functions
-**************************************************************************/
 
-/*
-  Constructor used to build an event for writing to the binary log.
- */
-#if !defined(MYSQL_CLIENT)
-Write_rows_log_event::Write_rows_log_event(THD *thd_arg, TABLE *tbl_arg,
-                                           ulong tid_arg,
-                                           bool is_transactional)
-  : Rows_log_event(thd_arg, tbl_arg, tid_arg, tbl_arg->write_set, is_transactional)
-{
-}
-#endif
+  if (!error)
+    error= close_record_scan();
+  else
+    /*
+      We already have an error; keep its code and
+      try to close the scan anyway.
+    */
+    (void) close_record_scan();
 
-/*
-  Constructor used by slave to read the event from the binary log.
- */
-#ifdef HAVE_REPLICATION
-Write_rows_log_event::Write_rows_log_event(const char *buf, uint event_len,
-                                           const Format_description_log_event
-                                           *description_event)
-: Rows_log_event(buf, event_len, WRITE_ROWS_EVENT, description_event)
-{
+  if ((get_type_code() == UPDATE_ROWS_EVENT) &&
+      (saved_m_curr_row == m_curr_row)) // we need to unpack the AI
+  {
+    m_curr_row= m_curr_row_end;
+    unpack_current_row(rli, &m_cols);
+  }
+
+  table->default_column_bitmaps();
+  DBUG_RETURN(error);
 }
-#endif
 
-#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
-int 
-Write_rows_log_event::do_before_row_operations(const Slave_reporting_capability *const)
+int Rows_log_event::do_apply_event(Relay_log_info const *rli)
 {
+  DBUG_ENTER("Rows_log_event::do_apply_event(Relay_log_info*)");
   int error= 0;
 
+  if (opt_bin_log)
+  {
+    enum_gtid_statement_status state= gtid_pre_statement_checks(thd);
+    if (state == GTID_STATEMENT_CANCEL)
+      // error has already been printed; don't print anything more here
+      DBUG_RETURN(-1);
+    else if (state == GTID_STATEMENT_SKIP)
+      DBUG_RETURN(0);
+  }
+
   /*
-    Increment the global status insert count variable
+    If m_table_id == ~0UL, then we have a dummy event that does not
+    contain any data.  In that case, we just remove all tables in the
+    tables_to_lock list, close the thread tables, and return with
+    success.
+   */
+  if (m_table_id == ~0UL)
+  {
+    /*
+       This one is supposed to be set: just an extra check so that
+       nothing strange has happened.
+     */
+    DBUG_ASSERT(get_flags(STMT_END_F));
+
+    const_cast<Relay_log_info*>(rli)->slave_close_thread_tables(thd);
+    thd->clear_error();
+    DBUG_RETURN(0);
+  }
+
+  /*
+    'thd' has been set by exec_relay_log_event(), just before calling
+    do_apply_event(). We still check here to prevent future coding
+    errors.
   */
-  if (get_flags(STMT_END_F))
-    status_var_increment(thd->status_var.com_stat[SQLCOM_INSERT]);
+  DBUG_ASSERT(rli->info_thd == thd);
 
-  /**
-     todo: to introduce a property for the event (handler?) which forces
-     applying the event in the replace (idempotent) fashion.
+  /*
+    If there is no locks taken, this is the first binrow event seen
+    after the table map events.  We should then lock all the tables
+    used in the transaction and proceed with execution of the actual
+    event.
   */
-  if ((slave_exec_mode == SLAVE_EXEC_MODE_IDEMPOTENT) ||
-      (m_table->s->db_type()->db_type == DB_TYPE_NDBCLUSTER))
+  if (!thd->lock)
   {
     /*
-      We are using REPLACE semantics and not INSERT IGNORE semantics
-      when writing rows, that is: new rows replace old rows.  We need to
-      inform the storage engine that it should use this behaviour.
+      Lock_tables() reads the contents of thd->lex, so they must be
+      initialized.
+
+      We also call the mysql_reset_thd_for_next_command(), since this
+      is the logical start of the next "statement". Note that this
+      call might reset the value of current_stmt_binlog_format, so
+      we need to do any changes to that value after this function.
     */
-    
-    /* Tell the storage engine that we are using REPLACE semantics. */
-    thd->lex->duplicates= DUP_REPLACE;
-    
+    lex_start(thd);
+    mysql_reset_thd_for_next_command(thd);
     /*
-      Pretend we're executing a REPLACE command: this is needed for
-      InnoDB and NDB Cluster since they are not (properly) checking the
-      lex->duplicates flag.
-    */
-    thd->lex->sql_command= SQLCOM_REPLACE;
-    /* 
-       Do not raise the error flag in case of hitting to an unique attribute
-    */
-    m_table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
-    /* 
-       NDB specific: update from ndb master wrapped as Write_rows
-       so that the event should be applied to replace slave's row
+      The current statement is just about to begin and 
+      has not yet modified anything. Note, all.modified is reset
+      by mysql_reset_thd_for_next_command.
     */
-    m_table->file->extra(HA_EXTRA_WRITE_CAN_REPLACE);
-    /* 
-       NDB specific: if update from ndb master wrapped as Write_rows
-       does not find the row it's assumed idempotent binlog applying
-       is taking place; don't raise the error.
+    thd->transaction.stmt.reset_unsafe_rollback_flags();
+    /*
+      This is a row injection, so we flag the "statement" as
+      such. Note that this code is called both when the slave does row
+      injections and when the BINLOG statement is used to do row
+      injections.
     */
-    m_table->file->extra(HA_EXTRA_IGNORE_NO_KEY);
+    thd->lex->set_stmt_row_injection();
+
     /*
-      TODO: the cluster team (Tomas?) says that it's better if the engine knows
-      how many rows are going to be inserted, then it can allocate needed memory
-      from the start.
+      There are a few flags that are replicated with each row event.
+      Make sure to set/clear them before executing the main body of
+      the event.
     */
-  }
+    if (get_flags(NO_FOREIGN_KEY_CHECKS_F))
+        thd->variables.option_bits|= OPTION_NO_FOREIGN_KEY_CHECKS;
+    else
+        thd->variables.option_bits&= ~OPTION_NO_FOREIGN_KEY_CHECKS;
 
- 
-  /* Honor next number column if present */
-  m_table->next_number_field= m_table->found_next_number_field;
-  /*
-   * Fixed Bug#45999, In RBR, Store engine of Slave auto-generates new
-   * sequence numbers for auto_increment fields if the values of them are 0.
-   * If generateing a sequence number is decided by the values of
-   * table->auto_increment_field_not_null and SQL_MODE(if includes
-   * MODE_NO_AUTO_VALUE_ON_ZERO) in update_auto_increment function.
-   * SQL_MODE of slave sql thread is always consistency with master's.
-   * In RBR, auto_increment fields never are NULL.
-   */
-  m_table->auto_increment_field_not_null= TRUE;
-  return error;
-}
+    if (get_flags(RELAXED_UNIQUE_CHECKS_F))
+        thd->variables.option_bits|= OPTION_RELAXED_UNIQUE_CHECKS;
+    else
+        thd->variables.option_bits&= ~OPTION_RELAXED_UNIQUE_CHECKS;
 
-int 
-Write_rows_log_event::do_after_row_operations(const Slave_reporting_capability *const,
-                                              int error)
-{
-  int local_error= 0;
-  m_table->next_number_field=0;
-  m_table->auto_increment_field_not_null= FALSE;
-  if ((slave_exec_mode == SLAVE_EXEC_MODE_IDEMPOTENT) ||
-      m_table->s->db_type()->db_type == DB_TYPE_NDBCLUSTER)
-  {
-    m_table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY);
-    m_table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE);
     /*
-      resetting the extra with 
-      table->file->extra(HA_EXTRA_NO_IGNORE_NO_KEY); 
-      fires bug#27077
-      explanation: file->reset() performs this duty
-      ultimately. Still todo: fix
+      Note that unlike the other thd options set here, this one
+      comes from a global, and not from the incoming event.
     */
-  }
-  if ((local_error= m_table->file->ha_end_bulk_insert()))
-  {
-    m_table->file->print_error(local_error, MYF(0));
-  }
-  return error? error : local_error;
-}
+    if (opt_slave_allow_batching)
+      thd->variables.option_bits|= OPTION_ALLOW_BATCH;
+    else
+      thd->variables.option_bits&= ~OPTION_ALLOW_BATCH;
 
-#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+    /* A small test to verify that objects have consistent types */
+    DBUG_ASSERT(sizeof(thd->variables.option_bits) == sizeof(OPTION_RELAXED_UNIQUE_CHECKS));
 
-/*
-  Check if there are more UNIQUE keys after the given key.
-*/
-static int
-last_uniq_key(TABLE *table, uint keyno)
-{
-  while (++keyno < table->s->keys)
-    if (table->key_info[keyno].flags & HA_NOSAME)
-      return 0;
-  return 1;
-}
+    if (open_and_lock_tables(thd, rli->tables_to_lock, FALSE, 0))
+    {
+      uint actual_error= thd->get_stmt_da()->sql_errno();
+      if (thd->is_slave_error || thd->is_fatal_error)
+      {
+        /*
+          Error reporting borrowed from Query_log_event with many excessive
+          simplifications. 
+          We should not honour --slave-skip-errors at this point as we are
+          having severe errors which should not be skipped.
+        */
+        rli->report(ERROR_LEVEL, actual_error,
+                    "Error executing row event: '%s'",
+                    (actual_error ? thd->get_stmt_da()->message() :
+                     "unexpected success or fatal error"));
+        thd->is_slave_error= 1;
+      }
+      const_cast<Relay_log_info*>(rli)->slave_close_thread_tables(thd);
+      DBUG_RETURN(actual_error);
+    }
 
-/**
-   Check if an error is a duplicate key error.
+    /*
+      When the open and locking succeeded, we check all tables to
+      ensure that they still have the correct type.
 
-   This function is used to check if an error code is one of the
-   duplicate key error, i.e., and error code for which it is sensible
-   to do a <code>get_dup_key()</code> to retrieve the duplicate key.
+      We can use a down cast here since we know that every table added
+      to the tables_to_lock is a RPL_TABLE_LIST.
+    */
 
-   @param errcode The error code to check.
+    {
+      DBUG_PRINT("debug", ("Checking compability of tables to lock - tables_to_lock: %p",
+                           rli->tables_to_lock));
+      RPL_TABLE_LIST *ptr= rli->tables_to_lock;
+      for ( ; ptr ; ptr= static_cast<RPL_TABLE_LIST*>(ptr->next_global))
+      {
+        TABLE *conv_table;
+        if (!ptr->m_tabledef.compatible_with(thd, const_cast<Relay_log_info*>(rli),
+                                             ptr->table, &conv_table))
+        {
+          DBUG_PRINT("debug", ("Table: %s.%s is not compatible with master",
+                               ptr->table->s->db.str,
+                               ptr->table->s->table_name.str));
+          /*
+            We should not honour --slave-skip-errors at this point as we are
+            having severe errors which should not be skipped.
+          */
+          thd->is_slave_error= 1;
+          const_cast<Relay_log_info*>(rli)->slave_close_thread_tables(thd);
+          DBUG_RETURN(ERR_BAD_TABLE_DEF);
+        }
+        DBUG_PRINT("debug", ("Table: %s.%s is compatible with master"
+                             " - conv_table: %p",
+                             ptr->table->s->db.str,
+                             ptr->table->s->table_name.str, conv_table));
+        ptr->m_conv_table= conv_table;
+      }
+    }
 
-   @return <code>true</code> if the error code is such that
-   <code>get_dup_key()</code> will return true, <code>false</code>
-   otherwise.
- */
-bool
-is_duplicate_key_error(int errcode)
-{
-  switch (errcode)
-  {
-  case HA_ERR_FOUND_DUPP_KEY:
-  case HA_ERR_FOUND_DUPP_UNIQUE:
-    return true;
+    /*
+      ... and then we add all the tables to the table map, but keep
+      them in the tables to lock list.
+
+      We also invalidate the query cache for all the tables, since
+      they will now be changed.
+
+      TODO [/Matz]: Maybe the query cache should not be invalidated
+      here? It might be that a table is not changed, even though it
+      was locked for the statement.  We do know that each
+      Rows_log_event contain at least one row, so after processing one
+      Rows_log_event, we can invalidate the query cache for the
+      associated table.
+     */
+    for (TABLE_LIST *ptr= rli->tables_to_lock ; ptr ; ptr= ptr->next_global)
+    {
+      const_cast<Relay_log_info*>(rli)->m_table_map.set_table(ptr->table_id, ptr->table);
+    }
+#ifdef HAVE_QUERY_CACHE
+    query_cache.invalidate_locked_for_write(rli->tables_to_lock);
+#endif
   }
-  return false;
-}
 
-/**
-  Write the current row into event's table.
+  TABLE *table=
+    m_table= const_cast<Relay_log_info*>(rli)->m_table_map.get_table(m_table_id);
 
-  The row is located in the row buffer, pointed by @c m_curr_row member.
-  Number of columns of the row is stored in @c m_width member (it can be 
-  different from the number of columns in the table to which we insert). 
-  Bitmap @c m_cols indicates which columns are present in the row. It is assumed 
-  that event's table is already open and pointed by @c m_table.
+  DBUG_PRINT("debug", ("m_table: 0x%lx, m_table_id: %lu", (ulong) m_table, m_table_id));
 
-  If the same record already exists in the table it can be either overwritten 
-  or an error is reported depending on the value of @c overwrite flag 
-  (error reporting not yet implemented). Note that the matching record can be
-  different from the row we insert if we use primary keys to identify records in
-  the table.
+  if (table)
+  {
+    /*
+      table == NULL means that this table should not be replicated
+      (this was set up by Table_map_log_event::do_apply_event()
+      which tested replicate-* rules).
+    */
 
-  The row to be inserted can contain values only for selected columns. The 
-  missing columns are filled with default values using @c prepare_record() 
-  function. If a matching record is found in the table and @c overwritte is
-  true, the missing columns are taken from it.
+    /*
+      It's not needed to set_time() but
+      1) it continues the property that "Time" in SHOW PROCESSLIST shows how
+      much slave is behind
+      2) it will be needed when we allow replication from a table with no
+      TIMESTAMP column to a table with one.
+      So we call set_time(), like in SBR. Presently it changes nothing.
+    */
+    thd->set_time(&when);
 
-  @param  rli   Relay log info (needed for row unpacking).
-  @param  overwrite  
-                Shall we overwrite if the row already exists or signal 
-                error (currently ignored).
+    /*
+      Now we are in a statement and will stay in a statement until we
+      see a STMT_END_F.
 
-  @returns Error code on failure, 0 on success.
+      We set this flag here, before actually applying any rows, in
+      case the SQL thread is stopped and we need to detect that we're
+      inside a statement and halting abruptly might cause problems
+      when restarting.
+     */
+    const_cast<Relay_log_info*>(rli)->set_flag(Relay_log_info::IN_STMT);
 
-  This method, if successful, sets @c m_curr_row_end pointer to point at the
-  next row in the rows buffer. This is done when unpacking the row to be 
-  inserted.
+    if (m_width == table->s->fields && bitmap_is_set_all(&m_cols))
+      set_flags(COMPLETE_ROWS_F);
 
-  @note If a matching record is found, it is either updated using 
-  @c ha_update_row() or first deleted and then new record written.
-*/ 
+    /*
+      Set tables write and read sets.
 
-int
-Rows_log_event::write_row(const Relay_log_info *const rli,
-                          const bool overwrite)
-{
-  DBUG_ENTER("write_row");
-  DBUG_ASSERT(m_table != NULL && thd != NULL);
+      Read_set contains all slave columns (in case we are going to fetch
+      a complete record from slave)
 
-  TABLE *table= m_table;  // pointer to event's table
-  int error;
-  int UNINIT_VAR(keynum);
-  auto_afree_ptr<char> key(NULL);
+      Write_set equals the m_cols bitmap sent from master but it can be
+      longer if slave has extra columns.
+     */
 
-  prepare_record(table, &m_cols,
-                 table->file->ht->db_type != DB_TYPE_NDBCLUSTER);
+    DBUG_PRINT_BITSET("debug", "Setting table's read_set from: %s", &m_cols);
 
-  /* unpack row into table->record[0] */
-  if ((error= unpack_current_row(rli, &m_cols)))
-    DBUG_RETURN(error);
+    bitmap_set_all(table->read_set);
+    if (get_type_code() == DELETE_ROWS_EVENT ||
+        get_type_code() == UPDATE_ROWS_EVENT)
+      bitmap_intersect(table->read_set, &m_cols);
 
-  // Temporary fix to find out why it fails [/Matz]
-  memcpy(m_table->write_set->bitmap, m_cols.bitmap, (m_table->write_set->n_bits + 7) / 8);
+    bitmap_set_all(table->write_set);
 
-  if (m_curr_row == m_rows_buf)
-  {
-    /* this is the first row to be inserted, we estimate the rows with
-       the size of the first row and use that value to initialize
-       storage engine for bulk insertion */
-    DBUG_ASSERT(!(m_curr_row > m_curr_row_end));
-    ulong estimated_rows= 0;
-    if (m_curr_row < m_curr_row_end)
-      estimated_rows= (m_rows_end - m_curr_row) / (m_curr_row_end - m_curr_row);
-    else if (m_curr_row == m_curr_row_end)
-      estimated_rows= 1;
+    /* WRITE ROWS EVENTS store the bitmap in m_cols instead of m_cols_ai */
+    MY_BITMAP *after_image= ((get_type_code() == UPDATE_ROWS_EVENT) ?
+                             &m_cols_ai : &m_cols);
+    bitmap_intersect(table->write_set, after_image);
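+    /*
+      For example, on a four-column table where the after image only
+      carries columns 0 and 2, the intersection leaves exactly those two
+      bits set in write_set, so the other columns are not touched when
+      the row operation is applied.
+    */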
 
-    m_table->file->ha_start_bulk_insert(estimated_rows);
-  }
-  
-  
-#ifndef DBUG_OFF
-  DBUG_DUMP("record[0]", table->record[0], table->s->reclength);
-  DBUG_PRINT_BITSET("debug", "write_set = %s", table->write_set);
-  DBUG_PRINT_BITSET("debug", "read_set = %s", table->read_set);
-#endif
+    this->slave_exec_mode= slave_exec_mode_options; // fix the mode for this event
 
-  /* 
-    Try to write record. If a corresponding record already exists in the table,
-    we try to change it using ha_update_row() if possible. Otherwise we delete
-    it and repeat the whole process again. 
+    // Do event specific preparations
+    error= do_before_row_operations(rli);
 
-    TODO: Add safety measures against infinite looping. 
-   */
+    /*
+      Bug#56662 Assertion failed: next_insert_id == 0, file handler.cc
+      Don't allow generation of auto_increment value when processing
+      rows event by setting 'MODE_NO_AUTO_VALUE_ON_ZERO'.
+    */
+    ulong saved_sql_mode= thd->variables.sql_mode;
+    thd->variables.sql_mode= MODE_NO_AUTO_VALUE_ON_ZERO;
 
-  m_table->mark_columns_per_binlog_row_image();
+    // row processing loop
 
-  while ((error= table->file->ha_write_row(table->record[0])))
-  {
-    if (error == HA_ERR_LOCK_DEADLOCK ||
-        error == HA_ERR_LOCK_WAIT_TIMEOUT ||
-        (keynum= table->file->get_dup_key(error)) < 0 ||
-        !overwrite)
-    {
-      DBUG_PRINT("info",("get_dup_key returns %d)", keynum));
-      /*
-        Deadlock, waiting for lock or just an error from the handler
-        such as HA_ERR_FOUND_DUPP_KEY when overwrite is false.
-        Retrieval of the duplicate key number may fail
-        - either because the error was not "duplicate key" error
-        - or because the information which key is not available
-      */
-      table->file->print_error(error, MYF(0));
-      goto error;
-    }
     /*
-       We need to retrieve the old row into record[1] to be able to
-       either update or delete the offending record.  We either:
+      set the initial time of this ROWS statement if it was not done
+      before in some other ROWS event.
+     */
+    const_cast<Relay_log_info*>(rli)->set_row_stmt_start_timestamp();
 
-       - use ha_rnd_pos() with a row-id (available as dupp_row) to the
-         offending row, if that is possible (MyISAM and Blackhole), or else
+    const uchar *saved_m_curr_row= m_curr_row;
 
-       - use ha_index_read_idx_map() with the key that is duplicated, to
-         retrieve the offending row.
+    int (Rows_log_event::*do_apply_row_ptr)(Relay_log_info const *)= NULL;
+
+    /**
+       Skip update rows events that don't have data for this slave's
+       table.
      */
-    if (table->file->ha_table_flags() & HA_DUPLICATE_POS)
+    if ((get_type_code() == UPDATE_ROWS_EVENT) &&
+        !is_any_column_signaled_for_table(table, &m_cols_ai))
+      goto AFTER_MAIN_EXEC_ROW_LOOP;
+
+    /**
+       If there are no columns marked in the read_set for this table,
+       then we cannot look up any row using the available before
+       image (BI) in the binary log. Hence, we immediately raise the
+       error HA_ERR_END_OF_FILE.
+     */
+
+    if ((m_rows_lookup_algorithm != ROW_LOOKUP_NOT_NEEDED) &&
+        !is_any_column_signaled_for_table(table, &m_cols))
     {
-      DBUG_PRINT("info",("Locating offending record using ha_rnd_pos()"));
+      error= HA_ERR_END_OF_FILE;
+      goto AFTER_MAIN_EXEC_ROW_LOOP;
+    }
+    switch (m_rows_lookup_algorithm)
+    {
+      case ROW_LOOKUP_HASH_SCAN:
+        do_apply_row_ptr= &Rows_log_event::do_hash_scan_and_update;
+        break;
 
-      if (table->file->inited && (error= table->file->ha_index_end()))
-      {
-        table->file->print_error(error, MYF(0));
-        goto error;
-      }
-      if ((error= table->file->ha_rnd_init(FALSE)))
-      {
-        table->file->print_error(error, MYF(0));
-        goto error;
-      }
+      case ROW_LOOKUP_INDEX_SCAN:
+        do_apply_row_ptr= &Rows_log_event::do_index_scan_and_update;
+        break;
 
-      error= table->file->ha_rnd_pos(table->record[1], table->file->dup_ref);
+      case ROW_LOOKUP_TABLE_SCAN:
+        do_apply_row_ptr= &Rows_log_event::do_table_scan_and_update;
+        break;
 
-      table->file->ha_rnd_end();
-      if (error)
-      {
-        DBUG_PRINT("info",("ha_rnd_pos() returns error %d",error));
-        if (error == HA_ERR_RECORD_DELETED)
-          error= HA_ERR_KEY_NOT_FOUND;
-        table->file->print_error(error, MYF(0));
-        goto error;
-      }
+      case ROW_LOOKUP_NOT_NEEDED:
+        DBUG_ASSERT(get_type_code() == WRITE_ROWS_EVENT);
+
+        /* No need to scan for rows, just apply it */
+        do_apply_row_ptr= &Rows_log_event::do_apply_row;
+        break;
+
+      default:
+        DBUG_ASSERT(0);
+        error= 1;
+        goto AFTER_MAIN_EXEC_ROW_LOOP;
+        break;
     }
-    else
-    {
-      DBUG_PRINT("info",("Locating offending record using index_read_idx()"));
 
-      if (table->file->extra(HA_EXTRA_FLUSH_CACHE))
-      {
-        DBUG_PRINT("info",("Error when setting HA_EXTRA_FLUSH_CACHE"));
-        error= my_errno;
-        goto error;
-      }
+    do {
 
-      if (key.get() == NULL)
-      {
-        key.assign(static_cast<char*>(my_alloca(table->s->max_unique_length)));
-        if (key.get() == NULL)
-        {
-          DBUG_PRINT("info",("Can't allocate key buffer"));
-          error= ENOMEM;
-          goto error;
-        }
-      }
+      error= (this->*do_apply_row_ptr)(rli);
 
-      key_copy((uchar*)key.get(), table->record[0], table->key_info + keynum,
-               0);
-      error= table->file->ha_index_read_idx_map(table->record[1], keynum,
-                                                (const uchar*)key.get(),
-                                                HA_WHOLE_KEY,
-                                                HA_READ_KEY_EXACT);
-      if (error)
-      {
-        DBUG_PRINT("info",("ha_index_read_idx_map() returns %s", HA_ERR(error)));
-        if (error == HA_ERR_RECORD_DELETED)
-          error= HA_ERR_KEY_NOT_FOUND;
-        table->file->print_error(error, MYF(0));
-        goto error;
-      }
-    }
+      if (handle_idempotent_errors(rli, &error))
+        break;
 
-    /*
-       Now, record[1] should contain the offending row.  That
-       will enable us to update it or, alternatively, delete it (so
-       that we can insert the new row afterwards).
-     */
+      /* this advances m_curr_row */
+      do_post_row_operations(rli, error);
 
-    /*
-      If row is incomplete we will use the record found to fill 
-      missing columns.  
-    */
-    if (!get_flags(COMPLETE_ROWS_F))
+    } while (!error && (m_curr_row != m_rows_end));
+
+AFTER_MAIN_EXEC_ROW_LOOP:
+
+    if (saved_m_curr_row != m_curr_row && !table->file->has_transactions())
     {
-      restore_record(table,record[1]);
-      error= unpack_current_row(rli, &m_cols);
-    }
+      /*
+        Usually, the trans_commit_stmt() propagates unsafe_rollback_flags
+        from statement to transaction level. However, we cannot rely on
+        this when row format is in use as several events can be processed
+        before calling this function. This happens because it is called
+        only when the latest event generated by a statement is processed.
 
-#ifndef DBUG_OFF
-    DBUG_PRINT("debug",("preparing for update: before and after image"));
-    DBUG_DUMP("record[1] (before)", table->record[1], table->s->reclength);
-    DBUG_DUMP("record[0] (after)", table->record[0], table->s->reclength);
-#endif
+        There are however upper level functions that execute per event
+        and check transaction's status. So if the unsafe_rollback_flags
+        are not propagated here, this can lead to errors.
 
-    /*
-       REPLACE is defined as either INSERT or DELETE + INSERT.  If
-       possible, we can replace it with an UPDATE, but that will not
-       work on InnoDB if FOREIGN KEY checks are necessary.
+        For example, a transaction that updates non-transactional tables
+        may be stopped in the middle thus leading to inconsistencies
+        after a restart.
+      */
+      thd->transaction.stmt.mark_modified_non_trans_table();
+      thd->transaction.merge_unsafe_rollback_flags();
+    }
 
-       I (Matz) am not sure of the reason for the last_uniq_key()
-       check as, but I'm guessing that it's something along the
-       following lines.
+    /*
+      Restore the sql_mode after the rows event is processed.
+    */
+    thd->variables.sql_mode= saved_sql_mode;
 
-       Suppose that we got the duplicate key to be a key that is not
-       the last unique key for the table and we perform an update:
-       then there might be another key for which the unique check will
-       fail, so we're better off just deleting the row and inserting
-       the correct row.
+    {/*
+         The following failure injection works in cooperation with tests
+         setting @@global.debug= 'd,stop_slave_middle_group'.
+         The sql thread receives the killed status and will proceed
+         to shut down, trying to finish the incomplete event group.
+     */
-    if (last_uniq_key(table, keynum) &&
-        !table->file->referenced_by_foreign_key())
-    {
-      DBUG_PRINT("info",("Updating row using ha_update_row()"));
-      error=table->file->ha_update_row(table->record[1],
-                                       table->record[0]);
-      switch (error) {
-                
-      case HA_ERR_RECORD_IS_THE_SAME:
-        DBUG_PRINT("info",("ignoring HA_ERR_RECORD_IS_THE_SAME error from"
-                           " ha_update_row()"));
-        error= 0;
-      
-      case 0:
-        break;
-        
-      default:    
-        DBUG_PRINT("info",("ha_update_row() returns error %d",error));
-        table->file->print_error(error, MYF(0));
-      }
-      
-      goto error;
+      DBUG_EXECUTE_IF("stop_slave_middle_group",
+                      if (thd->transaction.all.cannot_safely_rollback())
+                        const_cast<Relay_log_info*>(rli)->abort_slave= 1;);
     }
-    else
+
+    if ((error= do_after_row_operations(rli, error)) &&
+        ignored_error_code(convert_handler_error(error, thd, table)))
     {
-      DBUG_PRINT("info",("Deleting offending row and trying to write new one again"));
-      if ((error= table->file->ha_delete_row(table->record[1])))
-      {
-        DBUG_PRINT("info",("ha_delete_row() returns error %d",error));
-        table->file->print_error(error, MYF(0));
-        goto error;
-      }
-      /* Will retry ha_write_row() with the offending row removed. */
+
+      if (log_warnings)
+        slave_rows_error_report(WARNING_LEVEL, error, rli, thd, table,
+                                get_type_str(),
+                                const_cast<Relay_log_info*>(rli)->get_rpl_log_name(),
+                                (ulong) log_pos);
+      clear_all_errors(thd, const_cast<Relay_log_info*>(rli));
+      error= 0;
     }
+  } // if (table)
+
+  /* Reset OPTION_ALLOW_BATCH so that it does not affect later events. */
+  thd->variables.option_bits&= ~OPTION_ALLOW_BATCH;
+
+  if (error)
+  {
+    slave_rows_error_report(ERROR_LEVEL, error, rli, thd, table,
+                             get_type_str(),
+                             const_cast<Relay_log_info*>(rli)->get_rpl_log_name(),
+                             (ulong) log_pos);
+    /*
+      @todo We should probably not call
+      reset_current_stmt_binlog_format_row() from here.
+
+      Note: this applies to log_event_old.cc too.
+      /Sven
+    */
+    thd->reset_current_stmt_binlog_format_row();
+    thd->is_slave_error= 1;
+    DBUG_RETURN(error);
+  }
+
+  if (get_flags(STMT_END_F) && (error= rows_event_stmt_cleanup(rli, thd)))
+    slave_rows_error_report(ERROR_LEVEL,
+                            thd->is_error() ? 0 : error,
+                            rli, thd, table,
+                            get_type_str(),
+                            const_cast<Relay_log_info*>(rli)->get_rpl_log_name(),
+                            (ulong) log_pos);
+  DBUG_RETURN(error);
+}
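
Note: the loop in do_apply_event() above picks one of three lookup
strategies once per event and then calls it for every row through a
pointer to a member function, avoiding a per-row switch. A minimal
standalone sketch of that dispatch pattern, for illustration only (the
class and strategy names here are made up, not the patch's actual
types):

    #include <cstdio>

    class RowsEvent {
    public:
      // Pointer-to-member type: every strategy shares one signature.
      typedef int (RowsEvent::*apply_fn)(int row);

      int apply_all(int nrows, bool has_hash, bool has_index) {
        // Decide the strategy once, outside the row loop.
        apply_fn fn= has_hash  ? &RowsEvent::hash_scan :
                     has_index ? &RowsEvent::index_scan :
                                 &RowsEvent::table_scan;
        int error= 0;
        for (int r= 0; r < nrows && !error; r++)
          error= (this->*fn)(r);  // same shape as (this->*do_apply_row_ptr)(rli)
        return error;
      }

    private:
      int hash_scan(int)  { std::puts("hash scan");  return 0; }
      int index_scan(int) { std::puts("index scan"); return 0; }
      int table_scan(int) { std::puts("table scan"); return 0; }
    };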
+
+Log_event::enum_skip_reason
+Rows_log_event::do_shall_skip(Relay_log_info *rli)
+{
+  /*
+    If the slave skip counter is 1 and this event does not end a
+    statement, then we should not start executing on the next event.
+    Otherwise, we defer the decision to the normal skipping logic.
+  */
+  if (rli->slave_skip_counter == 1 && !get_flags(STMT_END_F))
+    return Log_event::EVENT_SKIP_IGNORE;
+  else
+    return Log_event::do_shall_skip(rli);
+}
+
+/**
+   The function is called at Rows_log_event statement commit time,
+   normally from Rows_log_event::do_update_pos() and possibly from
+   Query_log_event::do_apply_event() of the COMMIT.
+   The function commits the last statement for the engines and the
+   binlog, and releases resources that have been allocated for the
+   statement.
+
+   @retval  0         Ok.
+   @retval  non-zero  Error at the commit.
+ */
+
+static int rows_event_stmt_cleanup(Relay_log_info const *rli, THD * thd)
+{
+  int error;
+  {
+    /*
+      This is the end of a statement or transaction, so close (and
+      unlock) the tables we opened when processing the
+      Table_map_log_event starting the statement.
+
+      OBSERVER.  This will clear *all* mappings, not only those that
+      are open for the table. There is no good handle for on-close
+      actions for tables.
+
+      NOTE. Even if we have no table ('table' == 0) we still need to be
+      here, so that we increase the group relay log position. If we didn't, we
+      could have a group relay log position which lags behind "forever"
+      (assume the last master's transaction is ignored by the slave because of
+      replicate-ignore rules).
+    */
+    error= thd->binlog_flush_pending_rows_event(TRUE);
+
+    /*
+      If this event is not in a transaction, the call below will, if some
+      transactional storage engines are involved, commit the statement into
+      them and flush the pending event to binlog.
+      If this event is in a transaction, the call will do nothing, but a
+      Xid_log_event will come next which will, if some transactional engines
+      are involved, commit the transaction and flush the pending event to the
+      binlog.
+    */
+    error|= (error ? trans_rollback_stmt(thd) : trans_commit_stmt(thd));
+
+    /*
+      Now what if this is not a transactional engine? We still need to
+      flush the pending event to the binlog; we did it with
+      thd->binlog_flush_pending_rows_event(). Note that we imitate
+      what is done for real queries: a call to
+      ha_autocommit_or_rollback() (sometimes only if it involves a
+      transactional engine), and a call to be sure to have the pending
+      event flushed.
+    */
+
+    /*
+      @todo We should probably not call
+      reset_current_stmt_binlog_format_row() from here.
+
+      Note: this applies to log_event_old.cc too
+
+      Btw, the previous comment about transactional engines does not
+      seem related to anything that happens here.
+      /Sven
+    */
+    thd->reset_current_stmt_binlog_format_row();
+
+    const_cast<Relay_log_info*>(rli)->cleanup_context(thd, 0);
+  }
+  return error;
+}
+
+/**
+   The method either increments the relay log position or
+   commits the current statement and increments the master group
+   position if the event is flagged with STMT_END_F and the
+   statement corresponds to an autocommit query (i.e., replicated
+   without being wrapped in BEGIN/COMMIT).
+
+   @retval 0         Success
+   @retval non-zero  Error in the statement commit
+ */
+int
+Rows_log_event::do_update_pos(Relay_log_info *rli)
+{
+  DBUG_ENTER("Rows_log_event::do_update_pos");
+  int error= 0;
+
+  DBUG_PRINT("info", ("flags: %s",
+                      get_flags(STMT_END_F) ? "STMT_END_F " : ""));
+
+  /* Worker does not execute binlog update position logic */
+  DBUG_ASSERT(!is_mts_worker(rli->info_thd));
+
+  if (get_flags(STMT_END_F))
+  {
+    /*
+      Indicate that a statement is finished.
+      Step the group log position if we are not in a transaction,
+      otherwise increase the event log position.
+    */
+    error= rli->stmt_done(log_pos);
+  }
+  else
+  {
+    rli->inc_event_relay_log_pos();
   }
 
-error:
-  m_table->default_column_bitmaps();
   DBUG_RETURN(error);
 }
 
-#endif
+#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */
+
+#ifndef MYSQL_CLIENT
+bool Rows_log_event::write_data_header(IO_CACHE *file)
+{
+  uchar buf[ROWS_HEADER_LEN];	// No need to init the buffer
+  DBUG_ASSERT(m_table_id != ~0UL);
+  DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master",
+                  {
+                    int4store(buf + 0, m_table_id);
+                    int2store(buf + 4, m_flags);
+                    return (wrapper_my_b_safe_write(file, buf, 6));
+                  });
+  int6store(buf + RW_MAPID_OFFSET, (ulonglong)m_table_id);
+  int2store(buf + RW_FLAGS_OFFSET, m_flags);
+  return (wrapper_my_b_safe_write(file, buf, ROWS_HEADER_LEN));
+}
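
For reference, the post-header written above is fixed-size: a 6-byte
table id followed by a 2-byte flags word, both little-endian
(int6store()/int2store()). A self-contained sketch of the 6-byte store,
illustrative only and not the server's actual macro:

    #include <stdint.h>

    /* Moral equivalent of int6store(): write the low 48 bits of v,
       least significant byte first, independent of host byte order. */
    static void store6(unsigned char *buf, uint64_t v)
    {
      for (int i= 0; i < 6; i++)
        buf[i]= (unsigned char) ((v >> (8 * i)) & 0xff);
    }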
+
+bool Rows_log_event::write_data_body(IO_CACHE*file)
+{
+  /*
+     Note that this should be the number of *bits*, not the number of
+     bytes.
+  */
+  uchar sbuf[sizeof(m_width) + 1];
+  my_ptrdiff_t const data_size= m_rows_cur - m_rows_buf;
+  bool res= false;
+  uchar *const sbuf_end= net_store_length(sbuf, (size_t) m_width);
+  DBUG_ASSERT(static_cast<size_t>(sbuf_end - sbuf) <= sizeof(sbuf));
+
+  DBUG_DUMP("m_width", sbuf, (size_t) (sbuf_end - sbuf));
+  res= res || wrapper_my_b_safe_write(file, sbuf, (size_t) (sbuf_end - sbuf));
+
+  DBUG_DUMP("m_cols", (uchar*) m_cols.bitmap, no_bytes_in_map(&m_cols));
+  res= res || wrapper_my_b_safe_write(file, (uchar*) m_cols.bitmap,
+                              no_bytes_in_map(&m_cols));
+  /*
+    TODO[refactor write]: Remove the "down cast" here (and elsewhere).
+   */
+  if (get_type_code() == UPDATE_ROWS_EVENT)
+  {
+    DBUG_DUMP("m_cols_ai", (uchar*) m_cols_ai.bitmap,
+              no_bytes_in_map(&m_cols_ai));
+    res= res || wrapper_my_b_safe_write(file, (uchar*) m_cols_ai.bitmap,
+                                no_bytes_in_map(&m_cols_ai));
+  }
+  DBUG_DUMP("rows", m_rows_buf, data_size);
+  res= res || wrapper_my_b_safe_write(file, m_rows_buf, (size_t) data_size);
+
+  return res;
+
+}
+#endif
+
+#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
+int Rows_log_event::pack_info(Protocol *protocol)
+{
+  char buf[256];
+  char const *const flagstr=
+    get_flags(STMT_END_F) ? " flags: STMT_END_F" : "";
+  size_t bytes= my_snprintf(buf, sizeof(buf),
+                               "table_id: %lu%s", m_table_id, flagstr);
+  protocol->store(buf, bytes, &my_charset_bin);
+  return 0;
+}
+#endif
+
+#ifdef MYSQL_CLIENT
+void Rows_log_event::print_helper(FILE *file,
+                                  PRINT_EVENT_INFO *print_event_info,
+                                  char const *const name)
+{
+  IO_CACHE *const head= &print_event_info->head_cache;
+  IO_CACHE *const body= &print_event_info->body_cache;
+  if (!print_event_info->short_form)
+  {
+    bool const last_stmt_event= get_flags(STMT_END_F);
+    print_header(head, print_event_info, !last_stmt_event);
+    my_b_printf(head, "\t%s: table id %lu%s\n",
+                name, m_table_id,
+                last_stmt_event ? " flags: STMT_END_F" : "");
+    print_base64(body, print_event_info, !last_stmt_event);
+  }
+}
+#endif
+
+/**************************************************************************
+	Table_map_log_event member functions and support functions
+**************************************************************************/
+
+/**
+  @page How replication of field metadata works.
+  
+  When a table map is created, the master first calls 
+  Table_map_log_event::save_field_metadata() which calculates how many 
+  values will be in the field metadata. Only those fields that require the 
+  extra data are added. The method also loops through all of the fields in 
+  the table calling the method Field::save_field_metadata() which returns the
+  values for the field that will be saved in the metadata and replicated to
+  the slave. Once all fields have been processed, the table map is written to
+  the binlog adding the size of the field metadata and the field metadata to
+  the end of the body of the table map.
+
+  When a table map is read on the slave, the field metadata is read from the 
+  table map and passed to the table_def class constructor which saves the 
+  field metadata from the table map into an array based on the type of the 
+  field. Field metadata values not present (those fields that do not use extra 
+  data) in the table map are initialized as zero (0). The array size is the 
+  same as the columns for the table on the slave.
+
+  Additionally, values saved for field metadata on the master are saved as a 
+  string of bytes (uchar) in the binlog. A field may require 1 or more bytes
+  to store the information. In cases where values require multiple bytes 
+  (e.g. values > 255), the endian-safe methods are used to properly encode 
+  the values on the master and decode them on the slave. When the field
+  metadata values are captured on the slave, they are stored in an array of
+  type uint16. This allows the least number of casts to prevent casting bugs
+  when the field metadata is used in comparisons of field attributes. When
+  the field metadata is used for calculating addresses in pointer math, the
+  type used is uint32. 
+*/
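
Since the doc block above stresses endian-safe encoding for metadata
values wider than one byte, here is a self-contained sketch of the idea
(the server itself uses macros such as int2store()/uint2korr(); this
snippet is illustrative only):

    #include <stdint.h>
    #include <stdio.h>

    /* Store a 16-bit value as two bytes, least significant first,
       regardless of the host's native byte order. */
    static void store2(unsigned char *buf, uint16_t v)
    {
      buf[0]= (unsigned char) (v & 0xff);
      buf[1]= (unsigned char) (v >> 8);
    }

    /* Read it back the same way on the slave side. */
    static uint16_t read2(const unsigned char *buf)
    {
      return (uint16_t) (buf[0] | (buf[1] << 8));
    }

    int main(void)
    {
      unsigned char buf[2];
      store2(buf, 300);            /* a value > 255 needs two bytes */
      printf("%u\n", read2(buf));  /* prints 300 on any platform */
      return 0;
    }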
+
+#if !defined(MYSQL_CLIENT)
+/**
+  Save the field metadata based on the real_type of the field.
+  The metadata saved depends on the type of the field. Some fields
+  store a single byte for pack_length() while others store two bytes
+  for field_length (max length).
+  
+  @retval  0  Ok.
+
+  @todo
+  We may want to consider changing the encoding of the information.
+  Currently, the code attempts to minimize the number of bytes written to 
+  the tablemap. There are at least two other alternatives; 1) using 
+  net_store_length() to store the data allowing it to choose the number of
+  bytes that are appropriate thereby making the code much easier to 
+  maintain (only 1 place to change the encoding), or 2) use a fixed number
+  of bytes for each field. The problem with option 1 is that net_store_length()
+  will use one byte if the value < 251, but 3 bytes if it is > 250. Thus,
+  for fields like CHAR which can be no larger than 255 characters, the method
+  will use 3 bytes when the value is > 250. Further, every value that is
+  encoded using 2 parts (e.g., pack_length, field_length) will be numerically
+  > 250 and will therefore use 3 bytes for each value. The problem with
+  option 2 is that, while it avoids the 3-byte case, it wastes 1 byte
+  for every field that does not encode 2 parts.
+*/
+int Table_map_log_event::save_field_metadata()
+{
+  DBUG_ENTER("Table_map_log_event::save_field_metadata");
+  int index= 0;
+  for (unsigned int i= 0 ; i < m_table->s->fields ; i++)
+  {
+    DBUG_PRINT("debug", ("field_type: %d", m_coltype[i]));
+    index+= m_table->s->field[i]->save_field_metadata(&m_field_metadata[index]);
+  }
+  DBUG_RETURN(index);
+}
+#endif /* !defined(MYSQL_CLIENT) */
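
The 1-versus-3-byte length encoding discussed in the @todo above is the
same one the constructor below accounts for when it adds the
field-metadata length prefix to m_data_size. A simplified model of
net_store_length()'s size behaviour for the range relevant here
(illustrative, not the server's implementation):

    #include <stddef.h>
    #include <stdint.h>

    /* Length-prefix size: values < 251 fit in 1 byte; values up to
       0xffff take 3 bytes (marker byte + 2-byte value). Larger cases
       are omitted for brevity. */
    static size_t length_prefix_size(uint64_t v)
    {
      if (v < 251)
        return 1;
      if (v <= 0xffff)
        return 3;
      return 0;
    }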
+
+/*
+  Constructor used to build an event for writing to the binary log.
+  Mats says tbl->s lives longer than this event so it's ok to copy pointers
+  (tbl->s->db etc) and not pointer content.
+ */
+#if !defined(MYSQL_CLIENT)
+Table_map_log_event::Table_map_log_event(THD *thd, TABLE *tbl, ulong tid,
+                                         bool using_trans)
+  : Log_event(thd, 0,
+              using_trans ? Log_event::EVENT_TRANSACTIONAL_CACHE :
+                            Log_event::EVENT_STMT_CACHE,
+              Log_event::EVENT_NORMAL_LOGGING),
+    m_table(tbl),
+    m_dbnam(tbl->s->db.str),
+    m_dblen(m_dbnam ? tbl->s->db.length : 0),
+    m_tblnam(tbl->s->table_name.str),
+    m_tbllen(tbl->s->table_name.length),
+    m_colcnt(tbl->s->fields),
+    m_memory(NULL),
+    m_table_id(tid),
+    m_flags(TM_BIT_LEN_EXACT_F),
+    m_data_size(0),
+    m_field_metadata(0),
+    m_field_metadata_size(0),
+    m_null_bits(0),
+    m_meta_memory(NULL)
+{
+  uchar cbuf[sizeof(m_colcnt) + 1];
+  uchar *cbuf_end;
+  DBUG_ASSERT(m_table_id != ~0UL);
+  /*
+    In TABLE_SHARE, "db" and "table_name" are 0-terminated (see this comment in
+    table.cc / alloc_table_share():
+      Use the fact the key is db/0/table_name/0
+    As we rely on this let's assert it.
+  */
+  DBUG_ASSERT((tbl->s->db.str == 0) ||
+              (tbl->s->db.str[tbl->s->db.length] == 0));
+  DBUG_ASSERT(tbl->s->table_name.str[tbl->s->table_name.length] == 0);
+
+
+  m_data_size=  TABLE_MAP_HEADER_LEN;
+  DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master", m_data_size= 6;);
+  m_data_size+= m_dblen + 2;	// Include length and terminating \0
+  m_data_size+= m_tbllen + 2;	// Include length and terminating \0
+  cbuf_end= net_store_length(cbuf, (size_t) m_colcnt);
+  DBUG_ASSERT(static_cast<size_t>(cbuf_end - cbuf) <= sizeof(cbuf));
+  m_data_size+= (cbuf_end - cbuf) + m_colcnt;	// COLCNT and column types
+
+  /* If malloc fails, caught in is_valid() */
+  if ((m_memory= (uchar*) my_malloc(m_colcnt, MYF(MY_WME))))
+  {
+    m_coltype= reinterpret_cast<uchar*>(m_memory);
+    for (unsigned int i= 0 ; i < m_table->s->fields ; ++i)
+      m_coltype[i]= m_table->field[i]->binlog_type();
+  }
+
+  /*
+    Calculate a bitmap for the results of maybe_null() for all columns.
+    The bitmap is used to determine when there is a column from the master
+    that is not on the slave and is null and thus not in the row data during
+    replication.
+  */
+  uint num_null_bytes= (m_table->s->fields + 7) / 8;
+  m_data_size+= num_null_bytes;
+  m_meta_memory= (uchar *)my_multi_malloc(MYF(MY_WME),
+                                 &m_null_bits, num_null_bytes,
+                                 &m_field_metadata, (m_colcnt * 2),
+                                 NULL);
+
+  memset(m_field_metadata, 0, (m_colcnt * 2));
+
+  /*
+    Create an array for the field metadata and store it.
+  */
+  m_field_metadata_size= save_field_metadata();
+  DBUG_ASSERT(m_field_metadata_size <= (m_colcnt * 2));
+
+  /*
+    Now set the size of the data to the size of the field metadata array
+    plus one or three bytes (see pack.c:net_store_length) for number of 
+    elements in the field metadata array.
+  */
+  if (m_field_metadata_size < 251)
+    m_data_size+= m_field_metadata_size + 1; 
+  else
+    m_data_size+= m_field_metadata_size + 3; 
+
+  memset(m_null_bits, 0, num_null_bytes);
+  for (unsigned int i= 0 ; i < m_table->s->fields ; ++i)
+    if (m_table->field[i]->maybe_null())
+      m_null_bits[(i / 8)]+= 1 << (i % 8);
+
+}
+#endif /* !defined(MYSQL_CLIENT) */
+
+/*
+  Constructor used by slave to read the event from the binary log.
+ */
+#if defined(HAVE_REPLICATION)
+Table_map_log_event::Table_map_log_event(const char *buf, uint event_len,
+                                         const Format_description_log_event
+                                         *description_event)
+
+  : Log_event(buf, description_event),
+#ifndef MYSQL_CLIENT
+    m_table(NULL),
+#endif
+    m_dbnam(NULL), m_dblen(0), m_tblnam(NULL), m_tbllen(0),
+    m_colcnt(0), m_coltype(0),
+    m_memory(NULL), m_table_id(ULONG_MAX), m_flags(0),
+    m_data_size(0), m_field_metadata(0), m_field_metadata_size(0),
+    m_null_bits(0), m_meta_memory(NULL)
+{
+  unsigned int bytes_read= 0;
+  DBUG_ENTER("Table_map_log_event::Table_map_log_event(const char*,uint,...)");
+
+  uint8 common_header_len= description_event->common_header_len;
+  uint8 post_header_len= description_event->post_header_len[TABLE_MAP_EVENT-1];
+  DBUG_PRINT("info",("event_len: %u  common_header_len: %d  post_header_len: %d",
+                     event_len, common_header_len, post_header_len));
+
+  /*
+    Don't print debug messages when running valgrind since they can
+    trigger false warnings.
+   */
+#ifndef HAVE_purify
+  DBUG_DUMP("event buffer", (uchar*) buf, event_len);
+#endif
+
+  /* Read the post-header */
+  const char *post_start= buf + common_header_len;
+
+  post_start+= TM_MAPID_OFFSET;
+  if (post_header_len == 6)
+  {
+    /* Master is of an intermediate source tree before 5.1.4. Id is 4 bytes */
+    m_table_id= uint4korr(post_start);
+    post_start+= 4;
+  }
+  else
+  {
+    DBUG_ASSERT(post_header_len == TABLE_MAP_HEADER_LEN);
+    m_table_id= (ulong) uint6korr(post_start);
+    post_start+= TM_FLAGS_OFFSET;
+  }
+
+  DBUG_ASSERT(m_table_id != ~0UL);
+
+  m_flags= uint2korr(post_start);
+
+  /* Read the variable part of the event */
+  const char *const vpart= buf + common_header_len + post_header_len;
+
+  /* Extract the length of the various parts from the buffer */
+  uchar const *const ptr_dblen= (uchar const*)vpart + 0;
+  m_dblen= *(uchar*) ptr_dblen;
+
+  /* Length of database name + counter + terminating null */
+  uchar const *const ptr_tbllen= ptr_dblen + m_dblen + 2;
+  m_tbllen= *(uchar*) ptr_tbllen;
+
+  /* Length of table name + counter + terminating null */
+  uchar const *const ptr_colcnt= ptr_tbllen + m_tbllen + 2;
+  uchar *ptr_after_colcnt= (uchar*) ptr_colcnt;
+  m_colcnt= net_field_length(&ptr_after_colcnt);
+
+  DBUG_PRINT("info",("m_dblen: %lu  off: %ld  m_tbllen: %lu  off: %ld  m_colcnt: %lu  off: %ld",
+                     (ulong) m_dblen, (long) (ptr_dblen-(const uchar*)vpart), 
+                     (ulong) m_tbllen, (long) (ptr_tbllen-(const uchar*)vpart),
+                     m_colcnt, (long) (ptr_colcnt-(const uchar*)vpart)));
+
+  /* Allocate mem for all fields in one go. If fails, caught in is_valid() */
+  m_memory= (uchar*) my_multi_malloc(MYF(MY_WME),
+                                     &m_dbnam, (uint) m_dblen + 1,
+                                     &m_tblnam, (uint) m_tbllen + 1,
+                                     &m_coltype, (uint) m_colcnt,
+                                     NullS);
+
+  if (m_memory)
+  {
+    /* Copy the different parts into their memory */
+    strncpy(const_cast<char*>(m_dbnam), (const char*)ptr_dblen  + 1, m_dblen + 1);
+    strncpy(const_cast<char*>(m_tblnam), (const char*)ptr_tbllen + 1, m_tbllen + 1);
+    memcpy(m_coltype, ptr_after_colcnt, m_colcnt);
+
+    ptr_after_colcnt= ptr_after_colcnt + m_colcnt;
+    bytes_read= (uint) (ptr_after_colcnt - (uchar *)buf);
+    DBUG_PRINT("info", ("Bytes read: %d.\n", bytes_read));
+    if (bytes_read < event_len)
+    {
+      m_field_metadata_size= net_field_length(&ptr_after_colcnt);
+      DBUG_ASSERT(m_field_metadata_size <= (m_colcnt * 2));
+      uint num_null_bytes= (m_colcnt + 7) / 8;
+      m_meta_memory= (uchar *)my_multi_malloc(MYF(MY_WME),
+                                     &m_null_bits, num_null_bytes,
+                                     &m_field_metadata, m_field_metadata_size,
+                                     NULL);
+      memcpy(m_field_metadata, ptr_after_colcnt, m_field_metadata_size);
+      ptr_after_colcnt= (uchar*)ptr_after_colcnt + m_field_metadata_size;
+      memcpy(m_null_bits, ptr_after_colcnt, num_null_bytes);
+    }
+  }
+
+  DBUG_VOID_RETURN;
+}
+#endif
+
+Table_map_log_event::~Table_map_log_event()
+{
+  my_free(m_meta_memory);
+  my_free(m_memory);
+}
+
+/*
+  Return value is an error code, one of:
+
+      -1     Failure to open table   [from open_tables()]
+       0     Success
+       1     No room for more tables [from set_table()]
+       2     Out of memory           [from set_table()]
+       3     Wrong table definition
+       4     Daisy-chaining RBR with SBR not possible
+ */
+
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+
+enum enum_tbl_map_status
+{
+  /* no duplicate identifier found */
+  OK_TO_PROCESS= 0,
+
+  /* this table map must be filtered out */
+  FILTERED_OUT= 1,
+
+  /* the identifier already maps a table with different properties */
+  SAME_ID_MAPPING_DIFFERENT_TABLE= 2,
+  
+  /* a duplicate identifier was found mapping the same table */
+  SAME_ID_MAPPING_SAME_TABLE= 3
+};
+
+/*
+  Checks if this table map event should be processed or not. First
+  it checks the filtering rules, and then looks for duplicate identifiers
+  in the existing list of rli->tables_to_lock.
+
+  It checks that there hasn't been any corruption by verifying that there
+  are no duplicate entries with different properties.
+
+  In some cases, some binary logs could get corrupted, showing several
+  tables mapped to the same table_id, 0 (see: BUG#56226). Thus we do this
+  early sanity check for such cases and avoid a server crash later.
+
+  In some corner cases, the master logs duplicate table map events, i.e.,
+  same id, same database name, same table name (see: BUG#37137). This is
+  different from the above as it's the same table that is mapped again 
+  to the same identifier. Thus we cannot just check for same ids and
+  assume that the event is corrupted; we need to check every property.
+
+  NOTE: in the event that BUG#37137 ever gets fixed, this extra check 
+        will still be valid because we would need to support old binary 
+        logs anyway.
+
+  @param rli The relay log info reference.
+  @param table_list A list element containing the table to check against.
+  @return OK_TO_PROCESS 
+            if there was no identifier already in rli->tables_to_lock 
+            
+          FILTERED_OUT
+            if the event is filtered according to the filtering rules
+
+          SAME_ID_MAPPING_DIFFERENT_TABLE 
+            if the same identifier already maps a different table in 
+            rli->tables_to_lock
 
-int
-Write_rows_log_event::do_exec_row(const Relay_log_info *const rli)
+          SAME_ID_MAPPING_SAME_TABLE 
+            if the same identifier already maps the same table in 
+            rli->tables_to_lock.
+*/
+static enum_tbl_map_status
+check_table_map(Relay_log_info const *rli, RPL_TABLE_LIST *table_list)
 {
-  DBUG_ASSERT(m_table != NULL);
-  int error= write_row(rli, slave_exec_mode == SLAVE_EXEC_MODE_IDEMPOTENT);
+  DBUG_ENTER("check_table_map");
+  enum_tbl_map_status res= OK_TO_PROCESS;
 
-  if (error && !thd->is_error())
+  if (rli->info_thd->slave_thread /* filtering is for slave only */ &&
+      (!rpl_filter->db_ok(table_list->db) ||
+       (rpl_filter->is_on() && !rpl_filter->tables_ok("", table_list))))
+    res= FILTERED_OUT;
+  else
   {
-    DBUG_ASSERT(0);
-    my_error(ER_UNKNOWN_ERROR, MYF(0));
-  }
+    for(RPL_TABLE_LIST *ptr= static_cast<RPL_TABLE_LIST*>(rli->tables_to_lock);
+        ptr; 
+        ptr= static_cast<RPL_TABLE_LIST*>(ptr->next_local))
+    {
+      if (ptr->table_id == table_list->table_id)
+      {
 
-  return error;
-}
+        if (strcmp(ptr->db, table_list->db) || 
+            strcmp(ptr->alias, table_list->table_name) || 
+            ptr->lock_type != TL_WRITE) // the ::do_apply_event always sets TL_WRITE
+          res= SAME_ID_MAPPING_DIFFERENT_TABLE;
+        else
+          res= SAME_ID_MAPPING_SAME_TABLE;
 
-#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */
+        break;
+      }
+    }
+  }
 
-#ifdef MYSQL_CLIENT
-void Write_rows_log_event::print(FILE *file, PRINT_EVENT_INFO* print_event_info)
-{
-  Rows_log_event::print_helper(file, print_event_info, "Write_rows");
+  DBUG_PRINT("debug", ("check of table map ended up with: %u", res));
+
+  DBUG_RETURN(res);
 }
-#endif
 
-/**************************************************************************
-	Delete_rows_log_event member functions
-**************************************************************************/
+int Table_map_log_event::do_apply_event(Relay_log_info const *rli)
+{
+  RPL_TABLE_LIST *table_list;
+  char *db_mem, *tname_mem, *ptr;
+  size_t dummy_len;
+  void *memory;
+  DBUG_ENTER("Table_map_log_event::do_apply_event(Relay_log_info*)");
+  DBUG_ASSERT(rli->info_thd == thd);
 
-#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
-/*
-  Compares table->record[0] and table->record[1]
+  /* Step the query id to mark which columns are actually used. */
+  thd->set_query_id(next_query_id());
 
-  Returns TRUE if different.
-*/
-static bool record_compare(TABLE *table, MY_BITMAP *cols)
-{
-  /*
-    Need to set the X bit and the filler bits in both records since
-    there are engines that do not set it correctly.
+  if (!(memory= my_multi_malloc(MYF(MY_WME),
+                                &table_list, (uint) sizeof(RPL_TABLE_LIST),
+                                &db_mem, (uint) NAME_LEN + 1,
+                                &tname_mem, (uint) NAME_LEN + 1,
+                                NullS)))
+    DBUG_RETURN(HA_ERR_OUT_OF_MEM);
 
-    In addition, since MyISAM checks that one hasn't tampered with the
-    record, it is necessary to restore the old bytes into the record
-    after doing the comparison.
+  strmov(db_mem, m_dbnam);
+  strmov(tname_mem, m_tblnam);
 
-    TODO[record format ndb]: Remove it once NDB returns correct
-    records. Check that the other engines also return correct records.
-   */
+  if (lower_case_table_names == 1)
+  {
+    my_casedn_str(system_charset_info, db_mem);
+    my_casedn_str(system_charset_info, tname_mem);
+  }
 
-  DBUG_DUMP("record[0]", table->record[0], table->s->reclength);
-  DBUG_DUMP("record[1]", table->record[1], table->s->reclength);
+  /* rewrite rules changed the database */
+  if (((ptr= (char*) rpl_filter->get_rewrite_db(db_mem, &dummy_len)) != db_mem))
+    strmov(db_mem, ptr);
 
-  bool result= FALSE;
-  uchar saved_x[2]= {0, 0}, saved_filler[2]= {0, 0};
+  table_list->init_one_table(db_mem, strlen(db_mem),
+                             tname_mem, strlen(tname_mem),
+                             tname_mem, TL_WRITE);
 
-  if (table->s->null_bytes > 0)
+  table_list->table_id= DBUG_EVALUATE_IF("inject_tblmap_same_id_maps_diff_table", 0, m_table_id);
+  table_list->updating= 1;
+  DBUG_PRINT("debug", ("table: %s is mapped to %u", table_list->table_name, table_list->table_id));
+  enum_tbl_map_status tblmap_status= check_table_map(rli, table_list);
+  if (tblmap_status == OK_TO_PROCESS)
   {
-    for (int i = 0 ; i < 2 ; ++i)
-    {
-      /* 
-        If we have an X bit then we need to take care of it.
-      */
-      if (!(table->s->db_options_in_use & HA_OPTION_PACK_RECORD))
-      {
-        saved_x[i]= table->record[i][0];
-        table->record[i][0]|= 1U;
-      }
-
-      /*
-         If (last_null_bit_pos == 0 && null_bytes > 1), then:
+    DBUG_ASSERT(thd->lex->query_tables != table_list);
 
-         X bit (if any) + N nullable fields + M Field_bit fields = 8 bits 
+    /*
+      Use placement new to construct the table_def instance in the
+      memory allocated for it inside table_list.
 
-         Ie, the entire byte is used.
-      */
-      if (table->s->last_null_bit_pos > 0)
-      {
-        saved_filler[i]= table->record[i][table->s->null_bytes - 1];
-        table->record[i][table->s->null_bytes - 1]|=
-          256U - (1U << table->s->last_null_bit_pos);
-      }
-    }
-  }
+      The memory allocated by the table_def structure (i.e., not the
+      memory allocated *for* the table_def structure) is released
+      inside Relay_log_info::clear_tables_to_lock() by calling the
+      table_def destructor explicitly.
+    */
+    new (&table_list->m_tabledef)
+      table_def(m_coltype, m_colcnt,
+                m_field_metadata, m_field_metadata_size,
+                m_null_bits, m_flags);
+    table_list->m_tabledef_valid= TRUE;
+    table_list->m_conv_table= NULL;
+    table_list->open_type= OT_BASE_ONLY;
 
-  /**
-    Compare full record only if:
-    - there are no blob fields (otherwise we would also need 
-      to compare blobs contents as well);
-    - there are no varchar fields (otherwise we would also need
-      to compare varchar contents as well);
-    - there are no null fields, otherwise NULLed fields 
-      contents (i.e., the don't care bytes) may show arbitrary 
-      values, depending on how each engine handles internally.
-    - if all the bitmap is set (both are full rows)
-    */
-  if ((table->s->blob_fields + 
-       table->s->varchar_fields +
-       table->s->null_fields) == 0 &&
-      bitmap_is_set_all(cols))
-  {
-    result= cmp_record(table,record[1]);
+    /*
+      We record in the slave's information that the table should be
+      locked by linking the table into the list of tables to lock.
+    */
+    table_list->next_global= table_list->next_local= rli->tables_to_lock;
+    const_cast<Relay_log_info*>(rli)->tables_to_lock= table_list;
+    const_cast<Relay_log_info*>(rli)->tables_to_lock_count++;
+    /* 'memory' is freed in clear_tables_to_lock */
   }
-  else
+  else  // FILTERED_OUT, SAME_ID_MAPPING_*
   {
-    /* 
-      Fallback to field-by-field comparison:
-      1. start by checking if the field is signaled:
-      2. if it is, first compare the null bit if the field is nullable
-      3. then compare the contents of the field, if it is not 
-         set to null
+    /*
+      If mapped already but with different properties, we raise an
+      error.
+      If mapped already but with same properties we skip the event.
+      If filtered out we skip the event.
+
+      In all three cases, we need to free the memory previously 
+      allocated.
      */
-    for (Field **ptr=table->field ; 
-         *ptr && ((*ptr)->field_index < cols->n_bits) && !result;
-         ptr++)
+    if (tblmap_status == SAME_ID_MAPPING_DIFFERENT_TABLE)
     {
-      Field *field= *ptr;
+      /*
+        Something bad has happened. We need to stop the slave as strange things
+        could happen if we proceed: slave crash, wrong table being updated, ...
+        As a consequence we push an error in this case.
+       */
 
-      if (bitmap_is_set(cols, field->field_index))
-      {
-        /* compare null bit */
-        if (field->is_null() != field->is_null_in_record(table->record[1]))
-          result= TRUE;
-
-        /* compare content, only if fields are not set to NULL */
-        else if (!field->is_null())
-          result= field->cmp_binary_offset(table->s->rec_buff_length);
-      }
-    }
+      char buf[256];
+
+      my_snprintf(buf, sizeof(buf), 
+                  "Found table map event mapping table id %u which "
+                  "was already mapped but with different settings.",
+                  table_list->table_id);
+
+      if (thd->slave_thread)
+        rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR, 
+                    ER(ER_SLAVE_FATAL_ERROR), buf);
+      else
+        /* 
+          For the cases in which a 'BINLOG' statement is set to 
+          execute in a user session 
+         */
+        my_printf_error(ER_SLAVE_FATAL_ERROR, ER(ER_SLAVE_FATAL_ERROR), 
+                        MYF(0), buf);
+    } 
+    
+    my_free(memory);
   }
 
-  /*
-    Restore the saved bytes.
+  DBUG_RETURN(tblmap_status == SAME_ID_MAPPING_DIFFERENT_TABLE);
+}
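
The placement-new construction of table_def above, paired with the
explicit destructor call in Relay_log_info::clear_tables_to_lock() that
the comment mentions, is the standard C++ pattern for building an
object inside pre-allocated storage. A minimal standalone sketch of the
pattern (types here are made up for illustration):

    #include <new>      // placement operator new
    #include <cstdio>
    #include <cstdlib>

    struct Def {
      int cols;
      explicit Def(int c) : cols(c) { std::puts("constructed"); }
      ~Def() { std::puts("destroyed"); }
    };

    int main()
    {
      // One flat allocation, as with my_multi_malloc() in the patch.
      void *mem= std::malloc(sizeof(Def));
      Def *d= new (mem) Def(3);  // construct in place; no extra allocation
      std::printf("cols=%d\n", d->cols);
      d->~Def();                 // the destructor must be called explicitly
      std::free(mem);            // only then release the raw storage
      return 0;
    }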
 
-    TODO[record format ndb]: Remove this code once NDB returns the
-    correct record format.
+Log_event::enum_skip_reason
+Table_map_log_event::do_shall_skip(Relay_log_info *rli)
+{
+  /*
+    If the slave skip counter is 1, then we should not start executing
+    on the next event.
   */
-  if (table->s->null_bytes > 0)
-  {
-    for (int i = 0 ; i < 2 ; ++i)
-    {
-      if (!(table->s->db_options_in_use & HA_OPTION_PACK_RECORD))
-        table->record[i][0]= saved_x[i];
+  return continue_group(rli);
+}
 
-      if (table->s->last_null_bit_pos)
-        table->record[i][table->s->null_bytes - 1]= saved_filler[i];
-    }
-  }
+int Table_map_log_event::do_update_pos(Relay_log_info *rli)
+{
+  rli->inc_event_relay_log_pos();
+  return 0;
+}
+
+#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */
 
-  return result;
+#ifndef MYSQL_CLIENT
+bool Table_map_log_event::write_data_header(IO_CACHE *file)
+{
+  DBUG_ASSERT(m_table_id != ~0UL);
+  uchar buf[TABLE_MAP_HEADER_LEN];
+  DBUG_EXECUTE_IF("old_row_based_repl_4_byte_map_id_master",
+                  {
+                    int4store(buf + 0, m_table_id);
+                    int2store(buf + 4, m_flags);
+                    return (wrapper_my_b_safe_write(file, buf, 6));
+                  });
+  int6store(buf + TM_MAPID_OFFSET, (ulonglong)m_table_id);
+  int2store(buf + TM_FLAGS_OFFSET, m_flags);
+  return (wrapper_my_b_safe_write(file, buf, TABLE_MAP_HEADER_LEN));
 }
 
+bool Table_map_log_event::write_data_body(IO_CACHE *file)
+{
+  DBUG_ASSERT(m_dbnam != NULL);
+  DBUG_ASSERT(m_tblnam != NULL);
+  /* We use only one byte per length for storage in event: */
+  DBUG_ASSERT(m_dblen < 128);
+  DBUG_ASSERT(m_tbllen < 128);
+
+  uchar const dbuf[]= { (uchar) m_dblen };
+  uchar const tbuf[]= { (uchar) m_tbllen };
 
-/**
-  Checks if any of the columns in the given table is
-  signaled in the bitmap.
+  uchar cbuf[sizeof(m_colcnt) + 1];
+  uchar *const cbuf_end= net_store_length(cbuf, (size_t) m_colcnt);
+  DBUG_ASSERT(static_cast<size_t>(cbuf_end - cbuf) <= sizeof(cbuf));
 
-  For each column in the given table checks if it is
-  signaled in the bitmap. This is most useful when deciding
-  whether a before image (BI) can be used or not for 
-  searching a row. If no column is signaled, then the 
-  image cannot be used for searching a record (regardless 
-  of using position(), index scan or table scan). Here is 
-  an example:
+  /*
+    Store the size of the field metadata.
+  */
+  uchar mbuf[sizeof(m_field_metadata_size)];
+  uchar *const mbuf_end= net_store_length(mbuf, m_field_metadata_size);
 
-  MASTER> SET @@binlog_row_image='MINIMAL';
-  MASTER> CREATE TABLE t1 (a int, b int, c int, primary key(c));
-  SLAVE> CREATE TABLE t1 (a int, b int);
-  MASTER> INSERT INTO t1 VALUES (1,2,3);
-  MASTER> UPDATE t1 SET a=2 WHERE b=2;
+  return (wrapper_my_b_safe_write(file, dbuf,      sizeof(dbuf)) ||
+          wrapper_my_b_safe_write(file, (const uchar*)m_dbnam,   m_dblen+1) ||
+          wrapper_my_b_safe_write(file, tbuf,      sizeof(tbuf)) ||
+          wrapper_my_b_safe_write(file, (const uchar*)m_tblnam,  m_tbllen+1) ||
+          wrapper_my_b_safe_write(file, cbuf, (size_t) (cbuf_end - cbuf)) ||
+          wrapper_my_b_safe_write(file, m_coltype, m_colcnt) ||
+          wrapper_my_b_safe_write(file, mbuf, (size_t) (mbuf_end - mbuf)) ||
+          wrapper_my_b_safe_write(file, m_field_metadata, m_field_metadata_size) ||
+          wrapper_my_b_safe_write(file, m_null_bits, (m_colcnt + 7) / 8));
+}
+#endif
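
Taken together with write_data_header(), the body written above gives
the table map event the following variable-part layout (the same layout
parsed by the slave-side constructor earlier in this patch):

    1 byte                  database name length
    m_dblen + 1 bytes       database name, NUL-terminated
    1 byte                  table name length
    m_tbllen + 1 bytes      table name, NUL-terminated
    1 or 3 bytes            column count (net_store_length)
    m_colcnt bytes          one type byte per column
    1 or 3 bytes            field metadata size (net_store_length)
    m_field_metadata_size   field metadata
    (m_colcnt + 7) / 8      null bits, one bit per column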
 
-  For the update statement only the PK (column c) is 
-  logged in the before image (BI). As such, given that 
-  the slave has no column c, it will not be able to 
-  find the row, because BI has no values for the columns
-  the slave knows about (column a and b).
+#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
 
-  @param table   the table reference on the slave.
-  @param cols the bitmap signaling columns available in 
-                 the BI.
+/*
+  Print some useful information for the Info field of
+  SHOW BINLOG EVENTS.
+ */
 
-  @return TRUE if BI contains usable colums for searching, 
-          FALSE otherwise.
-*/
-static
-my_bool is_any_column_signaled_for_table(TABLE *table, MY_BITMAP *cols)
+#if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
+int Table_map_log_event::pack_info(Protocol *protocol)
 {
-
-  int nfields_set= 0;
-  for (Field **ptr=table->field ; 
-       *ptr && ((*ptr)->field_index < cols->n_bits);
-       ptr++)
-  {
-    if (bitmap_is_set(cols, (*ptr)->field_index))
-      nfields_set++;
-  }
-
-  return (nfields_set != 0);
+  char buf[256];
+  size_t bytes= my_snprintf(buf, sizeof(buf),
+                            "table_id: %lu (%s.%s)",
+                            m_table_id, m_dbnam, m_tblnam);
+  protocol->store(buf, bytes, &my_charset_bin);
+  return 0;
 }
+#endif
 
-/**
-  Checks if the fields in the given key are signaled in
-  the bitmap.
-
-  Validates whether the before image is usable for the
-  given key. It can be the case that the before image
-  does not contain values for the key (eg, master was
-  using 'minimal' option for image logging and slave has
-  different index structure on the table). Here is an
-  example:
 
-  MASTER> SET @@binlog_row_image='MINIMAL';
-  MASTER> CREATE TABLE t1 (a int, b int, c int, primary key(c));
-  SLAVE> CREATE TABLE t1 (a int, b int, c int, key(a,c));
-  MASTER> INSERT INTO t1 VALUES (1,2,3);
-  MASTER> UPDATE t1 SET a=2 WHERE b=2;
+#endif
 
-  When finding the row on the slave, one cannot use the
-  index (a,c) to search for the row, because there is only
-  data in the before image for column c. This function
-  checks the fields needed for a given key and searches
-  the bitmap to see if all the fields required are 
-  signaled.
-  
-  @param keyinfo  reference to key.
-  @param cols     the bitmap signaling which columns 
-                  have available data.
 
-  @return TRUE if all fields are signaled in the bitmap 
-          for the given key, FALSE otherwise.
-*/
-static
-my_bool are_all_columns_signaled_for_key(KEY *keyinfo, MY_BITMAP *cols)
+#ifdef MYSQL_CLIENT
+void Table_map_log_event::print(FILE *, PRINT_EVENT_INFO *print_event_info)
 {
-  for (uint i=0 ; i < keyinfo->key_parts ;i++)
+  if (!print_event_info->short_form)
   {
-    uint fieldnr= keyinfo->key_part[i].fieldnr - 1;
-    if (fieldnr >= cols->n_bits || 
-        !bitmap_is_set(cols, fieldnr))
-      return FALSE;
+    print_header(&print_event_info->head_cache, print_event_info, TRUE);
+    my_b_printf(&print_event_info->head_cache,
+                "\tTable_map: `%s`.`%s` mapped to number %lu\n",
+                m_dbnam, m_tblnam, m_table_id);
+    print_base64(&print_event_info->body_cache, print_event_info, TRUE);
   }
- 
-  return TRUE;
 }
+#endif
 
-/**
-  Searches the table for a given key that can be used
-  according to the existing values, ie, columns set
-  in the bitmap.
-
-  The caller can specify which type of key to find by
-  setting the following flags in the key_type parameter:
-
-    - PRI_KEY_FLAG
-      Returns the primary key.
-
-    - UNIQUE_KEY_FLAG
-      Returns a unique key (flagged with HA_NOSAME)
-
-    - MULTIPLE_KEY_FLAG
-      Returns a key that is not unique (flagged with HA_NOSAME 
-      and without HA_NULL_PART_KEY) nor PK.
-
-  The above flags can be used together, in which case, the
-  search is conducted in the above listed order. Eg, the 
-  following flag:
-
-    (PRI_KEY_FLAG | UNIQUE_KEY_FLAG | MULTIPLE_KEY_FLAG)
-
-  means that a primary key is returned if it is suitable. If
-  not then the unique keys are searched. If no unique key is
-  suitable, then the keys are searched. Finally, if no key
-  is suitable, MAX_KEY is returned.
+/**************************************************************************
+	Write_rows_log_event member functions
+**************************************************************************/
 
-  @param table    reference to the table.
-  @param bi_cols  a bitmap that filters out columns that should
-                  not be considered while searching the key. 
-                  Columns that should be considered are set.
-  @param key_type the type of key to search for.
+/*
+  Constructor used to build an event for writing to the binary log.
+ */
+#if !defined(MYSQL_CLIENT)
+Write_rows_log_event::Write_rows_log_event(THD *thd_arg, TABLE *tbl_arg,
+                                           ulong tid_arg,
+                                           bool is_transactional)
+  : Rows_log_event(thd_arg, tbl_arg, tid_arg, tbl_arg->write_set, is_transactional)
+{
+}
+#endif
 
-  @return MAX_KEY if no key, according to the key_type specified
-          is suitable. Returns the key otherwise.
+/*
+  Constructor used by slave to read the event from the binary log.
+ */
+#ifdef HAVE_REPLICATION
+Write_rows_log_event::Write_rows_log_event(const char *buf, uint event_len,
+                                           const Format_description_log_event
+                                           *description_event)
+: Rows_log_event(buf, event_len, WRITE_ROWS_EVENT, description_event)
+{
+}
+#endif
 
-*/
-static
-uint
-search_key_in_table(TABLE *table, MY_BITMAP *bi_cols, uint key_type)
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+int 
+Write_rows_log_event::do_before_row_operations(const Slave_reporting_capability *const)
 {
-  KEY *keyinfo;
-  uint res= MAX_KEY;
-  uint key;
+  int error= 0;
 
-  if (key_type & PRI_KEY_FLAG && (table->s->primary_key < MAX_KEY))
-  {
-    keyinfo= table->s->key_info + (uint) table->s->primary_key;
-    if (are_all_columns_signaled_for_key(keyinfo, bi_cols)) 
-      return table->s->primary_key;
-  }
+  /*
+    Increment the global status insert count variable
+  */
+  if (get_flags(STMT_END_F))
+    status_var_increment(thd->status_var.com_stat[SQLCOM_INSERT]);
 
-  if (key_type & UNIQUE_KEY_FLAG && table->s->uniques)
+  /**
+     todo: introduce a property for the event (handler?) that forces
+     applying the event in the replace (idempotent) fashion.
+  */
+  if ((slave_exec_mode == SLAVE_EXEC_MODE_IDEMPOTENT) ||
+      (m_table->s->db_type()->db_type == DB_TYPE_NDBCLUSTER))
   {
-    for (key=0,keyinfo= table->key_info ; 
-         (key < table->s->keys) && (res == MAX_KEY);
-         key++,keyinfo++)
-    {
-      /*
-        - Unique keys cannot be disabled, thence we skip the check.
-        - Skip unique keys with nullable parts
-        - Skip primary keys
-      */
-      if (!((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY)) != HA_NOSAME) ||
-          (key == table->s->primary_key))
-        continue;
-      res= are_all_columns_signaled_for_key(keyinfo, bi_cols) ? 
-           key : MAX_KEY;
-
-      if (res < MAX_KEY)
-        return res;
-    }
+    /*
+      We are using REPLACE semantics and not INSERT IGNORE semantics
+      when writing rows, that is: new rows replace old rows.  We need to
+      inform the storage engine that it should use this behaviour.
+    */
+    
+    /* Tell the storage engine that we are using REPLACE semantics. */
+    thd->lex->duplicates= DUP_REPLACE;
+    
+    /*
+      Pretend we're executing a REPLACE command: this is needed for
+      InnoDB and NDB Cluster since they are not (properly) checking the
+      lex->duplicates flag.
+    */
+    thd->lex->sql_command= SQLCOM_REPLACE;
+    /*
+       Do not raise the error flag in case of hitting a unique attribute.
+    */
+    m_table->file->extra(HA_EXTRA_IGNORE_DUP_KEY);
+    /* 
+       NDB specific: update from ndb master wrapped as Write_rows
+       so that the event should be applied to replace slave's row
+    */
+    m_table->file->extra(HA_EXTRA_WRITE_CAN_REPLACE);
+    /* 
+       NDB specific: if update from ndb master wrapped as Write_rows
+       does not find the row it's assumed idempotent binlog applying
+       is taking place; don't raise the error.
+    */
+    m_table->file->extra(HA_EXTRA_IGNORE_NO_KEY);
+    /*
+      TODO: the cluster team (Tomas?) says that it's better if the engine knows
+      how many rows are going to be inserted, then it can allocate needed memory
+      from the start.
+    */
   }
 
-  if (key_type & MULTIPLE_KEY_FLAG && table->s->keys)
-  {
-    for (key=0,keyinfo= table->key_info ; 
-         (key < table->s->keys) && (res == MAX_KEY);
-         key++,keyinfo++)
-    {
-      /*
-        - Skip innactive keys
-        - Skip unique keys without nullable parts
-        - Skip primary keys
-      */
-      if (!(table->s->keys_in_use.is_set(key)) ||
-          ((keyinfo->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME) ||
-          (key == table->s->primary_key))
-        continue;
+ 
+  /* Honor next number column if present */
+  m_table->next_number_field= m_table->found_next_number_field;
+  /*
+   * Fixed Bug#45999: in RBR, the slave's storage engine auto-generated
+   * new sequence numbers for auto_increment fields when their values
+   * were 0. Whether a sequence number is generated is decided by the
+   * values of table->auto_increment_field_not_null and SQL_MODE (whether
+   * it includes MODE_NO_AUTO_VALUE_ON_ZERO) in the update_auto_increment
+   * function. The SQL_MODE of the slave sql thread is always consistent
+   * with the master's. In RBR, auto_increment fields are never NULL.
+   */
+  m_table->auto_increment_field_not_null= TRUE;
 
-      res= are_all_columns_signaled_for_key(keyinfo, bi_cols) ? 
-           key : MAX_KEY;
+  /**
+     Sets m_rows_lookup_algorithm to ROW_LOOKUP_NOT_NEEDED.
+   */
+  decide_row_lookup_algorithm_and_key();
+  DBUG_ASSERT(m_rows_lookup_algorithm==ROW_LOOKUP_NOT_NEEDED);
+  return error;
+}
 
-      if (res < MAX_KEY)
-        return res;
-    }
+int 
+Write_rows_log_event::do_after_row_operations(const Slave_reporting_capability *const,
+                                              int error)
+{
+  int local_error= 0;
+  m_table->next_number_field=0;
+  m_table->auto_increment_field_not_null= FALSE;
+  if ((slave_exec_mode == SLAVE_EXEC_MODE_IDEMPOTENT) ||
+      m_table->s->db_type()->db_type == DB_TYPE_NDBCLUSTER)
+  {
+    m_table->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY);
+    m_table->file->extra(HA_EXTRA_WRITE_CANNOT_REPLACE);
+    /*
+      Resetting the extra with
+      table->file->extra(HA_EXTRA_NO_IGNORE_NO_KEY);
+      triggers bug#27077.
+      Explanation: file->reset() ultimately performs this duty.
+      Still todo: fix.
+    */
+  }
+  if ((local_error= m_table->file->ha_end_bulk_insert()))
+  {
+    m_table->file->print_error(local_error, MYF(0));
   }
 
-  return res;
+  m_rows_lookup_algorithm= ROW_LOOKUP_UNDEFINED;
+
+  return error? error : local_error;
 }
 
-/* 
-  Check if we are already spending too much time on this statement.
-  if we are, warn user that it might be because table does not have
-  a PK, but only if the warning was not printed before for this STMT.
-
-  @param type          The event type code.
-  @param table_name    The name of the table that the slave is 
-                       operating.
-  @param is_index_scan States whether the slave is doing an index scan 
-                       or not.
-  @param rli           The relay metadata info.
+#if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
+
+/*
+  Check if there are more UNIQUE keys after the given key.
 */
-static inline 
-void issue_long_find_row_warning(Log_event_type type, 
-                                 const char *table_name,
-                                 bool is_index_scan,
-                                 const Relay_log_info *rli)
+static int
+last_uniq_key(TABLE *table, uint keyno)
 {
-  if ((log_warnings > 1 && 
-      !const_cast<Relay_log_info*>(rli)->is_long_find_row_note_printed()))
-  {
-    time_t now= my_time(0);
-    time_t stmt_ts= const_cast<Relay_log_info*>(rli)->get_row_stmt_start_timestamp();
-    
-    DBUG_EXECUTE_IF("inject_long_find_row_note", 
-                    stmt_ts-=(LONG_FIND_ROW_THRESHOLD*2););
-
-    long delta= (long) (now - stmt_ts);
-
-    if (delta > LONG_FIND_ROW_THRESHOLD)
-    {
-      const_cast<Relay_log_info*>(rli)->set_long_find_row_note_printed();
-      const char* evt_type= type == DELETE_ROWS_EVENT ? " DELETE" : "n UPDATE";
-      const char* scan_type= is_index_scan ? "scanning an index" : "scanning the table";
-
-      sql_print_information("The slave is applying a ROW event on behalf of a%s statement "
-                            "on table %s and is currently taking a considerable amount "
-                            "of time (%ld seconds). This is due to the fact that it is %s "
-                            "while looking up records to be processed. Consider adding a "
-                            "primary key (or unique key) to the table to improve "
-                            "performance.", evt_type, table_name, delta, scan_type);
-    }
-  }
+  while (++keyno < table->s->keys)
+    if (table->key_info[keyno].flags & HA_NOSAME)
+      return 0;
+  return 1;
 }
 
 /**
-  Locate the current row in event's table.
+   Check if an error is a duplicate key error.
 
-  The current row is pointed by @c m_curr_row. Member @c m_width tells how many 
-  columns are there in the row (this can be differnet from the number of columns 
-  in the table). It is assumed that event's table is already open and pointed 
-  by @c m_table.
-
-  If a corresponding record is found in the table it is stored in 
-  @c m_table->record[0]. Note that when record is located based on a primary 
-  key, it is possible that the record found differs from the row being located.
-
-  If no key is specified or table does not have keys, a table scan is used to 
-  find the row. In that case the row should be complete and contain values for
-  all columns. However, it can still be shorter than the table, i.e. the table 
-  can contain extra columns not present in the row. It is also possible that 
-  the table has fewer columns than the row being located. 
+   This function is used to check if an error code is one of the
+   duplicate key errors, i.e., an error code for which it is sensible
+   to do a <code>get_dup_key()</code> to retrieve the duplicate key.
 
-  @returns Error code on failure, 0 on success. 
-  
-  @post In case of success @c m_table->record[0] contains the record found. 
-  Also, the internal "cursor" of the table is positioned at the record found.
+   @param errcode The error code to check.
 
-  @note If the engine allows random access of the records, a combination of
-  @c position() and @c rnd_pos() will be used. 
+   @return <code>true</code> if the error code is a duplicate-key
+   error for which <code>get_dup_key()</code> can be used,
+   <code>false</code> otherwise.
  */
-
-
-int Rows_log_event::find_row(const Relay_log_info *rli)
+bool
+is_duplicate_key_error(int errcode)
 {
-  DBUG_ENTER("Rows_log_event::find_row");
-
-  DBUG_ASSERT(m_table && m_table->in_use != NULL);
-
-  TABLE *table= m_table;
-  int error= 0;
-  KEY *keyinfo;
-  uint key;
-  bool is_table_scan= false, is_index_scan= false;
-
-  /*
-    rpl_row_tabledefs.test specifies that
-    if the extra field on the slave does not have a default value
-    and this is okay with Delete or Update events.
-    Todo: fix wl3228 hld that requires defauls for all types of events
-  */
-  
-  prepare_record(table, &m_cols, FALSE);
-  error= unpack_current_row(rli, &m_cols);
-
-  // Temporary fix to find out why it fails [/Matz]
-  memcpy(m_table->read_set->bitmap, m_cols.bitmap, (m_table->read_set->n_bits + 7) / 8);
-
-  if (!is_any_column_signaled_for_table(table, &m_cols))
+  switch (errcode)
   {
-    error= HA_ERR_END_OF_FILE;
-    goto err;
+  case HA_ERR_FOUND_DUPP_KEY:
+  case HA_ERR_FOUND_DUPP_UNIQUE:
+    return true;
   }
+  return false;
+}
 
-#ifndef DBUG_OFF
-  DBUG_PRINT("info",("looking for the following record"));
-  DBUG_DUMP("record[0]", table->record[0], table->s->reclength);
-#endif
+/**
+  Write the current row into event's table.
 
-  if ((key= search_key_in_table(table, &m_cols, PRI_KEY_FLAG)) >= MAX_KEY)
-    /* we dont have a PK, or PK is not usable with BI values */
-    goto INDEX_SCAN;
+  The row is located in the row buffer, pointed by @c m_curr_row member.
+  Number of columns of the row is stored in @c m_width member (it can be 
+  different from the number of columns in the table to which we insert). 
+  Bitmap @c m_cols indicates which columns are present in the row. It is assumed 
+  that event's table is already open and pointed by @c m_table.
 
-  if ((table->file->ha_table_flags() & HA_PRIMARY_KEY_REQUIRED_FOR_POSITION))
-  {
+  If the same record already exists in the table it can be either overwritten
+  or an error is reported, depending on the value of the @c overwrite flag
+  (error reporting not yet implemented). Note that the matching record can be
+  different from the row we insert if we use primary keys to identify records in
+  the table.
 
-    if ((table->file->ha_table_flags() & HA_READ_BEFORE_WRITE_REMOVAL))
-    {
-      /*
-        Read removal is possible since the engine supports write without
-        previous read using full primary key
-      */
-      DBUG_PRINT("info", ("using read before write removal"));
+  The row to be inserted can contain values only for selected columns. The
+  missing columns are filled in with default values using the
+  @c prepare_record() function. If a matching record is found in the table
+  and @c overwrite is true, the missing columns are taken from it.
 
-      /*
-        Tell the handler to ignore if key exists or not, since it's
-        not yet known if the key does exist(when using rbwr)
-      */
-      table->file->extra(HA_EXTRA_IGNORE_NO_KEY);
-      DBUG_RETURN(0);
-    }
+  @param  rli   Relay log info (needed for row unpacking).
+  @param  overwrite  
+                Shall we overwrite if the row already exists or signal 
+                error (currently ignored).
 
-    /*
-      Use a more efficient method to fetch the record given by
-      table->record[0] if the engine allows it.  We first compute a
-      row reference using the position() member function (it will be
-      stored in table->file->ref) and the use rnd_pos() to position
-      the "cursor" (i.e., record[0] in this case) at the correct row.
+  @returns Error code on failure, 0 on success.
 
-      TODO: Add a check that the correct record has been fetched by
-      comparing with the original record. Take into account that the
-      record on the master and slave can be of different
-      length. Something along these lines should work:
+  This method, if successful, sets @c m_curr_row_end pointer to point at the
+  next row in the rows buffer. This is done when unpacking the row to be 
+  inserted.
 
-      ADD>>>  store_record(table,record[1]);
-              int error= table->file->rnd_pos(table->record[0], table->file->ref);
-      ADD>>>  DBUG_ASSERT(memcmp(table->record[1], table->record[0],
-                                 table->s->reclength) == 0);
+  @note If a matching record is found, it is either updated using 
+  @c ha_update_row() or first deleted and then new record written.
+*/ 
 
-    */
+int
+Write_rows_log_event::write_row(const Relay_log_info *const rli,
+                                const bool overwrite)
+{
+  DBUG_ENTER("write_row");
+  DBUG_ASSERT(m_table != NULL && thd != NULL);
 
-    DBUG_PRINT("info",("locating record using primary key (position)"));
-    int error;
-    if (table->file->inited && (error= table->file->ha_index_end()))
-      DBUG_RETURN(error);
-    if ((error= table->file->ha_rnd_init(FALSE)))
-      DBUG_RETURN(error);
+  TABLE *table= m_table;  // pointer to event's table
+  int error;
+  int UNINIT_VAR(keynum);
+  auto_afree_ptr<char> key(NULL);
 
-    error= table->file->rnd_pos_by_record(table->record[0]);
+  prepare_record(table, &m_cols,
+                 table->file->ht->db_type != DB_TYPE_NDBCLUSTER);
 
-    table->file->ha_rnd_end();
-    if (error)
-    {
-      DBUG_PRINT("info",("rnd_pos returns error %d",error));
-      if (error == HA_ERR_RECORD_DELETED)
-        error= HA_ERR_KEY_NOT_FOUND;
-      table->file->print_error(error, MYF(0));
-    }
+  /* unpack row into table->record[0] */
+  if ((error= unpack_current_row(rli, &m_cols)))
     DBUG_RETURN(error);
-  }
-
-  // We can't use position() - try other methods.
-  
-INDEX_SCAN:
-
-  /*
-    Save copy of the record in table->record[1]. It might be needed 
-    later if linear search is used to find exact match.
-   */ 
-  store_record(table,record[1]);    
 
-  if ((key= search_key_in_table(table, &m_cols, 
-                                (PRI_KEY_FLAG | UNIQUE_KEY_FLAG | MULTIPLE_KEY_FLAG))) 
-       >= MAX_KEY)
-    /* we dont have a key, or no key is suitable for the BI values */
-    goto TABLE_SCAN; 
+  // Temporary fix to find out why it fails [/Matz]
+  memcpy(m_table->write_set->bitmap, m_cols.bitmap, (m_table->write_set->n_bits + 7) / 8);
 
+  if (m_curr_row == m_rows_buf)
   {
-    keyinfo= table->key_info + key;
-
-
-    DBUG_PRINT("info",("locating record using primary key (index_read)"));
-
-    /* The key'th key is active and usable: search the table using the index */
-    if (!table->file->inited && (error= table->file->ha_index_init(key, FALSE)))
-    {
-      DBUG_PRINT("info",("ha_index_init returns error %d",error));
-      table->file->print_error(error, MYF(0));
-      goto err;
-    }
+    /* This is the first row to be inserted; estimate the number of
+       rows from the size of the first row and use that value to
+       initialize the storage engine for bulk insertion. */
+    DBUG_ASSERT(!(m_curr_row > m_curr_row_end));
+    ulong estimated_rows= 0;
+    if (m_curr_row < m_curr_row_end)
+      estimated_rows= (m_rows_end - m_curr_row) / (m_curr_row_end - m_curr_row);
+    else if (m_curr_row == m_curr_row_end)
+      estimated_rows= 1;
 
-    /* Fill key data for the row */
+    m_table->file->ha_start_bulk_insert(estimated_rows);
+  }
+  
+  
+#ifndef DBUG_OFF
+  DBUG_DUMP("record[0]", table->record[0], table->s->reclength);
+  DBUG_PRINT_BITSET("debug", "write_set = %s", table->write_set);
+  DBUG_PRINT_BITSET("debug", "read_set = %s", table->read_set);
+#endif
 
-    DBUG_ASSERT(m_key);
-    key_copy(m_key, table->record[0], keyinfo, 0);
+  /* 
+    Try to write record. If a corresponding record already exists in the table,
+    we try to change it using ha_update_row() if possible. Otherwise we delete
+    it and repeat the whole process again. 
 
-    /*
-      Don't print debug messages when running valgrind since they can
-      trigger false warnings.
-     */
-#ifndef HAVE_purify
-    DBUG_DUMP("key data", m_key, keyinfo->key_length);
-#endif
+    TODO: Add safety measures against infinite looping. 
+   */
 
-    /*
-      We need to set the null bytes to ensure that the filler bit are
-      all set when returning.  There are storage engines that just set
-      the necessary bits on the bytes and don't set the filler bits
-      correctly.
-    */
-    if (table->s->null_bytes > 0)
-      table->record[0][table->s->null_bytes - 1]|=
-        256U - (1U << table->s->last_null_bit_pos);
+  m_table->mark_columns_per_binlog_row_image();
 
-    if ((error= table->file->ha_index_read_map(table->record[0], m_key,
-                                               HA_WHOLE_KEY,
-                                               HA_READ_KEY_EXACT)))
+  while ((error= table->file->ha_write_row(table->record[0])))
+  {
+    if (error == HA_ERR_LOCK_DEADLOCK ||
+        error == HA_ERR_LOCK_WAIT_TIMEOUT ||
+        (keynum= table->file->get_dup_key(error)) < 0 ||
+        !overwrite)
     {
-      DBUG_PRINT("info",("no record matching the key found in the table"));
-      if (error == HA_ERR_RECORD_DELETED)
-        error= HA_ERR_KEY_NOT_FOUND;
-      table->file->print_error(error, MYF(0));
-      table->file->ha_index_end();
-      goto err;
-    }
-
-  /*
-    Don't print debug messages when running valgrind since they can
-    trigger false warnings.
-   */
-#ifndef HAVE_purify
-    DBUG_PRINT("info",("found first matching record")); 
-    DBUG_DUMP("record[0]", table->record[0], table->s->reclength);
-#endif
+      DBUG_PRINT("info",("get_dup_key returns %d)", keynum));
+      /*
+        Deadlock, waiting for lock or just an error from the handler
+        such as HA_ERR_FOUND_DUPP_KEY when overwrite is false.
+        Retrieval of the duplicate key number may fail
+        - either because the error was not a "duplicate key" error
+        - or because the information about which key it was is not available
+      */
+      table->file->print_error(error, MYF(0));
+      goto error;
+    }
     /*
-      Below is a minor "optimization".  If the key (i.e., key number
-      0) has the HA_NOSAME flag set, we know that we have found the
-      correct record (since there can be no duplicates); otherwise, we
-      have to compare the record with the one found to see if it is
-      the correct one.
-
-      CAVEAT! This behaviour is essential for the replication of,
-      e.g., the mysql.proc table since the correct record *shall* be
-      found using the primary key *only*.  There shall be no
-      comparison of non-PK columns to decide if the correct record is
-      found.  I can see no scenario where it would be incorrect to
-      chose the row to change only using a PK or an UNNI.
-    */
-    if (keyinfo->flags & HA_NOSAME || key == table->s->primary_key)
+       We need to retrieve the old row into record[1] to be able to
+       either update or delete the offending record.  We either:
+
+       - use ha_rnd_pos() with a row-id (available as dupp_row) to the
+         offending row, if that is possible (MyISAM and Blackhole), or else
+
+       - use ha_index_read_idx_map() with the key that is duplicated, to
+         retrieve the offending row.
+     */
+    if (table->file->ha_table_flags() & HA_DUPLICATE_POS)
     {
-      /* Unique does not have non nullable part */
-      if (!(table->key_info->flags & (HA_NULL_PART_KEY)))
+      DBUG_PRINT("info",("Locating offending record using ha_rnd_pos()"));
+
+      if (table->file->inited && (error= table->file->ha_index_end()))
       {
-        table->file->ha_index_end();
-        goto ok;
+        table->file->print_error(error, MYF(0));
+        goto error;
       }
-      else
+      if ((error= table->file->ha_rnd_init(FALSE)))
       {
-        KEY *keyinfo= table->key_info;
-        /*
-          Unique has nullable part. We need to check if there is any field in the
-          BI image that is null and part of UNNI.
-        */
-        bool null_found= FALSE;
-        for (uint i=0; i < keyinfo->key_parts && !null_found; i++)
-        {
-          uint fieldnr= keyinfo->key_part[i].fieldnr - 1;
-          Field **f= table->field+fieldnr;
-          null_found= (*f)->is_null();
-        }
+        table->file->print_error(error, MYF(0));
+        goto error;
+      }
 
-        if (!null_found)
-        {
-          table->file->ha_index_end();
-          goto ok;
-        }
+      error= table->file->ha_rnd_pos(table->record[1], table->file->dup_ref);
 
-        /* else fall through to index scan */
+      table->file->ha_rnd_end();
+      if (error)
+      {
+        DBUG_PRINT("info",("ha_rnd_pos() returns error %d",error));
+        if (error == HA_ERR_RECORD_DELETED)
+          error= HA_ERR_KEY_NOT_FOUND;
+        table->file->print_error(error, MYF(0));
+        goto error;
       }
     }
-
-    is_index_scan=true;
-
-    /*
-      In case key is not unique, we still have to iterate over records found
-      and find the one which is identical to the row given. A copy of the 
-      record we are looking for is stored in record[1].
-     */ 
-    DBUG_PRINT("info",("non-unique index, scanning it to find matching record")); 
-
-    while (record_compare(table, &m_cols))
+    else
     {
-      /*
-        We need to set the null bytes to ensure that the filler bit
-        are all set when returning.  There are storage engines that
-        just set the necessary bits on the bytes and don't set the
-        filler bits correctly.
+      DBUG_PRINT("info",("Locating offending record using index_read_idx()"));
 
-        TODO[record format ndb]: Remove this code once NDB returns the
-        correct record format.
-      */
-      if (table->s->null_bytes > 0)
+      if (table->file->extra(HA_EXTRA_FLUSH_CACHE))
       {
-        table->record[0][table->s->null_bytes - 1]|=
-          256U - (1U << table->s->last_null_bit_pos);
+        DBUG_PRINT("info",("Error when setting HA_EXTRA_FLUSH_CACHE"));
+        error= my_errno;
+        goto error;
+      }
+
+      if (key.get() == NULL)
+      {
+        key.assign(static_cast<char*>(my_alloca(table->s->max_unique_length)));
+        if (key.get() == NULL)
+        {
+          DBUG_PRINT("info",("Can't allocate key buffer"));
+          error= ENOMEM;
+          goto error;
+        }
       }
 
-      while ((error= table->file->ha_index_next(table->record[0])))
+      key_copy((uchar*)key.get(), table->record[0], table->key_info + keynum,
+               0);
+      error= table->file->ha_index_read_idx_map(table->record[1], keynum,
+                                                (const uchar*)key.get(),
+                                                HA_WHOLE_KEY,
+                                                HA_READ_KEY_EXACT);
+      if (error)
       {
-        /* We just skip records that has already been deleted */
+        DBUG_PRINT("info",("ha_index_read_idx_map() returns %s", HA_ERR(error)));
         if (error == HA_ERR_RECORD_DELETED)
-          continue;
-        DBUG_PRINT("info",("no record matching the given row found"));
+          error= HA_ERR_KEY_NOT_FOUND;
         table->file->print_error(error, MYF(0));
-        table->file->ha_index_end();
-        goto err;
+        goto error;
       }
     }
 
     /*
-      Have to restart the scan to be able to fetch the next row.
-    */
-    table->file->ha_index_end();
-    goto ok;
-  }
-
-TABLE_SCAN:
-
-  /* All that we can do now is rely on a table scan */
-  {
-    DBUG_PRINT("info",("locating record using table scan (ha_rnd_next)"));
-
-    int restart_count= 0; // Number of times scanning has restarted from top
+       Now, record[1] should contain the offending row.  That
+       will enable us to update it or, alternatively, delete it (so
+       that we can insert the new row afterwards).
+     */
 
-    /* We don't have a key: search the table using ha_rnd_next() */
-    if ((error= table->file->ha_rnd_init(1)))
+    /*
+      If row is incomplete we will use the record found to fill
+      missing columns.
+    */
+    if (!get_flags(COMPLETE_ROWS_F))
     {
-      DBUG_PRINT("info",("error initializing table scan"
-                         " (ha_rnd_init returns %d)",error));
-      table->file->print_error(error, MYF(0));
-      goto err;
+      restore_record(table,record[1]);
+      error= unpack_current_row(rli, &m_cols);
     }
 
-    is_table_scan= true;
+#ifndef DBUG_OFF
+    DBUG_PRINT("debug",("preparing for update: before and after image"));
+    DBUG_DUMP("record[1] (before)", table->record[1], table->s->reclength);
+    DBUG_DUMP("record[0] (after)", table->record[0], table->s->reclength);
+#endif
 
-    /* Continue until we find the right record or have made a full loop */
-    do
-    {
-  restart_ha_rnd_next:
-      error= table->file->ha_rnd_next(table->record[0]);
+    /*
+       REPLACE is defined as either INSERT or DELETE + INSERT.  If
+       possible, we can replace it with an UPDATE, but that will not
+       work on InnoDB if FOREIGN KEY checks are necessary.
 
-      if (error)
-        DBUG_PRINT("info", ("error: %s", HA_ERR(error)));
-      switch (error) {
+       I (Matz) am not sure of the reason for the last_uniq_key()
+       check, but I'm guessing that it's something along the
+       following lines.
 
+       Suppose that we got the duplicate key to be a key that is not
+       the last unique key for the table and we perform an update:
+       then there might be another key for which the unique check will
+       fail, so we're better off just deleting the row and inserting
+       the correct row.
+     */
+    if (last_uniq_key(table, keynum) &&
+        !table->file->referenced_by_foreign_key())
+    {
+      DBUG_PRINT("info",("Updating row using ha_update_row()"));
+      error=table->file->ha_update_row(table->record[1],
+                                       table->record[0]);
+      switch (error) {
+                
+      case HA_ERR_RECORD_IS_THE_SAME:
+        DBUG_PRINT("info",("ignoring HA_ERR_RECORD_IS_THE_SAME error from"
+                           " ha_update_row()"));
+        error= 0;
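+        /* fall through */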
+      
       case 0:
         break;
-
-      /*
-        If the record was deleted, we pick the next one without doing
-        any comparisons.
-      */
-      case HA_ERR_RECORD_DELETED:
-        goto restart_ha_rnd_next;
-
-      case HA_ERR_END_OF_FILE:
-        if (++restart_count < 2)
-        {
-          if ((error= table->file->ha_rnd_init(1)))
-          {
-            table->file->print_error(error, MYF(0));
-            goto err;
-          }
-        }
-        break;
-
-      default:
-        DBUG_PRINT("info", ("Failed to get next record"
-                            " (ha_rnd_next returns %d)",error));
+        
+      default:    
+        DBUG_PRINT("info",("ha_update_row() returns error %d",error));
         table->file->print_error(error, MYF(0));
-        (void) table->file->ha_rnd_end();
-        goto err;
       }
+      
+      goto error;
     }
-    while (restart_count < 2 && record_compare(table, &m_cols));
-    
-    /* 
-      Note: above record_compare will take into accout all record fields 
-      which might be incorrect in case a partial row was given in the event
-     */
-
-    /*
-      Have to restart the scan to be able to fetch the next row.
-    */
-    if (restart_count == 2)
-      DBUG_PRINT("info", ("Record not found"));
     else
-      DBUG_DUMP("record found", table->record[0], table->s->reclength);
-    table->file->ha_rnd_end();
-
-    DBUG_ASSERT(error == HA_ERR_END_OF_FILE || error == 0);
-    goto err;
+    {
+      DBUG_PRINT("info",("Deleting offending row and trying to write new one again"));
+      if ((error= table->file->ha_delete_row(table->record[1])))
+      {
+        DBUG_PRINT("info",("ha_delete_row() returns error %d",error));
+        table->file->print_error(error, MYF(0));
+        goto error;
+      }
+      /* Will retry ha_write_row() with the offending row removed. */
+    }
   }
-ok:
-  if (is_table_scan || is_index_scan)
-    issue_long_find_row_warning(get_type_code(), m_table->alias, 
-                                is_index_scan, rli);
 
-  table->default_column_bitmaps();
-  DBUG_RETURN(0);
+error:
+  m_table->default_column_bitmaps();
+  DBUG_RETURN(error);
+}
 
-err:
-  if (is_table_scan || is_index_scan)
-    issue_long_find_row_warning(get_type_code(), m_table->alias, 
-                                is_index_scan, rli);
+#endif
 
-  table->default_column_bitmaps();
-  DBUG_RETURN(error);
+int
+Write_rows_log_event::do_exec_row(const Relay_log_info *const rli)
+{
+  DBUG_ASSERT(m_table != NULL);
+  int error= write_row(rli, slave_exec_mode == SLAVE_EXEC_MODE_IDEMPOTENT);
+
+  if (error && !thd->is_error())
+  {
+    DBUG_ASSERT(0);
+    my_error(ER_UNKNOWN_ERROR, MYF(0));
+  }
+
+  return error;
 }
 
+#endif /* !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION) */
+
+#ifdef MYSQL_CLIENT
+void Write_rows_log_event::print(FILE *file, PRINT_EVENT_INFO* print_event_info)
+{
+  Rows_log_event::print_helper(file, print_event_info, "Write_rows");
+}
 #endif
 
+/**************************************************************************
+	Delete_rows_log_event member functions
+**************************************************************************/
+
 /*
   Constructor used to build an event for writing to the binary log.
  */
@@ -11376,53 +11943,38 @@ Delete_rows_log_event::Delete_rows_log_e
 
 #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
 
-int 
+int
 Delete_rows_log_event::do_before_row_operations(const Slave_reporting_capability *const)
 {
+  int error= 0;
+  DBUG_ENTER("Delete_rows_log_event::do_before_row_operations");
   /*
     Increment the global status delete count variable
    */
   if (get_flags(STMT_END_F))
-    status_var_increment(thd->status_var.com_stat[SQLCOM_DELETE]);
-
-  if (m_table->s->keys > 0)
-  {
-    // Allocate buffer for key searches
-    m_key= (uchar*)my_malloc(MAX_KEY_LENGTH, MYF(MY_WME));
-    if (!m_key)
-      return HA_ERR_OUT_OF_MEM;
-  }
+    status_var_increment(thd->status_var.com_stat[SQLCOM_DELETE]);  
+  error= row_operations_scan_and_key_setup();
+  DBUG_RETURN(error);
 
-  return 0;
 }
 
-int 
-Delete_rows_log_event::do_after_row_operations(const Slave_reporting_capability *const, 
+int
+Delete_rows_log_event::do_after_row_operations(const Slave_reporting_capability *const,
                                                int error)
 {
-  /*error= ToDo:find out what this should really be, this triggers close_scan in nbd, returning error?*/
-  m_table->file->ha_index_or_rnd_end();
-  my_free(m_key);
-  m_key= NULL;
-
-  return error;
+  DBUG_ENTER("Delete_rows_log_event::do_after_row_operations");
+  error= row_operations_scan_and_key_teardown(error);
+  DBUG_RETURN(error);
 }
 
 int Delete_rows_log_event::do_exec_row(const Relay_log_info *const rli)
 {
   int error;
   DBUG_ASSERT(m_table != NULL);
-
-  if (!(error= find_row(rli))) 
-  { 
-
-    m_table->mark_columns_per_binlog_row_image();
-    /*
-      Delete the record found, located in record[0]
-    */
-    error= m_table->file->ha_delete_row(m_table->record[0]);
-    m_table->default_column_bitmaps();
-  }
+  /* m_table->record[0] contains the BI */
+  m_table->mark_columns_per_binlog_row_image();
+  error= m_table->file->ha_delete_row(m_table->record[0]);
+  m_table->default_column_bitmaps();
   return error;
 }
 
@@ -11495,91 +12047,36 @@ Update_rows_log_event::Update_rows_log_e
 
 #if !defined(MYSQL_CLIENT) && defined(HAVE_REPLICATION)
 
-int 
+int
 Update_rows_log_event::do_before_row_operations(const Slave_reporting_capability *const)
 {
+  int error= 0;
+  DBUG_ENTER("Update_rows_log_event::do_before_row_operations");
   /*
     Increment the global status update count variable
   */
   if (get_flags(STMT_END_F))
     status_var_increment(thd->status_var.com_stat[SQLCOM_UPDATE]);
+  error= row_operations_scan_and_key_setup();
+  DBUG_RETURN(error);
 
-  if (m_table->s->keys > 0)
-  {
-    // Allocate buffer for key searches
-    m_key= (uchar*)my_malloc(m_table->key_info->key_length, MYF(MY_WME));
-    if (!m_key)
-      return HA_ERR_OUT_OF_MEM;
-  }
-
-  return 0;
 }
 
-int 
-Update_rows_log_event::do_after_row_operations(const Slave_reporting_capability *const, 
+int
+Update_rows_log_event::do_after_row_operations(const Slave_reporting_capability *const,
                                                int error)
 {
-  /*error= ToDo:find out what this should really be, this triggers close_scan in nbd, returning error?*/
-  m_table->file->ha_index_or_rnd_end();
-  my_free(m_key); // Free for multi_malloc
-  m_key= NULL;
-
-  return error;
+  DBUG_ENTER("Update_rows_log_event::do_after_row_operations");
+  error= row_operations_scan_and_key_teardown(error);
+  DBUG_RETURN(error);
 }
 
-int 
+int
 Update_rows_log_event::do_exec_row(const Relay_log_info *const rli)
 {
   DBUG_ASSERT(m_table != NULL);
   int error= 0;
 
-  /**
-     Check if update contains only values in AI for columns that do 
-     not exist on the slave. If it does, we can just unpack the rows 
-     and return (do nothing on the local table).
-
-     NOTE: We do the following optimization and check only if there 
-     are usable values on the AI and disregard the fact that there 
-     might be usable values in the BI. In practice this means that 
-     the slave will not go through find_row (since we have nothing
-     on the record to update, why go looking for it?).
-
-     If we wanted find_row to run anyway, we could move this
-     check after find_row, but then we would have to face the fact
-     that the slave might stop without finding the proper record 
-     (because it might have incomplete BI), even though there were
-     no values in AI.
-
-     On the other hand, if AI has usable values but BI has not,
-     then find_row will return an error (and the error is then
-     propagated as it was already).
-   */
-  if (!is_any_column_signaled_for_table(m_table, &m_cols_ai))
-  {
-    /* 
-      Read and discard images, because:
-      1. AI does not contain any useful values to replay;
-      2. BI is irrelevant if there is nothing useful in AI.
-    */
-    error = unpack_current_row(rli, &m_cols);
-    m_curr_row= m_curr_row_end;
-    error = error | unpack_current_row(rli, &m_cols_ai);
-
-    return error;
-  }
-
-  error= find_row(rli); 
-  if (error)
-  {
-    /*
-      We need to read the second image in the event of error to be
-      able to skip to the next pair of updates
-    */
-    m_curr_row= m_curr_row_end;
-    unpack_current_row(rli, &m_cols_ai);
-    return error;
-  }
-
   /*
     This is the situation after locating BI:
 

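Note on the new apply flow: with this worklog, Delete_rows and
Update_rows do_exec_row() no longer call find_row() themselves; the
before image is located up front by the configured lookup algorithm
and left in m_table->record[0]. A rough sketch of the resulting
dispatch, built from the member functions declared in log_event.h
below (illustrative only, not the literal implementation):

  // Sketch only: dispatch on the algorithm chosen by
  // decide_row_lookup_algorithm_and_key(). The helper names match the
  // new declarations in log_event.h; this wrapper is hypothetical.
  int Rows_log_event::apply_rows_sketch(Relay_log_info const *rli)
  {
    int error= 0;
    switch (m_rows_lookup_algorithm)
    {
    case ROW_LOOKUP_NOT_NEEDED:              // Write_rows: no BI search
      while (m_curr_row < m_rows_end && !error)
      {
        error= do_apply_row(rli);            // write_row() per row
        do_post_row_operations(rli, error);  // advances m_curr_row
      }
      break;
    case ROW_LOOKUP_INDEX_SCAN:              // PK/UK/key based search
      error= do_index_scan_and_update(rli);
      break;
    case ROW_LOOKUP_HASH_SCAN:               // hash the BIs, scan once
      error= do_hash_scan_and_update(rli);
      break;
    case ROW_LOOKUP_TABLE_SCAN:              // legacy full table scan
      error= do_table_scan_and_update(rli);
      break;
    }
    return error;
  }
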
=== modified file 'sql/log_event.h'
--- a/sql/log_event.h	2012-04-21 12:11:15 +0000
+++ b/sql/log_event.h	2012-04-24 13:39:42 +0000
@@ -43,6 +43,7 @@
 #include "rpl_record.h"
 #include "rpl_reporting.h"
 #include "sql_class.h"                          /* THD */
+#include "rpl_utility.h"                        /* Hash_slave_rows */
 #endif
 
 /* Forward declarations */
@@ -3833,6 +3834,14 @@ private:
 class Rows_log_event : public Log_event
 {
 public:
+  enum row_lookup_mode {
+       ROW_LOOKUP_UNDEFINED= 0,
+       ROW_LOOKUP_NOT_NEEDED= 1,
+       ROW_LOOKUP_INDEX_SCAN= 2,
+       ROW_LOOKUP_TABLE_SCAN= 3,
+       ROW_LOOKUP_HASH_SCAN= 4
+  };
+
   /**
      Enumeration of the errors that can be returned.
    */
@@ -3926,7 +3935,7 @@ public:
     Note that this member function should only be called for the
     following events:
     - Delete_rows_log_event
-    - Wrirte_rows_log_event
+    - Write_rows_log_event
     - Update_rows_log_event
 
     @param[IN] table The table to compare this events bitmaps 
@@ -4008,6 +4017,19 @@ protected:
   ulong       m_table_id;	/* Table ID */
   MY_BITMAP   m_cols;		/* Bitmap denoting columns available */
   ulong       m_width;          /* The width of the columns bitmap */
+#ifndef MYSQL_CLIENT
+  /**
+     Hash table that will hold the entries while using the HASH_SCAN
+     algorithm to search for and update/delete rows.
+   */
+  Hash_slave_rows m_hash;
+
+  /**
+     The algorithm to use while searching for rows using the before
+     image.
+  */
+  uint            m_rows_lookup_algorithm;  
+#endif
   /*
     Bitmap for columns available in the after image, if present. These
     fields are only available for Update_rows events. Observe that the
@@ -4035,9 +4057,12 @@ protected:
   const uchar *m_curr_row;     /* Start of the row being processed */
   const uchar *m_curr_row_end; /* One-after the end of the current row */
   uchar    *m_key;      /* Buffer to keep key value during searches */
+  uchar    *last_hashed_key;
+  uint     m_key_index;
+  List<uchar> m_distinct_key_list;
+  List_iterator_fast<uchar> m_itr;
 
   int find_row(const Relay_log_info *const);
-  int write_row(const Relay_log_info *const, const bool);
 
   // Unpack the current row into m_table->record[0]
   int unpack_current_row(const Relay_log_info *const rli,
@@ -4053,6 +4078,28 @@ protected:
     ASSERT_OR_RETURN_ERROR(m_curr_row_end <= m_rows_end, HA_ERR_CORRUPT_EVENT);
     return result;
   }
+
+  /*
+    This member function is called when deciding the algorithm to be used to
+    find the rows to be updated on the slave during row based replication.
+    This function sets m_rows_lookup_algorithm, and also m_key_index
+    with the key index to be used if the algorithm depends on an index
+    (see the sketch after this file's diff).
+   */
+  void decide_row_lookup_algorithm_and_key();
+
+  /*
+    Encapsulates the  operations to be done before applying
+    row event for update and delete.
+   */
+  int row_operations_scan_and_key_setup();
+
+  /*
+   Encapsulates the operations to be done after applying
+   a row event for update and delete.
+  */
+  int row_operations_scan_and_key_teardown(int error);
+
 #endif
 
 private:
@@ -4112,6 +4159,92 @@ private:
       
   */
   virtual int do_exec_row(const Relay_log_info *const rli) = 0;
+
+  /**
+    Private member function called while handling idempotent errors.
+
+    @param err[IN/OUT] the error to handle. If it is listed as an
+                       idempotent-related error, then it is cleared.
+    @returns true if the slave should stop executing rows.
+   */
+  int handle_idempotent_errors(Relay_log_info const *rli, int *err);
+
+  /**
+     Private member function called after updating/deleting a row. It
+     performs some assertions and more importantly, it updates
+     m_curr_row so that the next row is processed during the row
+     execution main loop (@c Rows_log_event::do_apply_event()).
+
+     @param err[IN] the current error code.
+   */
+  void do_post_row_operations(Relay_log_info const *rli, int err);
+
+  /**
+     Commodity wrapper around do_exec_row(), that deals with resetting
+     the thd reference in the table.
+   */
+  int do_apply_row(Relay_log_info const *rli);
+
+  /**
+     Implementation of the index scan and update algorithm. It uses
+     PK, UK or regular Key to search for the record to update. When
+     found it updates it.
+   */
+  int do_index_scan_and_update(Relay_log_info const *rli);
+  
+  /**
+     Implementation of the hash_scan and update algorithm. It collects
+     rows positions in a hashtable until the last row is
+     unpacked. Then it scans the table to update and when a record in
+     the table matches the one in the hashtable, the update/delete is
+     performed.
+   */
+  int do_hash_scan_and_update(Relay_log_info const *rli);
+
+  /**
+     Implementation of the legacy table_scan and update algorithm. For
+     each unpacked row it scans the storage engine table for a
+     match. When a match is found, the update/delete operations are
+     performed.
+   */
+  int do_table_scan_and_update(Relay_log_info const *rli);
+
+/**
+  Initializes scanning of rows. Opens an index and initializes an
+  iterator over a list of distinct keys (m_distinct_key_list) if it is
+  a HASH_SCAN over an index, or opens the table if it is a HASH_SCAN
+  over the table.
+*/
+  int open_record_scan();
+
+/**
+   Does the cleanup:
+     - deallocates all the elements in m_distinct_key_list, if any
+     - closes the index, if opened by open_record_scan
+     - closes the table, if opened for scanning.
+*/
+  int close_record_scan();
+
+/**
+  Fetches the next row. If it is a HASH_SCAN over an index, it populates
+  table->record[0] with the next row corresponding to the index. If the
+  indexes are in non-contiguous ranges it fetches the record
+  corresponding to the key value in the next range.
+
+  @param first_read signifies whether this is the first time we are
+                    reading a row over an index.
+  @returns error code when there are no more records to be fetched or
+           some other error occurred, 0 otherwise.
+*/
+  int next_record_scan(bool first_read);
+
+/**
+  Populates m_distinct_key_list with the distinct keys to be modified
+  during a HASH_SCAN over keys.
+  @returns 0 on success, error code otherwise.
+*/
+  int add_key_to_distinct_keyset();
 #endif /* defined(MYSQL_SERVER) && defined(HAVE_REPLICATION) */
 
   friend class Old_rows_log_event;
@@ -4155,6 +4288,9 @@ public:
   }
 #endif
 
+protected:
+  int write_row(const Relay_log_info *const, const bool);
+
 private:
   virtual Log_event_type get_type_code() { return (Log_event_type)TYPE_CODE; }
 

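How the lookup algorithm is chosen: decide_row_lookup_algorithm_and_key()
consults slave_rows_search_algorithms_options and the keys available on
the slave's table. Its body is not part of this diff excerpt; a plausible
sketch of the selection order, assuming an exact unique lookup is always
preferred and that hashing beats scanning a non-unique index:

  // Hypothetical sketch; the real member function also fills in
  // m_key_index. 'opts' stands for slave_rows_search_algorithms_options.
  static uint pick_algorithm_sketch(bool has_usable_pk_or_uk,
                                    bool has_usable_key,
                                    ulonglong opts)
  {
    if (has_usable_pk_or_uk && (opts & SLAVE_ROWS_INDEX_SCAN))
      return Rows_log_event::ROW_LOOKUP_INDEX_SCAN; // exact lookup
    if (opts & SLAVE_ROWS_HASH_SCAN)
      return Rows_log_event::ROW_LOOKUP_HASH_SCAN;  // over index or table
    if (has_usable_key && (opts & SLAVE_ROWS_INDEX_SCAN))
      return Rows_log_event::ROW_LOOKUP_INDEX_SCAN; // non-unique index
    return Rows_log_event::ROW_LOOKUP_TABLE_SCAN;   // last resort
  }

(Write events set ROW_LOOKUP_NOT_NEEDED before ever reaching this point,
as seen in Write_rows_log_event::do_before_row_operations() above.)
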
=== modified file 'sql/mysqld.cc'
--- a/sql/mysqld.cc	2012-04-27 17:16:36 +0000
+++ b/sql/mysqld.cc	2012-05-02 12:04:42 +0000
@@ -503,6 +503,10 @@ ulong slave_exec_mode_options;
 ulonglong slave_type_conversions_options;
 ulong opt_mts_slave_parallel_workers;
 ulonglong opt_mts_pending_jobs_size_max;
+ulonglong slave_rows_search_algorithms_options;
+#ifndef DBUG_OFF
+uint slave_rows_last_search_algorithm_used;
+#endif
 ulong binlog_cache_size=0;
 ulonglong  max_binlog_cache_size=0;
 ulong binlog_stmt_cache_size=0;
@@ -6908,6 +6912,21 @@ static int show_heartbeat_period(THD *th
   return 0;
 }
 
+#ifndef DBUG_OFF
+static int show_slave_rows_last_search_algorithm_used(THD *thd, SHOW_VAR *var, char *buff)
+{
+  uint res= slave_rows_last_search_algorithm_used;
+  const char* s= ((res == Rows_log_event::ROW_LOOKUP_TABLE_SCAN) ? "TABLE_SCAN" :
+                  ((res == Rows_log_event::ROW_LOOKUP_HASH_SCAN) ? "HASH_SCAN" : 
+                   "INDEX_SCAN"));
+
+  var->type= SHOW_CHAR;
+  var->value= buff;
+  sprintf(buff, "%s", s);
+
+  return 0;
+}
+#endif
 
 #endif /* HAVE_REPLICATION */
 
@@ -7389,6 +7408,9 @@ SHOW_VAR status_vars[]= {
   {"Slave_heartbeat_period",   (char*) &show_heartbeat_period, SHOW_FUNC},
   {"Slave_received_heartbeats",(char*) &show_slave_received_heartbeats, SHOW_FUNC},
   {"Slave_last_heartbeat",     (char*) &show_slave_last_heartbeat, SHOW_FUNC},
+#ifndef DBUG_OFF
+  {"Slave_rows_last_search_algorithm_used",(char*) &show_slave_rows_last_search_algorithm_used, SHOW_FUNC},
+#endif
   {"Slave_running",            (char*) &show_slave_running,     SHOW_FUNC},
 #endif
   {"Slow_launch_threads",      (char*) &slow_launch_threads,    SHOW_LONG},

=== modified file 'sql/mysqld.h'
--- a/sql/mysqld.h	2012-04-27 17:16:36 +0000
+++ b/sql/mysqld.h	2012-05-02 12:04:42 +0000
@@ -117,6 +117,10 @@ extern ulong slave_exec_mode_options;
 extern ulonglong slave_type_conversions_options;
 extern my_bool read_only, opt_readonly;
 extern my_bool lower_case_file_system;
+extern ulonglong slave_rows_search_algorithms_options;
+#ifndef DBUG_OFF
+extern uint slave_rows_last_search_algorithm_used;
+#endif
 extern my_bool opt_enable_named_pipe, opt_sync_frm, opt_allow_suspicious_udfs;
 extern my_bool opt_secure_auth;
 extern char* opt_secure_file_priv;

=== modified file 'sql/rpl_utility.cc'
--- a/sql/rpl_utility.cc	2012-04-21 12:11:15 +0000
+++ b/sql/rpl_utility.cc	2012-04-24 13:39:42 +0000
@@ -1060,7 +1060,7 @@ table_def::table_def(unsigned char *type
       }
       case MYSQL_TYPE_BIT:
       {
-        uint16 x= field_metadata[index++]; 
+        uint16 x= field_metadata[index++];
         x = x + (field_metadata[index++] << 8U);
         m_field_metadata[i]= x;
         break;
@@ -1127,7 +1127,7 @@ bool event_checksum_test(uchar *event_bu
     if (event_buf[EVENT_TYPE_OFFSET] == FORMAT_DESCRIPTION_EVENT)
     {
 #ifndef DBUG_OFF
-      int8 fd_alg= event_buf[event_len - BINLOG_CHECKSUM_LEN - 
+      int8 fd_alg= event_buf[event_len - BINLOG_CHECKSUM_LEN -
                              BINLOG_CHECKSUM_ALG_DESC_LEN];
 #endif
       /*
@@ -1136,8 +1136,8 @@ bool event_checksum_test(uchar *event_bu
       flags= uint2korr(event_buf + FLAGS_OFFSET);
       if (flags & LOG_EVENT_BINLOG_IN_USE_F)
         event_buf[FLAGS_OFFSET] &= ~LOG_EVENT_BINLOG_IN_USE_F;
-      /* 
-         The only algorithm currently is CRC32. Zero indicates 
+      /*
+         The only algorithm currently is CRC32. Zero indicates
          the binlog file is checksum-free *except* the FD-event.
       */
       DBUG_ASSERT(fd_alg == BINLOG_CHECKSUM_ALG_CRC32 || fd_alg == 0);
@@ -1150,7 +1150,7 @@ bool event_checksum_test(uchar *event_bu
     incoming= uint4korr(event_buf + event_len - BINLOG_CHECKSUM_LEN);
     computed= my_checksum(0L, NULL, 0);
     /* checksum the event content but the checksum part itself */
-    computed= my_checksum(computed, (const uchar*) event_buf, 
+    computed= my_checksum(computed, (const uchar*) event_buf,
                           event_len - BINLOG_CHECKSUM_LEN);
     if (flags != 0)
     {
@@ -1163,6 +1163,317 @@ bool event_checksum_test(uchar *event_bu
   return DBUG_EVALUATE_IF("simulate_checksum_test_failure", TRUE, res);
 }
 
+#ifndef MYSQL_CLIENT
+
+#define HASH_ROWS_POS_SEARCH_INVALID -1
+
+/**
+  Utility methods for handling row based operations.
+ */
+
+static uchar*
+hash_slave_rows_get_key(const uchar *record,
+                        size_t *length,
+                        my_bool not_used __attribute__((unused)))
+{
+  DBUG_ENTER("get_key");
+
+  HASH_ROW_ENTRY *entry=(HASH_ROW_ENTRY *) record;
+  HASH_ROW_PREAMBLE *preamble= entry->preamble;
+  *length= preamble->length;
+
+  DBUG_RETURN((uchar*) &preamble->hash_value);
+}
+
+static void
+hash_slave_rows_free_entry(HASH_ROW_ENTRY *entry)
+{
+  DBUG_ENTER("free_entry");
+  if (entry)
+  {
+    if (entry->preamble)
+      my_free(entry->preamble);
+    if (entry->positions)
+      my_free(entry->positions);
+    my_free(entry);
+  }
+  DBUG_VOID_RETURN;
+}
+
+bool Hash_slave_rows::is_empty(void)
+{
+  return (m_hash.records == 0);
+}
+
+/**
+   Hashing commodity structures and functions.
+ */
+
+bool Hash_slave_rows::init(void)
+{
+  if (my_hash_init(&m_hash,
+                   &my_charset_bin,                /* the charater set information */
+                   16 /* TODO */,                  /* growth size */
+                   0,                              /* key offset */
+                   0,                              /* key length */
+                   hash_slave_rows_get_key,                        /* get function pointer */
+                   (my_hash_free_key) hash_slave_rows_free_entry,  /* free function pointer */
+                   MYF(0)))                        /* flags */
+    return true;
+  return false;
+}
+
+bool Hash_slave_rows::deinit(void)
+{
+  if (my_hash_inited(&m_hash))
+    my_hash_free(&m_hash);
+
+  return 0;
+}
+
+int Hash_slave_rows::size()
+{
+  return m_hash.records;
+}
+
+HASH_ROW_ENTRY* Hash_slave_rows::make_entry(const uchar* bi_start, const uchar* bi_ends,
+                                            const uchar* ai_start, const uchar* ai_ends)
+{
+  DBUG_ENTER("Hash_slave_rows::make_entry");
+
+  HASH_ROW_ENTRY *entry= (HASH_ROW_ENTRY*) my_malloc(sizeof(HASH_ROW_ENTRY), MYF(0));
+  HASH_ROW_PREAMBLE *preamble= (HASH_ROW_PREAMBLE *) my_malloc(sizeof(HASH_ROW_PREAMBLE), MYF(0));
+  HASH_ROW_POS *pos= (HASH_ROW_POS *) my_malloc(sizeof(HASH_ROW_POS), MYF(0));
+
+  if (!entry || !preamble || !pos)
+    goto err;
+
+  /**
+     Filling in the preamble.
+   */
+  preamble->hash_value= 0;
+  preamble->length= sizeof(my_hash_value_type);
+  preamble->search_state= HASH_ROWS_POS_SEARCH_INVALID;
+  preamble->is_search_state_inited= false;
+
+  /**
+     Filling in the positions.
+   */
+  pos->bi_start= (const uchar *) bi_start;
+  pos->bi_ends= (const uchar *) bi_ends;
+  pos->ai_start= (const uchar *) ai_start;
+  pos->ai_ends= (const uchar *) ai_ends;
+
+  /**
+    Filling in the entry
+   */
+  entry->preamble= preamble;
+  entry->positions= pos;
+
+  DBUG_RETURN(entry);
+
+err:
+  if (entry)
+    my_free(entry);
+  if (preamble)
+    my_free(preamble);
+  if (pos)
+    my_free(pos);
+  DBUG_RETURN(NULL);
+}
+
+bool
+Hash_slave_rows::put(TABLE *table,
+                     MY_BITMAP *cols,
+                     HASH_ROW_ENTRY* entry)
+{
+
+  DBUG_ENTER("Hash_slave_rows::put");
+
+  HASH_ROW_PREAMBLE* preamble= entry->preamble;
+
+  /**
+     Skip blobs and BIT fields from key calculation.
+     Handle X bits.
+     Handle nulled fields.
+     Handle fields not signaled.
+  */
+  preamble->hash_value= make_hash_key(table, cols);
+
+  my_hash_insert(&m_hash, (uchar *) entry);
+  DBUG_PRINT("debug", ("Added record to hash with key=%u", preamble->hash_value));
+  DBUG_RETURN(false);
+}
+
+HASH_ROW_ENTRY*
+Hash_slave_rows::get(TABLE *table, MY_BITMAP *cols)
+{
+  DBUG_ENTER("Hash_slave_rows::get");
+  HASH_SEARCH_STATE state;
+  my_hash_value_type key;
+  HASH_ROW_ENTRY *entry= NULL;
+
+  key= make_hash_key(table, cols);
+
+  DBUG_PRINT("debug", ("Looking for record with key=%u in the hash.", key));
+
+  entry= (HASH_ROW_ENTRY*) my_hash_first(&m_hash,
+                                         (const uchar*) &key,
+                                         sizeof(my_hash_value_type),
+                                         &state);
+  if (entry)
+  {
+    DBUG_PRINT("debug", ("Found record with key=%u in the hash.", key));
+
+    /**
+       Save the search state in case we need to go through entries for
+       the given key.
+    */
+    entry->preamble->search_state= state;
+    entry->preamble->is_search_state_inited= true;
+  }
+
+  DBUG_RETURN(entry);
+}
+
+bool Hash_slave_rows::next(HASH_ROW_ENTRY** entry)
+{
+  DBUG_ENTER("Hash_slave_rows::next");
+  DBUG_ASSERT(*entry);
+
+  if (*entry == NULL)
+    DBUG_RETURN(true);
+
+  HASH_ROW_PREAMBLE *preamble= (*entry)->preamble;
+
+  if (!preamble->is_search_state_inited)
+    DBUG_RETURN(true);
+
+  my_hash_value_type key= preamble->hash_value;
+  HASH_SEARCH_STATE state= preamble->search_state;
+
+  /*
+    Invalidate search for current preamble, because it is going to be
+    used in the search below (and search state is used in a
+    one-time-only basis).
+   */
+  preamble->search_state= HASH_ROWS_POS_SEARCH_INVALID;
+  preamble->is_search_state_inited= false;
+
+  DBUG_PRINT("debug", ("Looking for record with key=%u in the hash (next).", key));
+
+  /**
+     Do the actual search in the hash table.
+   */
+  *entry= (HASH_ROW_ENTRY*) my_hash_next(&m_hash,
+                                         (const uchar*) &key,
+                                         sizeof(my_hash_value_type),
+                                         &state);
+  if (*entry)
+  {
+    DBUG_PRINT("debug", ("Found record with key=%u in the hash (next).", key));
+    preamble= (*entry)->preamble;
+
+    /**
+       Save the search state for next iteration (if any).
+     */
+    preamble->search_state= state;
+    preamble->is_search_state_inited= true;
+  }
+
+  DBUG_RETURN(false);
+}
+
+bool
+Hash_slave_rows::del(HASH_ROW_ENTRY *entry)
+{
+  DBUG_ENTER("Hash_slave_rows::del");
+  DBUG_ASSERT(entry);
+
+  if (my_hash_delete(&m_hash, (uchar *) entry))
+    DBUG_RETURN(true);
+  DBUG_RETURN(false);
+}
+
+my_hash_value_type
+Hash_slave_rows::make_hash_key(TABLE *table, MY_BITMAP *cols)
+{
+  DBUG_ENTER("Hash_slave_rows::make_hash_key");
+  ha_checksum crc= 0L;
+
+  uchar *record= table->record[0];
+  uchar saved_x= 0, saved_filler= 0;
+
+  if (table->s->null_bytes > 0)
+  {
+    /*
+      If we have an X bit then we need to take care of it.
+    */
+    if (!(table->s->db_options_in_use & HA_OPTION_PACK_RECORD))
+    {
+      saved_x= record[0];
+      record[0]|= 1U;
+    }
+
+    /*
+      If (last_null_bit_pos == 0 && null_bytes > 1), then:
+      X bit (if any) + N nullable fields + M Field_bit fields = 8 bits
+      Ie, the entire byte is used.
+    */
+    if (table->s->last_null_bit_pos > 0)
+    {
+      saved_filler= record[table->s->null_bytes - 1];
+      record[table->s->null_bytes - 1]|=
+        256U - (1U << table->s->last_null_bit_pos);
+    }
+  }
+
+  /*
+    We can only checksum the bytes if all fields have been signaled
+    in the before image. Otherwise, unpack_row will not have set the
+    null_flags correctly (because it only unpacks those fields and
+    their flags that were actually in the before image).
+
+    See also @c record_compare, which likewise skips the null_flags if
+    the read_set is not marked completely.
+   */
+  if (bitmap_is_set_all(cols))
+    crc= my_checksum(crc, table->null_flags, table->s->null_bytes);
+
+  for (Field **ptr=table->field ;
+       *ptr && ((*ptr)->field_index < cols->n_bits);
+       ptr++)
+  {
+    Field *f= (*ptr);
+
+    /* field is set in the read_set and is not a blob or a BIT */
+    if (bitmap_is_set(cols, f->field_index) &&
+        (f->type() != MYSQL_TYPE_BLOB) && (f->type() != MYSQL_TYPE_BIT))
+      crc= my_checksum(crc, f->ptr, f->data_length());
+  }
+
+  /*
+    Restore the saved bytes.
+
+    TODO[record format ndb]: Remove this code once NDB returns the
+    correct record format.
+  */
+  if (table->s->null_bytes > 0)
+  {
+    if (!(table->s->db_options_in_use & HA_OPTION_PACK_RECORD))
+      record[0]= saved_x;
+
+    if (table->s->last_null_bit_pos)
+      record[table->s->null_bytes - 1]= saved_filler;
+  }
+
+  DBUG_PRINT("debug", ("Created key=%u", crc));
+  DBUG_RETURN(crc);
+}
+
+
+#endif
+
 #if defined(MYSQL_SERVER) && defined(HAVE_REPLICATION)
 
 Deferred_log_events::Deferred_log_events(Relay_log_info *rli) : last_added(NULL)

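Putting the Hash_slave_rows pieces together, the HASH_SCAN path uses the
API implemented above roughly as follows. This is a hypothetical
fragment with error handling elided; table, cols, and the
bi_start/bi_end/ai_start/ai_end positions into the event's row buffer
are assumed to be in scope:

  Hash_slave_rows hash;
  hash.init();

  // Phase 1: one entry per row in the event. The key is a checksum of
  // the before image currently unpacked into table->record[0].
  HASH_ROW_ENTRY *entry= hash.make_entry(bi_start, bi_end,
                                         ai_start, ai_end);
  hash.put(table, &cols, entry);

  // Phase 2: scan the table once and probe the hash with each fetched row.
  entry= hash.get(table, &cols);        // NULL when record[0] has no match
  while (entry != NULL)
  {
    // ... apply the delete/update described by entry->positions ...
    HASH_ROW_ENTRY *next_entry= entry;
    if (hash.next(&next_entry))         // walk checksum collisions, if any
      next_entry= NULL;
    hash.del(entry);                    // also frees the entry's memory
    entry= next_entry;
  }

  hash.deinit();
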
=== modified file 'sql/rpl_utility.h'
--- a/sql/rpl_utility.h	2012-04-21 12:11:15 +0000
+++ b/sql/rpl_utility.h	2012-04-24 13:39:42 +0000
@@ -26,9 +26,197 @@
 #include "table.h"                              /* TABLE_LIST */
 #endif
 #include "mysql_com.h"
+#include <hash.h>
+
 
 class Relay_log_info;
 class Log_event;
+#ifndef MYSQL_CLIENT
+
+/**
+   Hash table used when applying row events on the slave and there is
+   no index on the slave's table.
+ */
+
+typedef struct hash_row_pos_st
+{
+  /** 
+      Positions where the before image (bi) and after image (ai) of
+      the row start and end in the event buffer (i.e., the area in
+      memory before unpacking takes place).
+  */
+  const uchar *bi_start;
+  const uchar *bi_ends;
+
+  const uchar *ai_start;
+  const uchar *ai_ends;
+
+} HASH_ROW_POS;
+
+
+/**
+   Internal structure that acts as an in-memory preamble for
+   HASH_ROW_POS.
+   
+   Allocation is done in Hash_slave_rows::make_entry as part of 
+   the entry allocation.
+ */
+typedef struct hash_row_preamble_st
+{
+  /*
+    The actual key.
+   */
+  my_hash_value_type hash_value;
+
+  /**  
+    Length of the key.
+   */
+  uint length;
+
+  /**  
+    The search state used to iterate over multiple entries for a
+    given key.
+   */
+  HASH_SEARCH_STATE search_state;
+
+  /**  
+    Whether this search_state is usable or not.
+   */
+  bool is_search_state_inited;
+
+} HASH_ROW_PREAMBLE;
+
+typedef struct hash_row_entry_st
+{
+  HASH_ROW_PREAMBLE *preamble;
+  HASH_ROW_POS *positions;
+} HASH_ROW_ENTRY;
+
+class Hash_slave_rows 
+{
+public:
+
+  /**
+     Allocates an entry to be added to the hash table. It should be
+     called before calling the member function put.
+     
+     @param bi_start the position to where in the rows buffer the
+                     before image begins.
+     @param bi_ends  the position to where in the rows buffer the
+                     before image ends.
+     @param ai_start the position to where in the rows buffer the 
+                     after image starts (if any).
+     @param ai_ends  the position to where in the rows buffer the
+                     after image ends (if any).
+     @returns NULL if a problem occurred, a valid pointer otherwise.
+   */
+  HASH_ROW_ENTRY* make_entry(const uchar *bi_start, const uchar *bi_ends,
+                             const uchar *ai_start, const uchar *ai_ends);
+
+  /**
+     Puts data into the hash table.
+
+     @param table   The table holding the buffer used to calculate the
+                    key, ie, table->record[0].
+     @param cols    The read_set bitmap signaling which columns are used.
+     @param entry   The entry with the values to store.
+
+     @returns true if something went wrong, false otherwise.
+   */
+  bool put(TABLE* table, MY_BITMAP *cols, HASH_ROW_ENTRY* entry);
+
+  /**
+     Gets the entry, from the hash table, that matches the data in
+     table->record[0] and signaled using cols.
+     
+     @param table   The table holding the buffer containing data used to
+                    make the entry lookup.
+     @param cols    Bitmap signaling which columns, from
+                    table->record[0], should be used.
+
+     @returns a pointer that will hold a reference to the entry
+              found. If the entry is not found then NULL shall be
+              returned.
+   */
+  HASH_ROW_ENTRY* get(TABLE *table, MY_BITMAP *cols);
+
+  /**
+     Gets the entry that stands next to the one pointed to by
+     *entry. Before calling this member function, the entry that one
+     uses as parameter must have: 1. been obtained through get() or
+     next() invocations; and 2. must have not been used before in a
+     next() operation.
+
+     @param entry[IN/OUT] contains a pointer to an entry that we can
+                          use to search for another adjacent entry
+                          (ie, that shares the same key).
+
+     @returns true if something went wrong, false otherwise. In the
+              case that this entry was already used in a next()
+              operation this member function returns true and does not
+              update the pointer.
+   */
+  bool next(HASH_ROW_ENTRY** entry);
+
+  /**
+     Deletes the entry pointed to by entry. It also frees the memory
+     holding the entry's contents. This is the way to release the
+     memory used for an entry; freeing it explicitly with my_free will
+     cause undefined behavior.
+
+     @param entry  Pointer to the entry to be deleted.
+     @returns true if something went wrong, false otherwise.
+   */
+  bool del(HASH_ROW_ENTRY* entry);
+
+  /**
+     Initializes the hash table.
+
+     @returns true if something went wrong, false otherwise.
+   */
+  bool init(void);
+
+  /**
+     De-initializes the hash table.
+
+     @returns true if something went wrong, false otherwise.
+   */
+  bool deinit(void);
+
+  /**
+     Checks if the hash table is empty or not.
+
+     @returns true if the hash table has zero entries, false otherwise.
+   */
+  bool is_empty(void);
+
+  /**
+     Returns the number of entries in the hash table.
+
+     @returns the number of entries in the hash table.
+   */
+  int size();
+  
+private:
+
+  /**
+     The hashtable itself.
+   */
+  HASH m_hash;
+
+  /**
+     Auxiliary internal method used to create a hash key, based on the
+     data in the table->record[0] buffer for the columns signaled in cols.
+
+     @param table  The table that is being scanned
+     @param cols   The read_set bitmap signaling which columns are used.
+
+     @returns the hash key created.
+   */
+  my_hash_value_type make_hash_key(TABLE *table, MY_BITMAP* cols);
+};
+
+#endif
 
 /**
   A table definition from the master.
@@ -301,3 +489,4 @@ public:
   } while (0)
 
 #endif /* RPL_UTILITY_H */
+

=== modified file 'sql/sql_class.h'
--- a/sql/sql_class.h	2012-04-25 15:46:11 +0000
+++ b/sql/sql_class.h	2012-05-02 12:04:42 +0000
@@ -88,6 +88,10 @@ enum enum_slave_exec_mode { SLAVE_EXEC_M
                             SLAVE_EXEC_MODE_LAST_BIT };
 enum enum_slave_type_conversions { SLAVE_TYPE_CONVERSIONS_ALL_LOSSY,
                                    SLAVE_TYPE_CONVERSIONS_ALL_NON_LOSSY};
+enum enum_slave_rows_search_algorithms { SLAVE_ROWS_TABLE_SCAN = (1U << 0),
+                                         SLAVE_ROWS_INDEX_SCAN = (1U << 1),
+                                         SLAVE_ROWS_HASH_SCAN  = (1U << 2)};
+
 enum enum_mark_columns
 { MARK_COLUMNS_NONE, MARK_COLUMNS_READ, MARK_COLUMNS_WRITE};
 enum enum_filetype { FILETYPE_CSV, FILETYPE_XML };

=== modified file 'sql/sys_vars.cc'
--- a/sql/sys_vars.cc	2012-04-27 17:16:36 +0000
+++ b/sql/sys_vars.cc	2012-05-02 12:04:42 +0000
@@ -2472,6 +2472,36 @@ static Sys_var_mybool Sys_slave_sql_veri
        "receiving them from the network before writing them to the relay "
        "log. Enabled by default.",
        GLOBAL_VAR(opt_slave_sql_verify_checksum), CMD_LINE(OPT_ARG), DEFAULT(TRUE));
+
+static bool slave_rows_search_algorithms_check(sys_var *self, THD *thd, set_var *var)
+{
+  String str, *res;
+
+  if(!var->value)
+    return false;
+
+  /** empty value ('') is not allowed */
+  res= var->value->val_str(&str);
+  if (res->is_empty())
+    return true;
+
+  return false;
+}
+
+static const char *slave_rows_search_algorithms_names[]= {"TABLE_SCAN", "INDEX_SCAN", "HASH_SCAN", 0};
+static Sys_var_set Slave_rows_search_algorithms(
+       "slave_rows_search_algorithms", 
+       "Set of searching algorithms that the slave will use while "
+       "searching for records from the storage engine to either "
+       "updated or deleted them. Possible values are: INDEX_SCAN, "
+       "TABLE_SCAN and HASH_SCAN. Any combination is allowed, and "
+       "the slave will always pick the most suitable algorithm for "
+       "any given scenario. "
+       "(Default: INDEX_SCAN, TABLE_SCAN).",
+       GLOBAL_VAR(slave_rows_search_algorithms_options), CMD_LINE(REQUIRED_ARG),
+       slave_rows_search_algorithms_names,
+       DEFAULT(SLAVE_ROWS_INDEX_SCAN | SLAVE_ROWS_TABLE_SCAN),  NO_MUTEX_GUARD,
+       NOT_IN_BINLOG, ON_CHECK(slave_rows_search_algorithms_check), ON_UPDATE(NULL));
 #endif
 
 bool Sys_var_enum_binlog_checksum::global_update(THD *thd, set_var *var)

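Since slave_rows_search_algorithms is a SET variable, any combination of
the three names can be given, e.g.
SET GLOBAL slave_rows_search_algorithms= 'INDEX_SCAN,HASH_SCAN';
(the empty string is rejected by the ON_CHECK function above). Server
code can then test the resulting bitmask against the enum added to
sql_class.h, along these lines (hypothetical helper, not part of the
patch):

  // Hypothetical: true when HASH_SCAN was included in the SET value.
  static inline bool slave_hash_scan_enabled()
  {
    return (slave_rows_search_algorithms_options &
            SLAVE_ROWS_HASH_SCAN) != 0;
  }
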
=== modified file 'storage/blackhole/ha_blackhole.h'
--- a/storage/blackhole/ha_blackhole.h	2011-09-07 10:08:09 +0000
+++ b/storage/blackhole/ha_blackhole.h	2012-04-16 15:31:21 +0000
@@ -54,7 +54,7 @@ public:
   {
     return(HA_NULL_IN_KEY | HA_CAN_FULLTEXT | HA_CAN_SQL_HANDLER |
            HA_BINLOG_STMT_CAPABLE | HA_BINLOG_ROW_CAPABLE |
-           HA_CAN_INDEX_BLOBS | HA_AUTO_PART_KEY |
+           HA_CAN_INDEX_BLOBS | HA_AUTO_PART_KEY | HA_READ_OUT_OF_SYNC |
            HA_FILE_BASED | HA_CAN_GEOMETRY | HA_CAN_INSERT_DELAYED);
   }
   ulong index_flags(uint inx, uint part, bool all_parts) const

No bundle (reason: useless for push emails).