Alfranio, hello.
Thanks for your notes!
>>
...
>> @@ -35,4 +36,88 @@ save_master_pos;
>> connection slave;
>> sync_with_master;
>>
>> -# End of 4.1 tests
>> +
>> +#
>> +# Bug #38205 Row-based Replication (RBR) causes inconsistencies...
>> +#
>> +# Verifying that STOP SLAVE does not interrupt excution of a group
>> +# execution of events if the group can not roll back.
>> +# Killing the sql thread continues to provide a "hard" stop (the
>> +# part II, moved to the bugs suite as it's hard to make it
>> +# deterministic with KILL).
>> +#
>> +
>> +#
>> +# Part I. The being stopped sql thread finishes first the current group of
>> +# events if the group contains an event on a non-transaction table.
>> +
>> +connection master;
>> +create table t1i(n int primary key) engine=innodb;
>> +create table t2m(n int primary key) engine=myisam;
>> +begin;
>> +insert into t1i values (1);
>> +insert into t1i values (2);
>> +insert into t1i values (3);
>> +commit;
>> +
>> +sync_slave_with_master;
>> +connection slave;
>> +begin;
>> +insert into t1i values (5);
>> +
>> +connection master;
>> +let $pos0_master= query_get_value(SHOW MASTER STATUS, Position, 1);
>> +begin;
>> +insert into t1i values (4);
>> +insert into t2m values (1); # non-ta update to process
>> +insert into t1i values (5); # to block at. to be played with stopped
>> +commit;
>> +
>> +connection slave;
>> +# slave sql thread must be locked out by the conn `slave' explicit lock
>> +let $pos0_slave= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1);
>> +--disable_query_log
>> +eval select $pos0_master - $pos0_slave as zero;
>> +--enable_query_log
>> +
>> +connection slave1;
>> +let $count= 1;
>> +let $table= t2m;
>> +source include/wait_until_rows_count.inc;
>> +send stop slave;
>> +
>> +connection slave;
>> +rollback; # release the sql thread
>> +
>>
>
> How many slaves do you have? Two?
> If you have two slave, please describe the architecture of your test.
> Otherwise, please, replace slave1 by slave.
>
> I think you have just one master and one slave.
that's correct.
Were not you confused slave1, slave are not servers but connections to
the slave server?
I need two: one to set up a "stumbling" point for the sql thread, and
the other to issue stop or kill it while the sql thread got stuck in
the middle of executing a transaction.
>> +connection slave1;
>> +reap;
>> +source include/wait_for_slave_to_stop.inc;
>> +let $sql_status= query_get_value(SHOW SLAVE STATUS, Slave_SQL_Running, 1);
>> +--echo *** sql thread is running: $sql_status ***
>> +
>> +
>> +connection master;
>> +let $pos1_master= query_get_value(SHOW MASTER STATUS, Position, 1);
>>
> If you have just one slave, please, this second wait is unnecessary.
>> +
>> +connection slave;
>> +source include/wait_for_slave_sql_to_stop.inc;
>>
> above.
>> +
>> +let $pos1_slave= query_get_value(SHOW SLAVE STATUS, Exec_Master_Log_Pos, 1);
>> +
>> +--echo *** the prove: the stopped slave has finished the current transaction
> ***
>>
> above.
>
>> +
>> +--disable_query_log
>> +select count(*) as five from t1i;
>> +eval select $pos1_master - $pos1_slave as zero;
>> +eval select $pos1_slave > $pos0_slave as one;
>> +--enable_query_log
>> +
>> +start slave;
>> +
>> +# clean-up
>> +connection master;
>> +drop table t1i, t2m;
>> +
>> +sync_slave_with_master;
>> +
>> +# End of tests
>>
>> === modified file 'sql/log_event.cc'
>> --- a/sql/log_event.cc 2009-03-05 10:23:46 +0000
>> +++ b/sql/log_event.cc 2009-03-26 08:25:06 +0000
>> @@ -7114,7 +7114,12 @@ int Rows_log_event::do_apply_event(Relay
>> */
>> lex_start(thd);
>> mysql_reset_thd_for_next_command(thd);
>> -
>> + /*
>> + The current statement is just about to begin and
>> + has not yet modified anything. Note, all.modified is reset
>> + by mysql_reset_thd_for_next_command.
>> + */
>>
> Change this comment because this is not completely true.
> The all.modified is just reset when the following conditions are
> true:
You are right. I should have mentioned the 2nd resetter as well.
The flag is set in two places depending on the auto-increment mode.
In the multi-statement transaction, started with
BEGIN or SET AUTOCOMMIT= OFF
resetting is done in end_active_trans(), otherwise in the mentioned
mysql_reset_thd_for_next_command().
I am referring to the autocommit-OFF case end_active_trans() as well.
>
> If in autocommit mode and not in a transaction, reset
> OPTION_STATUS_NO_TRANS_UPDATE | OPTION_KEEP_LOG to not get warnings
> in ha_rollback_trans() about some tables couldn't be rolled back.
> */
> if (!(thd->options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN)))
> {
> thd->options&= ~OPTION_KEEP_LOG;
> thd->transaction.all.modified_non_trans_table= FALSE;
> }
>
> Otherwise, your patch would be wrong. Maybe you can copy part of the
> comment.
And the comments you are referring got weary:
OPTION_STATUS_NO_TRANS_UPDATE does not exist anymore. I am removing
that mentioning.
>
> Can you point out where this flag is set by the replication?
The flags are not just for replication. As said in above
the idea is also to " get warnings in ha_rollback_trans() about some tables
couldn't be rolled back".
The all flags got reset at committing, end_active_trans() (auto-commit OFF) and
a statement start, mysql_reset_thd_for_next_command() (auto-commit ON).
For BRB we need to reset thd->transaction.stmt.modified_non_trans_table
as an analog for the query post-exection thd->transaction.stmt.reset().
It does not really matter if RBR resetting happens differently at the
beginning.
>> + thd->transaction.stmt.modified_non_trans_table= FALSE;
>> /*
>> Check if the slave is set to use SBR. If so, it should switch
>> to using RBR until the end of the "statement", i.e., next
>> @@ -7217,6 +7222,7 @@ int Rows_log_event::do_apply_event(Relay
>>
>> if (table)
>> {
>> + bool transactional_table= table->file->has_transactions();
>> /*
>> table == NULL means that this table should not be replicated
>> (this was set up by Table_map_log_event::do_apply_event()
>> @@ -7348,6 +7354,9 @@ int Rows_log_event::do_apply_event(Relay
>>
>> m_curr_row= m_curr_row_end;
>>
>> + if (error == 0 && !transactional_table)
>> + thd->transaction.all.modified_non_trans_table=
>> + thd->transaction.stmt.modified_non_trans_table= TRUE;
>> } // row processing loop
>>
>> DBUG_EXECUTE_IF("STOP_SLAVE_after_first_Rows_event",
>>
>> === modified file 'sql/slave.cc'
>> --- a/sql/slave.cc 2009-01-09 12:49:24 +0000
>> +++ b/sql/slave.cc 2009-03-26 08:25:06 +0000
>> @@ -687,6 +687,9 @@ static bool sql_slave_killed(THD* thd, R
>> DBUG_ASSERT(rli->slave_running == 1);// tracking buffer overrun
>> if (abort_loop || thd->killed || rli->abort_slave)
>> {
>> + if (rli->abort_slave && rli->is_in_group() &&
>> + thd->transaction.all.modified_non_trans_table)
>> + DBUG_RETURN(0);
>> /*
>> If we are in an unsafe situation (stopping could corrupt replication),
>> we give one minute to the slave SQL thread of grace before really
>>
>>
cheers,
Andrei