5287 Andrei Elkin 2013-07-08
BUG#16594095
Post-push refinement to fix a valgrind issue.
The basic pushed patch did not expect the block
if (!ret_worker->checkpoint_notified) {}
to be executed multiple times during scheduling of one group.
In fact, it should have been left intact so that it continues to be executed
at T-event time, which guarantees that it runs only once.
That is now restored, i.e. that part of the former patch is reverted.
As a matter of fact, the physical coordinates were unknown to the Worker
only while scheduling the first transaction after a master binlog
rotation, or the very first transaction at MTS start.
Information about the master binlog at such points is now passed to the Worker
via an augmented notification mechanism.
The new notification is light both in implementation and in terms of execution:
it is supposed to be pretty rare, and it happens never more than once during
the scheduling of one transaction (as asserted in the DBUG version of the patch).
rpl_conflicts.test is also adjusted to sort out the
rpl_{row,stm}_conflicts.test failures on PB2.
modified:
mysql-test/extra/rpl_tests/rpl_conflicts.test
mysql-test/suite/rpl/r/rpl_row_conflicts.result
mysql-test/suite/rpl/r/rpl_stm_conflicts.result
sql/log_event.cc
sql/rpl_rli.cc
sql/rpl_rli_pdb.cc
sql/rpl_rli_pdb.h
=== modified file 'mysql-test/extra/rpl_tests/rpl_conflicts.test'
--- a/mysql-test/extra/rpl_tests/rpl_conflicts.test revid:anitha.gopi@stripped
+++ b/mysql-test/extra/rpl_tests/rpl_conflicts.test revid:andrei.elkin@stripped
@@ -89,8 +89,11 @@ if (`SELECT @@global.binlog_format != 'R
--echo ---- Wait until slave stops with an error ----
# Wait until the slave tries to run the query, fails with duplicate
# key error, and stops the SQL thread.
- let $slave_sql_errno= convert_error(ER_DUP_ENTRY)
+
+ let $slave_sql_errno= convert_error(ER_DUP_ENTRY);
source include/wait_for_slave_sql_error.inc;
+ --let $errno= query_get_value("SHOW SLAVE STATUS", Last_SQL_Errno, 1)
+ --eval SELECT "$errno" as 'Last_SQL_Errno'
call mtr.add_suppression("Slave SQL.*Duplicate entry .1. for key .PRIMARY.* Error_code: 1062");
call mtr.add_suppression("The slave coordinator and worker threads are stopped, possibly leaving data in inconsistent state");
@@ -139,8 +142,11 @@ connection slave;
if (`SELECT @@global.binlog_format = 'ROW' AND @@global.slave_exec_mode = 'STRICT'`) {
--echo ---- Wait until slave stops with an error ----
call mtr.add_suppression("Slave SQL.*Can.t find record in .t1., Error_code: 1032");
- let $slave_sql_errno= convert_error(ER_KEY_NOT_FOUND)
+
+ let $slave_sql_errno= convert_error(ER_KEY_NOT_FOUND);
source include/wait_for_slave_sql_error.inc;
+ --let $errno= query_get_value("SHOW SLAVE STATUS", Last_SQL_Errno, 1)
+ --eval SELECT "$errno" as 'Last_SQL_Errno'
SELECT * FROM t1;
=== modified file 'mysql-test/suite/rpl/r/rpl_row_conflicts.result'
--- a/mysql-test/suite/rpl/r/rpl_row_conflicts.result revid:anitha.gopi@stripped
+++ b/mysql-test/suite/rpl/r/rpl_row_conflicts.result revid:andrei.elkin@stripped
@@ -23,6 +23,10 @@ a
1
[on slave]
---- Wait until slave stops with an error ----
+include/wait_for_slave_sql_error.inc [errno=1062]
+SELECT "1062" as 'Last_SQL_Errno';
+Last_SQL_Errno
+1062
call mtr.add_suppression("Slave SQL.*Duplicate entry .1. for key .PRIMARY.* Error_code: 1062");
call mtr.add_suppression("The slave coordinator and worker threads are stopped, possibly leaving data in inconsistent state");
SELECT * FROM t1;
@@ -52,6 +56,10 @@ a
[on slave]
---- Wait until slave stops with an error ----
call mtr.add_suppression("Slave SQL.*Can.t find record in .t1., Error_code: 1032");
+include/wait_for_slave_sql_error.inc [errno=1032]
+SELECT "1032" as 'Last_SQL_Errno';
+Last_SQL_Errno
+1032
SELECT * FROM t1;
a
---- Resolve the conflict on the slave and restart SQL thread ----
=== modified file 'mysql-test/suite/rpl/r/rpl_stm_conflicts.result'
--- a/mysql-test/suite/rpl/r/rpl_stm_conflicts.result revid:anitha.gopi@stripped
+++ b/mysql-test/suite/rpl/r/rpl_stm_conflicts.result revid:andrei.elkin@stripped
@@ -18,6 +18,10 @@ a
1
[on slave]
---- Wait until slave stops with an error ----
+include/wait_for_slave_sql_error.inc [errno=1062]
+SELECT "1062" as 'Last_SQL_Errno';
+Last_SQL_Errno
+1062
call mtr.add_suppression("Slave SQL.*Duplicate entry .1. for key .PRIMARY.* Error_code: 1062");
call mtr.add_suppression("The slave coordinator and worker threads are stopped, possibly leaving data in inconsistent state");
SELECT * FROM t1;
=== modified file 'sql/log_event.cc'
--- a/sql/log_event.cc revid:anitha.gopi@stripped
+++ b/sql/log_event.cc revid:andrei.elkin@stripped
@@ -2863,23 +2863,22 @@ Slave_worker *Log_event::get_slave_worke
DBUG_ASSERT(ret_worker);
/*
- Preparing event physical coordinates info for Worker before any event got
- scheduled so when Worker error-stopped at the first event it would be aware of
- where exactly in the event stream.
+ Preparing event physical coordinates info for Worker before any
+ event got scheduled so when Worker error-stopped at the first
+ event it would be aware of where exactly in the event stream.
*/
- if (!ret_worker->checkpoint_notified)
+ if (!ret_worker->master_log_change_notified)
{
if (!ptr_group)
ptr_group= gaq->get_job_group(rli->gaq->assigned_group_index);
- ptr_group->checkpoint_log_name=
+ ptr_group->group_master_log_name=
my_strdup(rli->get_group_master_log_name(), MYF(MY_WME));
- ptr_group->checkpoint_log_pos= rli->get_group_master_log_pos();
- ptr_group->checkpoint_relay_log_name=
- my_strdup(rli->get_group_relay_log_name(), MYF(MY_WME));
- ptr_group->checkpoint_relay_log_pos= rli->get_group_relay_log_pos();
- ptr_group->shifted= ret_worker->bitmap_shifted;
- ret_worker->bitmap_shifted= 0;
- ret_worker->checkpoint_notified= TRUE;
+ ret_worker->master_log_change_notified= true;
+
+ DBUG_ASSERT(!ptr_group->notified);
+#ifndef DBUG_OFF
+ ptr_group->notified= true;
+#endif
}
// T-event: Commit, Xid, a DDL query or dml query of B-less group.
@@ -2921,6 +2920,21 @@ Slave_worker *Log_event::get_slave_worke
ret_worker->relay_log_change_notified= TRUE;
}
+
+ if (!ret_worker->checkpoint_notified)
+ {
+ if (!ptr_group)
+ ptr_group= gaq->get_job_group(rli->gaq->assigned_group_index);
+ ptr_group->checkpoint_log_name=
+ my_strdup(rli->get_group_master_log_name(), MYF(MY_WME));
+ ptr_group->checkpoint_log_pos= rli->get_group_master_log_pos();
+ ptr_group->checkpoint_relay_log_name=
+ my_strdup(rli->get_group_relay_log_name(), MYF(MY_WME));
+ ptr_group->checkpoint_relay_log_pos= rli->get_group_relay_log_pos();
+ ptr_group->shifted= ret_worker->bitmap_shifted;
+ ret_worker->bitmap_shifted= 0;
+ ret_worker->checkpoint_notified= TRUE;
+ }
ptr_group->checkpoint_seqno= rli->checkpoint_seqno;
ptr_group->ts= when.tv_sec + (time_t) exec_time; // Seconds_behind_master related
rli->checkpoint_seqno++;
=== modified file 'sql/rpl_rli.cc'
--- a/sql/rpl_rli.cc revid:anitha.gopi@stripped
+++ b/sql/rpl_rli.cc revid:andrei.elkin@stripped
@@ -241,6 +241,13 @@ void Relay_log_info::reset_notified_chec
*/
w->checkpoint_notified= FALSE;
w->bitmap_shifted= w->bitmap_shifted + shift;
+ /*
+ Zero shift indicates the caller rotates the master binlog.
+ The new name will be passed to W through the group descriptor
+ during the first post-rotation time scheduling.
+ */
+ if (shift == 0)
+ w->master_log_change_notified= false;
DBUG_PRINT("mts", ("reset_notified_checkpoint shift --> %lu, "
"worker->bitmap_shifted --> %lu, worker --> %u.",
=== modified file 'sql/rpl_rli_pdb.cc'
--- a/sql/rpl_rli_pdb.cc revid:anitha.gopi@stripped
+++ b/sql/rpl_rli_pdb.cc revid:andrei.elkin@stripped
@@ -144,7 +144,8 @@ int Slave_worker::init_worker(Relay_log_
id= i;
curr_group_exec_parts.elements= 0;
relay_log_change_notified= FALSE; // the 1st group to contain relaylog name
- checkpoint_notified= FALSE;
+ checkpoint_notified= FALSE; // the same as above
+ master_log_change_notified= false;// W learns master log during 1st group exec
bitmap_shifted= 0;
workers= c_rli->workers; // shallow copying is sufficient
wq_size_waits_cnt= groups_done= events_done= curr_jobs= 0;
@@ -411,6 +412,23 @@ bool Slave_worker::commit_positions(Log_
{
DBUG_ENTER("Slave_worker::checkpoint_positions");
+ /*
+ Initial value of checkpoint_master_log_name is learned from
+ group_master_log_name. The latter can be passed to Worker
+ at rare event of master binlog rotation.
+ This initialization is needed to provide to Worker info
+ on physical coordiates during execution of the very first group
+ after a rotation.
+ */
+ if (ptr_g->group_master_log_name != NULL)
+ {
+ strmake(group_master_log_name, ptr_g->group_master_log_name,
+ sizeof(group_master_log_name) - 1);
+ my_free(ptr_g->group_master_log_name);
+ ptr_g->group_master_log_name= NULL;
+ strmake(checkpoint_master_log_name, group_master_log_name,
+ sizeof(checkpoint_master_log_name) - 1);
+ }
if (ptr_g->checkpoint_log_name != NULL)
{
strmake(checkpoint_relay_log_name, ptr_g->checkpoint_relay_log_name,
@@ -1444,6 +1462,10 @@ void Slave_committed_queue::free_dynamic
{
my_free(ptr_g->checkpoint_relay_log_name);
}
+ if (ptr_g->group_master_log_name)
+ {
+ my_free(ptr_g->group_master_log_name);
+ }
}
DBUG_ASSERT((avail == size /* full */ || entry == size /* empty */) ||
i == avail /* all occupied are processed */);
=== modified file 'sql/rpl_rli_pdb.h'
--- a/sql/rpl_rli_pdb.h revid:anitha.gopi@stripped
+++ b/sql/rpl_rli_pdb.h revid:andrei.elkin@stripped
@@ -164,7 +164,9 @@ typedef struct st_slave_job_group
volatile uchar done; // Flag raised by W, read and reset by Coordinator
ulong shifted; // shift the last CP bitmap at receiving a new CP
time_t ts; // Group's timestampt to update Seconds_behind_master
-
+#ifndef DBUG_OFF
+ bool notified; // to debug group_master_log_name change notification
+#endif
/*
Coordinator fills the struct with defaults and options at starting of
a group distribution.
@@ -183,6 +185,9 @@ typedef struct st_slave_job_group
checkpoint_relay_log_pos= 0;
checkpoint_seqno= (uint) -1;
done= 0;
+#ifndef DBUG_OFF
+ notified= false;
+#endif
}
} Slave_job_group;
@@ -320,6 +325,7 @@ public:
volatile bool relay_log_change_notified; // Coord sets and resets, W can read
volatile bool checkpoint_notified; // Coord sets and resets, W can read
+ volatile bool master_log_change_notified; // Coord sets and resets, W can read
ulong bitmap_shifted; // shift the last bitmap at receiving new CP
// WQ current excess above the overrun level
long wq_overrun_cnt;
No bundle (reason: useless for push emails).
Thread |
---|
• bzr push into mysql-5.6 branch (andrei.elkin:5286 to 5287) Bug#16594095 | Andrei Elkin | 20 Aug |