#At file:///home/andrei/MySQL/BZR/2a-23May/WL/mysql-next-mr-wl5569/ based on revid:andrei.elkin@stripped
3282 Andrei Elkin 2011-06-06
wl#5569 MTS
STOP SLAVE now stops consistently, without gaps; KILL
shall be used for an urgent stop, and an error case behaves like the
killed case. For instance, when a Worker errors out, it sends KILL to
the Coordinator through THD::awake(), and the Coordinator kills the rest
by setting a special Worker running status to killed (which
breaks the read-exec loop of a Worker).
@ sql/log_event.cc
Changing the style of computing the mts-in-group bool arg passed to mts_async_exec_by_coordinator().
@ sql/rpl_rli.cc
Changing style of computing mts-in-group arg of an if in stmt_done().
@ sql/rpl_rli.h
Adding more states to Coordinator's MTS-group view.
@ sql/rpl_rli_pdb.cc
Relocating the notification of a Worker's failure by the Worker into
the error branch of a function releasing common resources (entries of APH hash).
The failed Worker tries to awaken the Coordinator, which is possibly waiting for the signal.
The latter's behaviour in its turn is refined to not enter the waiting phase
when it has already been killed.
@ sql/rpl_slave.cc
sql_slave_killed() is made of two flavors of the error branches.
A STOPped MTS Coordinator does not give up too early and waits until
its MTS-group state allows that.
Notification with kill to the Coordinator from the errored-out or killed
Worker is moved into a function releasing common resources (entries of APH hash).
This case designates a hard stop.
In case of the soft (SLAVE-STOPped) MTS, the Coordinator is made to wait for
full completion of the Workers' assignments before marking their running status for stopping.
modified:
sql/log_event.cc
sql/rpl_rli.cc
sql/rpl_rli.h
sql/rpl_rli_pdb.cc
sql/rpl_slave.cc
=== modified file 'sql/log_event.cc'
--- a/sql/log_event.cc 2011-06-05 17:01:51 +0000
+++ b/sql/log_event.cc 2011-06-06 10:51:19 +0000
@@ -2581,6 +2581,7 @@ Slave_worker *Log_event::get_slave_worke
// special marking for T event of {p,g} B-less group
if (num_dbs == OVER_MAX_DBS_IN_EVENT_MTS)
mts_do_isolate_event();
+ rli->mts_group_status= Relay_log_info::MTS_END_GROUP;
ptr_g= (Slave_job_group *)
dynamic_array_ptr(&rli->gaq->Q, rli->gaq->assigned_group_index);
@@ -2850,7 +2851,7 @@ int Log_event::apply_event(Relay_log_inf
if (!(parallel= rli->is_parallel_exec()) ||
(async_event=
mts_async_exec_by_coordinator(::server_id,
- rli->mts_group_status == Relay_log_info::MTS_IN_GROUP)) ||
+ rli->mts_group_status != Relay_log_info::MTS_NOT_IN_GROUP)) ||
(seq_event= mts_sequential_exec()))
{
if (parallel)
=== modified file 'sql/rpl_rli.cc'
--- a/sql/rpl_rli.cc 2011-06-05 17:01:51 +0000
+++ b/sql/rpl_rli.cc 2011-06-06 10:51:19 +0000
@@ -1020,7 +1020,7 @@ void Relay_log_info::stmt_done(my_off_t
while the MyISAM table has already been updated.
*/
if ((!is_parallel_exec() && is_in_group()) ||
- mts_group_status == MTS_IN_GROUP)
+ mts_group_status != MTS_NOT_IN_GROUP)
{
inc_event_relay_log_pos();
}
=== modified file 'sql/rpl_rli.h'
--- a/sql/rpl_rli.h 2011-06-05 17:01:51 +0000
+++ b/sql/rpl_rli.h 2011-06-06 10:51:19 +0000
@@ -488,8 +488,14 @@ public:
*/
enum
{
- MTS_NOT_IN_GROUP, /* not in group includes Single-Threaded-Slave */
- MTS_IN_GROUP /* an event was scheduled to a Worker */
+ /*
+ no new events were scheduled after last synchronization,
+ includes Single-Threaded-Slave case.
+ */
+ MTS_NOT_IN_GROUP,
+ MTS_IN_GROUP, /* at least one event was scheduled to a Worker */
+ MTS_END_GROUP, /* the last scheduled event is a terminal event */
+ MTS_KILLED_GROUP /* Coordinator gave out to reach MTS_END_GROUP */
} mts_group_status;
/* most of allocation in the coordinator rli is there */
=== modified file 'sql/rpl_rli_pdb.cc'
--- a/sql/rpl_rli_pdb.cc 2011-06-05 17:01:51 +0000
+++ b/sql/rpl_rli_pdb.cc 2011-06-06 10:51:19 +0000
@@ -816,6 +816,17 @@ void Slave_worker::slave_worker_ends_gro
ep->elements= 0;
curr_group_seen_begin= FALSE;
+
+ if (error)
+ {
+ mysql_mutex_lock(&slave_worker_hash_lock);
+ mysql_mutex_lock(&c_rli->info_thd->LOCK_thd_data);
+
+ c_rli->info_thd->awake(THD::KILL_QUERY); // notify Crdn
+
+ mysql_mutex_unlock(&c_rli->info_thd->LOCK_thd_data);
+ mysql_mutex_unlock(&slave_worker_hash_lock);
+ }
}
@@ -1114,7 +1125,7 @@ int wait_for_workers_to_finish(Relay_log
continue;
}
- if (entry->usage > 0)
+ if (entry->usage > 0 && !thd->killed)
{
sprintf(wait_info, info_format, entry->worker->id, entry->db);
entry->worker= NULL; // mark Worker to signal when usage drops to 0
@@ -1125,7 +1136,7 @@ int wait_for_workers_to_finish(Relay_log
thd->exit_cond(proc_info);
ret++;
- DBUG_ASSERT(entry->usage == 0 || thd->killed || rli->abort_slave);
+ DBUG_ASSERT(entry->usage == 0 || thd->killed);
}
else
{
=== modified file 'sql/rpl_slave.cc'
--- a/sql/rpl_slave.cc 2011-06-05 17:01:51 +0000
+++ b/sql/rpl_slave.cc 2011-06-06 10:51:19 +0000
@@ -1090,9 +1090,10 @@ static bool sql_slave_killed(THD* thd, R
as well.
Example: OPTION_KEEP_LOG is set if a temporary table is created or dropped.
*/
- if ((thd->transaction.all.modified_non_trans_table ||
- (thd->variables.option_bits & OPTION_KEEP_LOG))
- && rli->is_in_group())
+ if ((!rli->is_parallel_exec() &&
+ (thd->transaction.all.modified_non_trans_table ||
+ (thd->variables.option_bits & OPTION_KEEP_LOG)) && rli->is_in_group())
+ || (rli->mts_group_status == Relay_log_info::MTS_IN_GROUP))
{
char msg_stopped[]=
"... The slave SQL is stopped, leaving the current group "
@@ -1101,6 +1102,14 @@ static bool sql_slave_killed(THD* thd, R
"restarting the slave with --slave-exec-mode=IDEMPOTENT, which "
"ignores duplicate key, key not found, and similar errors (see "
"documentation for details).";
+ char msg_stopped_mts[]=
+ "... The slave Coordinator and Worker threads are stopped, possibly "
+ "leaving data in inconsistent state. The following restart shall "
+ "restore consistency automatically. There might be exceptional situations "
+ "in the recovery caused by combination of non-transactional storage for "
+ "either of Coordinator or Workers info tables and updating non-transactional "
+ "data tables or DDL queries. In such cases you have to examine your data "
+ "(see documentation for details).";
if (rli->abort_slave)
{
@@ -1137,7 +1146,9 @@ static bool sql_slave_killed(THD* thd, R
else
{
rli->report(ERROR_LEVEL, ER_SLAVE_FATAL_ERROR,
- ER(ER_SLAVE_FATAL_ERROR), msg_stopped);
+ ER(ER_SLAVE_FATAL_ERROR),
+ rli->mts_group_status == Relay_log_info::MTS_NOT_IN_GROUP ?
+ msg_stopped : msg_stopped_mts);
}
}
else
@@ -1153,7 +1164,13 @@ static bool sql_slave_killed(THD* thd, R
}
}
if (ret)
+ {
rli->last_event_start_time= 0;
+ if (rli->mts_group_status == Relay_log_info::MTS_IN_GROUP)
+ {
+ rli->mts_group_status= Relay_log_info::MTS_KILLED_GROUP;
+ }
+ }
DBUG_RETURN(ret);
}
@@ -3798,14 +3815,7 @@ pthread_handler_t handle_slave_worker(vo
{
error= slave_worker_exec_job(w, rli);
}
-
w->cleanup_context(thd, error);
- if (error)
- {
- mysql_mutex_lock(&rli->info_thd->LOCK_thd_data);
- rli->info_thd->awake(THD::KILL_QUERY); // notify Crdn
- mysql_mutex_unlock(&rli->info_thd->LOCK_thd_data);
- }
mysql_mutex_lock(&w->jobs_lock);
@@ -4293,9 +4303,12 @@ err:
/*
Ending Worker threads.
+ Not in case Coordinator is killed itself, it first waits for
+ Workers have finished their assignements, and then updates checkpoint.
Workers are notified with setting KILLED status
and waited for their acknowledgment as specified by
worker's running_status.
+ Coordinator finalizes with its MTS running status to reset few objects.
*/
void slave_stop_workers(Relay_log_info *rli)
{
@@ -4306,11 +4319,21 @@ void slave_stop_workers(Relay_log_info *
return;
/*
- this is the soft stop. In order for waiting be successful Coordinator
- needs (*TODO*) to guarantee Workers were assigned with full groups.
+ In case of the "soft" graceful stop Coordinator
+ guaranteed Workers were assigned with full groups so waiting
+ will be resultful.
+ "Hard" stop with KILLing Coordinator or erroring out by a Worker
+ can't wait for Workers' completion because those may not receive
+ commit-events of last assigned groups.
*/
- // (void) wait_for_workers_to_finish(rli);
+ if (rli->mts_group_status != Relay_log_info::MTS_KILLED_GROUP &&
+ thd->killed == THD::NOT_KILLED)
+ {
+ DBUG_ASSERT(rli->mts_group_status != Relay_log_info::MTS_IN_GROUP);
+ (void) wait_for_workers_to_finish(rli);
+ (void) mts_checkpoint_routine(rli, 0, FALSE, FALSE); // todo: error branch
+ }
for (i= rli->workers.elements - 1; i >= 0; i--)
{
Slave_worker *w;
Attachment: [text/bzr-bundle] bzr/andrei.elkin@oracle.com-20110606105119-j8yk9b45uvirqvf4.bundle
| Thread |
|---|
| • bzr commit into mysql-next-mr-wl5569 branch (andrei.elkin:3282) WL#5569 | Andrei Elkin | 6 Jun |