4174 Andrei Elkin 2012-08-24 [merge]
merge from 5.6 repo
=== modified file 'sql/rpl_rli.h'
--- a/sql/rpl_rli.h 2012-08-09 10:05:01 +0000
+++ b/sql/rpl_rli.h 2012-08-24 13:00:09 +0000
@@ -573,7 +573,7 @@ public:
/*
MTS statistics:
*/
- ulong mts_events_assigned; // number of events (statements) scheduled
+ ulonglong mts_events_assigned; // number of events (statements) scheduled
ulong mts_groups_assigned; // number of groups (transactions) scheduled
volatile ulong mts_wq_overrun_cnt; // counter of all mts_wq_excess_cnt increments
ulong wq_size_waits_cnt; // number of times C slept due to WQ:s oversize
@@ -590,6 +590,7 @@ public:
a new partition. Is updated at checkpoint commit to the main RLI.
*/
DYNAMIC_ARRAY least_occupied_workers;
+ time_t mts_last_online_stat;
/* end of MTS statistics */
/* most of allocation in the coordinator rli is there */
=== modified file 'sql/rpl_rli_pdb.cc'
--- a/sql/rpl_rli_pdb.cc 2012-08-21 08:50:39 +0000
+++ b/sql/rpl_rli_pdb.cc 2012-08-24 13:00:09 +0000
@@ -163,7 +163,9 @@ int Slave_worker::init_worker(Relay_log_
insert_dynamic(&jobs.Q, (uchar*) &empty);
DBUG_ASSERT(jobs.Q.elements == jobs.size);
- wq_overrun_set= FALSE;
+ wq_overrun_cnt= 0;
+ // overrun level is symmetric to underrun (as underrun to the full queue)
+ overrun_level= ((100 - rli->mts_worker_underrun_level) * jobs.size) / 100.0;
DBUG_RETURN(0);
}
@@ -1655,7 +1657,11 @@ bool append_item_to_jobs(slave_job_item
thd->EXIT_COND(&old_stage);
if (thd->killed)
return true;
-
+ if (log_warnings > 1 && (rli->wq_size_waits_cnt % 10 == 1))
+ sql_print_information("Multi-threaded slave: Coordinator has waited "
+ "%lu times hitting slave_pending_jobs_size_max; "
+ "current event size = %lu.",
+ rli->wq_size_waits_cnt, ev_size);
mysql_mutex_lock(&rli->pending_jobs_lock);
new_pend_size= rli->mts_pending_jobs_size + ev_size;
@@ -1667,13 +1673,23 @@ bool append_item_to_jobs(slave_job_item
mysql_mutex_unlock(&rli->pending_jobs_lock);
/*
- Sleep unless there is an underrunning Worker.
+ Sleep unless there is an underrunning Worker or the current Worker's
+ queue is empty.
*/
- if (rli->mts_wq_underrun_w_id == MTS_WORKER_UNDEF)
+ if (rli->mts_wq_underrun_w_id == MTS_WORKER_UNDEF && worker->jobs.len > 0)
{
- // todo: experiment with weight to get a good approximation formula
- // The longer Sleep lasts the bigger is excessive overrun counter.
+ /*
+ todo: experiment with weight to get a good approximation formula
+ The bigger the excessive overrun counter the longer the nap.
+ */
ulong nap_weight= rli->mts_wq_excess_cnt + 1;
+ /*
+ Nap time is a product of a weight factor and the basic nap unit.
+ The weight factor is proportional to the worker queues overrun excess
+ counter. For example, when there is only one overrunning Worker,
+ the max nap_weight, being 0.1 * worker->jobs.size, is
+ about 1600, so the max nap time is approx. 0.008 secs.
+ */
my_sleep(nap_weight * rli->mts_coordinator_basic_nap);
rli->mts_wq_no_underrun_cnt++;
}
@@ -1893,9 +1909,14 @@ int slave_worker_exec_job(Slave_worker *
rli->mts_pending_jobs_size -= ev->data_written;
DBUG_ASSERT(rli->mts_pending_jobs_size < rli->mts_pending_jobs_size_max);
- // underrun (number of pending assignments is less than underrun level)
- if ((rli->mts_worker_underrun_level * worker->jobs.size) / 100.0 >
- worker->jobs.len)
+ /*
+ The positive branch is underrun: number of pending assignments
+ is less than underrun level.
+ A zero jobs.len has to reset the underrun w_id, as the worker may not
+ get its next assignment for a long time.
+ */
+ if (((rli->mts_worker_underrun_level * worker->jobs.size) / 100.0 >
+ worker->jobs.len) && (worker->jobs.len != 0))
{
rli->mts_wq_underrun_w_id= worker->id;
} else if (rli->mts_wq_underrun_w_id == worker->id)
@@ -1903,22 +1924,37 @@ int slave_worker_exec_job(Slave_worker *
// reset only own marking
rli->mts_wq_underrun_w_id= MTS_WORKER_UNDEF;
}
-
- // overrun is symmetric to underrun. In a sense it's underrun to get to 100%
- if (((100 - rli->mts_worker_underrun_level) * worker->jobs.size) / 100.0
- < worker->jobs.len)
+
+ /*
+ Overrun handling.
+ The Worker's private excess counter and the total excess counter are
+ incremented by the number of events queued above the
+ (100 - rli->mts_worker_underrun_level) percent level.
+ The increment to the total counter is the difference between
+ the current and the previous private excess (worker->wq_overrun_cnt).
+ When the current queue length drops below overrun_level, the global
+ counter is decremented by the private excess and the private one is reset.
+ */
+ if (worker->overrun_level < worker->jobs.len)
{
- rli->mts_wq_excess_cnt++;
- worker->wq_overrun_set= TRUE;
- rli->mts_wq_overrun_cnt++;
+ ulong last_overrun= worker->wq_overrun_cnt;
+
+ worker->wq_overrun_cnt= worker->jobs.len - worker->overrun_level; //current
+ rli->mts_wq_excess_cnt+= (worker->wq_overrun_cnt - last_overrun);
+ rli->mts_wq_overrun_cnt++; // statistics
+
+ // guarding correctness of incrementing in case of the only one Worker
+ DBUG_ASSERT(rli->workers.elements != 1 ||
+ rli->mts_wq_excess_cnt == worker->wq_overrun_cnt);
}
- else if (worker->wq_overrun_set == TRUE)
+ else if (worker->wq_overrun_cnt > 0)
{
- rli->mts_wq_excess_cnt--;
- worker->wq_overrun_set= FALSE;
- }
+ // When level drops below the total excess is decremented
+ rli->mts_wq_excess_cnt -= worker->wq_overrun_cnt;
+ worker->wq_overrun_cnt= 0; // and the local is reset
- DBUG_ASSERT(rli->mts_wq_excess_cnt >= 0);
+ DBUG_ASSERT(rli->mts_wq_excess_cnt >= 0);
+ }
/* coordinator can be waiting */
if (rli->mts_pending_jobs_size < rli->mts_pending_jobs_size_max &&
=== modified file 'sql/rpl_rli_pdb.h'
--- a/sql/rpl_rli_pdb.h 2012-08-22 08:24:05 +0000
+++ b/sql/rpl_rli_pdb.h 2012-08-24 13:00:09 +0000
@@ -321,8 +321,14 @@ public:
volatile bool relay_log_change_notified; // Coord sets and resets, W can read
volatile bool checkpoint_notified; // Coord sets and resets, W can read
ulong bitmap_shifted; // shift the last bitmap at receiving new CP
- bool wq_overrun_set; // W marks inself as incrementer of rli->mts_wq_excess_cnt
-
+ // W private counter, incremented in step with rli->mts_wq_excess_cnt
+ long wq_overrun_cnt;
+ /*
+ number of events starting from which Worker queue is regarded as
+ close to full. The number of the excessive events yields a weight factor
+ to compute Coordinator's nap.
+ */
+ ulong overrun_level;
/*
Coordinates of the last CheckPoint (CP) this Worker has
acknowledged; part of is persisent data
=== modified file 'sql/rpl_slave.cc'
--- a/sql/rpl_slave.cc 2012-08-09 10:05:01 +0000
+++ b/sql/rpl_slave.cc 2012-08-24 13:00:09 +0000
@@ -91,9 +91,15 @@ const char *relay_log_basename= 0;
const ulong mts_slave_worker_queue_len_max= 16384;
/*
+ Period, in seconds, between statistics reports written to the error log
+ when --log-warnings > 1
+*/
+const long mts_online_stat_period= 60 * 2;
+
+
+/*
MTS load-ballancing parameter.
- Time in microsecs to sleep by MTS Coordinator to avoid the Worker queues
- room overrun.
+ Basic time unit, in microsecs, that the MTS Coordinator sleeps to avoid
+ extra thread signalling when Worker queues are close to being filled up.
*/
const ulong mts_coordinator_basic_nap= 5;
@@ -101,8 +107,18 @@ const ulong mts_coordinator_basic_nap= 5
MTS load-ballancing parameter.
Percent of Worker queue size at which Worker is considered to become
hungry.
+
+ C enqueues --+ . underrun level
+ V "
+ +----------+-+------------------+--------------+
+ | empty |.|::::::::::::::::::|xxxxxxxxxxxxxx| ---> Worker dequeues
+ +----------+-+------------------+--------------+
+
+ As in the above diagram, enqueuing to the x-ed area would indicate
+ actual underrunning by the Worker.
*/
const ulong mts_worker_underrun_level= 10;
+
Slave_job_item * de_queue(Slave_jobs_queue *jobs, Slave_job_item *ret);
bool append_item_to_jobs(slave_job_item *job_item,
Slave_worker *w, Relay_log_info *rli);
@@ -3374,6 +3390,30 @@ apply_event_and_update_pos(Log_event** p
}
*ptr_ev= NULL; // announcing the event is passed to w-worker
+
+ if (log_warnings > 1 && rli->mts_events_assigned % 1024 == 1)
+ {
+ time_t my_now= my_time(0);
+
+ if ((my_now - rli->mts_last_online_stat) >=
+ mts_online_stat_period)
+ {
+ sql_print_information("Multi-threaded slave statistics: "
+ "seconds elapsed = %lu; "
+ "events assigned = %llu; "
+ "worker queues filled over overrun level = %lu; "
+ "waited due a Worker queue full = %lu; "
+ "waited due the total size = %lu; "
+ "slept when Workers occupied = %lu ",
+ my_now - rli->mts_last_online_stat,
+ rli->mts_events_assigned,
+ rli->mts_wq_overrun_cnt,
+ rli->mts_wq_overfill_cnt,
+ rli->wq_size_waits_cnt,
+ rli->mts_wq_no_underrun_cnt);
+ rli->mts_last_online_stat= my_now;
+ }
+ }
}
}
else
@@ -5175,11 +5215,11 @@ void slave_stop_workers(Relay_log_info *
}
if (log_warnings > 1)
- sql_print_information("Multi-threaded slave statistics: "
- "events processed = %lu ;"
- "worker queues filled over overrun level = %lu ;"
- "waited due a Worker queue full = %lu ;"
- "waited due the total size = %lu ;"
+ sql_print_information("Total MTS session statistics: "
+ "events processed = %llu; "
+ "worker queues filled over overrun level = %lu; "
+ "waited due a Worker queue full = %lu; "
+ "waited due the total size = %lu; "
"slept when Workers occupied = %lu ",
rli->mts_events_assigned, rli->mts_wq_overrun_cnt,
rli->mts_wq_overfill_cnt, rli->wq_size_waits_cnt,
No bundle (reason: useless for push emails).
| Thread |
|---|
| • bzr push into mysql-5.6 branch (andrei.elkin:4174) | Andrei Elkin | 27 Aug |