From: Andrei Elkin Date: August 24 2012 1:22pm Subject: bzr push into mysql-5.6 branch (andrei.elkin:4174) List-Archive: http://lists.mysql.com/commits/144619 Message-Id: <201208241322.q7ODMj8s027904@mysql1000.dsl.inet.fi> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit 4174 Andrei Elkin 2012-08-24 [merge] merge from 5.6 repo === modified file 'sql/rpl_rli.h' --- a/sql/rpl_rli.h 2012-08-09 10:05:01 +0000 +++ b/sql/rpl_rli.h 2012-08-24 13:00:09 +0000 @@ -573,7 +573,7 @@ public: /* MTS statistics: */ - ulong mts_events_assigned; // number of events (statements) scheduled + ulonglong mts_events_assigned; // number of events (statements) scheduled ulong mts_groups_assigned; // number of groups (transactions) scheduled volatile ulong mts_wq_overrun_cnt; // counter of all mts_wq_excess_cnt increments ulong wq_size_waits_cnt; // number of times C slept due to WQ:s oversize @@ -590,6 +590,7 @@ public: a new partition. Is updated at checkpoint commit to the main RLI. 
*/ DYNAMIC_ARRAY least_occupied_workers; + time_t mts_last_online_stat; /* end of MTS statistics */ /* most of allocation in the coordinator rli is there */ === modified file 'sql/rpl_rli_pdb.cc' --- a/sql/rpl_rli_pdb.cc 2012-08-21 08:50:39 +0000 +++ b/sql/rpl_rli_pdb.cc 2012-08-24 13:00:09 +0000 @@ -163,7 +163,9 @@ int Slave_worker::init_worker(Relay_log_ insert_dynamic(&jobs.Q, (uchar*) &empty); DBUG_ASSERT(jobs.Q.elements == jobs.size); - wq_overrun_set= FALSE; + wq_overrun_cnt= 0; + // overrun level is symmetric to underrun (as underrun to the full queue) + overrun_level= ((100 - rli->mts_worker_underrun_level) * jobs.size) / 100.0; DBUG_RETURN(0); } @@ -1655,7 +1657,11 @@ bool append_item_to_jobs(slave_job_item thd->EXIT_COND(&old_stage); if (thd->killed) return true; - + if (log_warnings > 1 && (rli->wq_size_waits_cnt % 10 == 1)) + sql_print_information("Multi-threaded slave: Coordinator has waited " + "%lu times hitting slave_pending_jobs_size_max; " + "current event size = %lu.", + rli->wq_size_waits_cnt, ev_size); mysql_mutex_lock(&rli->pending_jobs_lock); new_pend_size= rli->mts_pending_jobs_size + ev_size; @@ -1667,13 +1673,23 @@ bool append_item_to_jobs(slave_job_item mysql_mutex_unlock(&rli->pending_jobs_lock); /* - Sleep unless there is an underrunning Worker. + Sleep only when no Worker is underrunning and the current Worker + queue is not empty. */ - if (rli->mts_wq_underrun_w_id == MTS_WORKER_UNDEF) + if (rli->mts_wq_underrun_w_id == MTS_WORKER_UNDEF && worker->jobs.len > 0) { - // todo: experiment with weight to get a good approximation formula - // The longer Sleep lasts the bigger is excessive overrun counter. + /* + todo: experiment with weight to get a good approximation formula + The bigger the excessive overrun counter the longer the nap. + */ ulong nap_weight= rli->mts_wq_excess_cnt + 1; + /* + Nap time is a product of a weight factor and the basic nap unit. 
+ The weight factor is proportional to the worker queues overrun excess + counter. For example, when there is only one overrunning Worker + the max nap_weight, being 0.1 * worker->jobs.size, is + about 1600 so the max nap time is approx 0.008 secs. + */ my_sleep(nap_weight * rli->mts_coordinator_basic_nap); rli->mts_wq_no_underrun_cnt++; } @@ -1893,9 +1909,14 @@ int slave_worker_exec_job(Slave_worker * rli->mts_pending_jobs_size -= ev->data_written; DBUG_ASSERT(rli->mts_pending_jobs_size < rli->mts_pending_jobs_size_max); - // underrun (number of pending assignments is less than underrun level) - if ((rli->mts_worker_underrun_level * worker->jobs.size) / 100.0 > - worker->jobs.len) + /* + The positive branch is underrun: number of pending assignments + is less than underrun level. + A zero jobs.len has to reset the underrun w_id, as the worker may get + its next assignment only after a long time. + */ + if (((rli->mts_worker_underrun_level * worker->jobs.size) / 100.0 > + worker->jobs.len) && (worker->jobs.len != 0)) { rli->mts_wq_underrun_w_id= worker->id; } else if (rli->mts_wq_underrun_w_id == worker->id) @@ -1903,22 +1924,37 @@ int slave_worker_exec_job(Slave_worker * // reset only own marking rli->mts_wq_underrun_w_id= MTS_WORKER_UNDEF; } - - // overrun is symmetric to underrun. In a sense it's underrun to get to 100% - if (((100 - rli->mts_worker_underrun_level) * worker->jobs.size) / 100.0 - < worker->jobs.len) + + /* + Overrun handling. + Incrementing the Worker private and the total excess counter corresponding + to number of events filled in at over + (100 - rli->mts_worker_underrun_level) level. + The increment amount to the total counter is a difference between + the current and the previous private excess (worker->wq_overrun_cnt). + When the current queue length drops below overrun_level the global + counter is decremented, the local is reset. 
+ */ + if (worker->overrun_level < worker->jobs.len) { - rli->mts_wq_excess_cnt++; - worker->wq_overrun_set= TRUE; - rli->mts_wq_overrun_cnt++; + ulong last_overrun= worker->wq_overrun_cnt; + + worker->wq_overrun_cnt= worker->jobs.len - worker->overrun_level; //current + rli->mts_wq_excess_cnt+= (worker->wq_overrun_cnt - last_overrun); + rli->mts_wq_overrun_cnt++; // statistics + + // guarding correctness of incrementing in case of the only one Worker + DBUG_ASSERT(rli->workers.elements != 1 || + rli->mts_wq_excess_cnt == worker->wq_overrun_cnt); } - else if (worker->wq_overrun_set == TRUE) + else if (worker->wq_overrun_cnt > 0) { - rli->mts_wq_excess_cnt--; - worker->wq_overrun_set= FALSE; - } + // When level drops below the total excess is decremented + rli->mts_wq_excess_cnt -= worker->wq_overrun_cnt; + worker->wq_overrun_cnt= 0; // and the local is reset - DBUG_ASSERT(rli->mts_wq_excess_cnt >= 0); + DBUG_ASSERT(rli->mts_wq_excess_cnt >= 0); + } /* coordinator can be waiting */ if (rli->mts_pending_jobs_size < rli->mts_pending_jobs_size_max && === modified file 'sql/rpl_rli_pdb.h' --- a/sql/rpl_rli_pdb.h 2012-08-22 08:24:05 +0000 +++ b/sql/rpl_rli_pdb.h 2012-08-24 13:00:09 +0000 @@ -321,8 +321,14 @@ public: volatile bool relay_log_change_notified; // Coord sets and resets, W can read volatile bool checkpoint_notified; // Coord sets and resets, W can read ulong bitmap_shifted; // shift the last bitmap at receiving new CP - bool wq_overrun_set; // W marks inself as incrementer of rli->mts_wq_excess_cnt - + // W private counter to incrementer in step with rli->mts_wq_excess_cnt + long wq_overrun_cnt; + /* + number of events starting from which Worker queue is regarded as + close to full. The number of the excessive events yields a weight factor + to compute Coordinator's nap. 
+ */ + ulong overrun_level; /* Coordinates of the last CheckPoint (CP) this Worker has acknowledged; part of is persisent data === modified file 'sql/rpl_slave.cc' --- a/sql/rpl_slave.cc 2012-08-09 10:05:01 +0000 +++ b/sql/rpl_slave.cc 2012-08-24 13:00:09 +0000 @@ -91,9 +91,15 @@ const char *relay_log_basename= 0; const ulong mts_slave_worker_queue_len_max= 16384; /* + Statistics go to the error log every # of seconds when --log-warnings > 1 +*/ +const long mts_online_stat_period= 60 * 2; + + +/* MTS load-ballancing parameter. - Time in microsecs to sleep by MTS Coordinator to avoid the Worker queues - room overrun. + Time unit in microsecs to sleep by MTS Coordinator to avoid extra thread + signalling when Worker queues are close to being filled up. */ const ulong mts_coordinator_basic_nap= 5; @@ -101,8 +107,18 @@ const ulong mts_coordinator_basic_nap= 5 MTS load-ballancing parameter. Percent of Worker queue size at which Worker is considered to become hungry. + + C enqueues --+ . underrun level + V " + +----------+-+------------------+--------------+ + | empty |.|::::::::::::::::::|xxxxxxxxxxxxxx| ---> Worker dequeues + +----------+-+------------------+--------------+ + + Like in the above diagram enqueuing to the x-d area would indicate + actual underrunning by Worker. 
*/ const ulong mts_worker_underrun_level= 10; + Slave_job_item * de_queue(Slave_jobs_queue *jobs, Slave_job_item *ret); bool append_item_to_jobs(slave_job_item *job_item, Slave_worker *w, Relay_log_info *rli); @@ -3374,6 +3390,30 @@ apply_event_and_update_pos(Log_event** p } *ptr_ev= NULL; // announcing the event is passed to w-worker + + if (log_warnings > 1 && rli->mts_events_assigned % 1024 == 1) + { + time_t my_now= my_time(0); + + if ((my_now - rli->mts_last_online_stat) >= + mts_online_stat_period) + { + sql_print_information("Multi-threaded slave statistics: " + "seconds elapsed = %lu; " + "events assigned = %llu; " + "worker queues filled over overrun level = %lu; " + "waited due a Worker queue full = %lu; " + "waited due the total size = %lu; " + "slept when Workers occupied = %lu ", + my_now - rli->mts_last_online_stat, + rli->mts_events_assigned, + rli->mts_wq_overrun_cnt, + rli->mts_wq_overfill_cnt, + rli->wq_size_waits_cnt, + rli->mts_wq_no_underrun_cnt); + rli->mts_last_online_stat= my_now; + } + } } } else @@ -5175,11 +5215,11 @@ void slave_stop_workers(Relay_log_info * } if (log_warnings > 1) - sql_print_information("Multi-threaded slave statistics: " - "events processed = %lu ;" - "worker queues filled over overrun level = %lu ;" - "waited due a Worker queue full = %lu ;" - "waited due the total size = %lu ;" + sql_print_information("Total MTS session statistics: " + "events processed = %llu; " + "worker queues filled over overrun level = %lu; " + "waited due a Worker queue full = %lu; " + "waited due the total size = %lu; " "slept when Workers occupied = %lu ", rli->mts_events_assigned, rli->mts_wq_overrun_cnt, rli->mts_wq_overfill_cnt, rli->wq_size_waits_cnt, No bundle (reason: useless for push emails).