From: Andrei Elkin Date: July 8 2011 7:41pm Subject: bzr push into mysql-next-mr-wl5569 branch (andrei.elkin:3337 to 3338) WL#5569 List-Archive: http://lists.mysql.com/commits/140268 Message-Id: <201107081941.p68JfMRB016278@mysql1000.dsl.inet.fi> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit 3338 Andrei Elkin 2011-07-08 wl#5569 MTS The patch refines the logic of the applying phase of MTS-recovery to always apply events that are for Coordinator; fixes a few tests to make them pass on PB; makes the GAQ size equal to the checkpoint_group value. @ mysql-test/suite/rpl/t/rpl_parallel_switch_sequential.test attempting to decrease execution time that currently might be too much for some PB hosts. @ mysql-test/suite/rpl/t/rpl_row_crash_safe-slave.opt Making the test run in parallel mode with Workers using a table as their info storage. @ mysql-test/suite/sys_vars/r/slave_checkpoint_period_basic.result results updated. @ mysql-test/suite/sys_vars/t/slave_checkpoint_period_basic.test masking out the actual value of slave_checkpoint_period. @ sql/log_event.cc Never skip events that are for Coordinator as indicated by mts_execution_mode(). @ sql/rpl_rli.h Improving comments. @ sql/rpl_slave.cc Simplifying the while condition of the GAQ-progress loop and deploying an assert ensuring that the checkpoint_group parameter and the GAQ state are combined correctly. modified: mysql-test/suite/rpl/t/rpl_parallel_switch_sequential.test mysql-test/suite/rpl/t/rpl_row_crash_safe-slave.opt mysql-test/suite/rpl/t/rpl_stm_mixed_crash_safe-slave.opt mysql-test/suite/sys_vars/r/slave_checkpoint_period_basic.result mysql-test/suite/sys_vars/t/slave_checkpoint_period_basic.test sql/log_event.cc sql/rpl_rli.h sql/rpl_slave.cc 3337 Alfranio Correia 2011-07-08 Reduced the timeout period to run the checkpoint routine by setting slave-checkpoint-period to 30. 
modified: mysql-test/collections/default.push === modified file 'mysql-test/suite/rpl/t/rpl_parallel_switch_sequential.test' --- a/mysql-test/suite/rpl/t/rpl_parallel_switch_sequential.test 2011-07-05 17:43:04 +0000 +++ b/mysql-test/suite/rpl/t/rpl_parallel_switch_sequential.test 2011-07-08 19:40:52 +0000 @@ -39,7 +39,7 @@ while ($i) { let $slave_status= 0; let $trx= 0; let $alter= 0; -let $i= 300; +let $i= 200; # with bigger value test times out on some PB hosts if (`select @@binlog_format like "STATEMENT"`) { # relax mtr to scan unsafe warnings let $i=100; === modified file 'mysql-test/suite/rpl/t/rpl_row_crash_safe-slave.opt' --- a/mysql-test/suite/rpl/t/rpl_row_crash_safe-slave.opt 2011-06-27 12:12:52 +0000 +++ b/mysql-test/suite/rpl/t/rpl_row_crash_safe-slave.opt 2011-07-08 19:40:52 +0000 @@ -1 +1 @@ ---skip-core-file --skip-slave-start --relay-log-info-repository=TABLE --slave-worker-info-repository=TABLE +--skip-core-file --skip-slave-start --relay-log-info-repository=TABLE --slave-worker-info-repository=TABLE --relay-log-recovery=1 === modified file 'mysql-test/suite/rpl/t/rpl_stm_mixed_crash_safe-slave.opt' --- a/mysql-test/suite/rpl/t/rpl_stm_mixed_crash_safe-slave.opt 2011-06-27 12:12:52 +0000 +++ b/mysql-test/suite/rpl/t/rpl_stm_mixed_crash_safe-slave.opt 2011-07-08 19:40:52 +0000 @@ -1 +1 @@ ---skip-core-file --skip-slave-start --relay-log-info-repository=TABLE --slave-worker-info-repository=TABLE +--skip-core-file --skip-slave-start --relay-log-info-repository=TABLE --slave-worker-info-repository=TABLE --relay-log-recovery=1 === modified file 'mysql-test/suite/sys_vars/r/slave_checkpoint_period_basic.result' --- a/mysql-test/suite/sys_vars/r/slave_checkpoint_period_basic.result 2011-06-26 12:02:59 +0000 +++ b/mysql-test/suite/sys_vars/r/slave_checkpoint_period_basic.result 2011-07-08 19:40:52 +0000 @@ -9,16 +9,16 @@ select @@session.slave_checkpoint_period ERROR HY000: Variable 'slave_checkpoint_period' is a GLOBAL variable show global variables like 
'slave_checkpoint_period'; Variable_name Value -slave_checkpoint_period 300 +slave_checkpoint_period period show session variables like 'slave_checkpoint_period'; Variable_name Value -slave_checkpoint_period 300 +slave_checkpoint_period period select * from information_schema.global_variables where variable_name='slave_checkpoint_period'; VARIABLE_NAME VARIABLE_VALUE -SLAVE_CHECKPOINT_PERIOD 300 +SLAVE_CHECKPOINT_PERIOD period select * from information_schema.session_variables where variable_name='slave_checkpoint_period'; VARIABLE_NAME VARIABLE_VALUE -SLAVE_CHECKPOINT_PERIOD 300 +SLAVE_CHECKPOINT_PERIOD period set global slave_checkpoint_period=1; select @@global.slave_checkpoint_period; @@global.slave_checkpoint_period === modified file 'mysql-test/suite/sys_vars/t/slave_checkpoint_period_basic.test' --- a/mysql-test/suite/sys_vars/t/slave_checkpoint_period_basic.test 2011-06-26 12:02:59 +0000 +++ b/mysql-test/suite/sys_vars/t/slave_checkpoint_period_basic.test 2011-07-08 19:40:52 +0000 @@ -9,9 +9,13 @@ SELECT @start_global_value; select @@global.slave_checkpoint_period; --error ER_INCORRECT_GLOBAL_LOCAL_VAR select @@session.slave_checkpoint_period; +--replace_regex /[0-9]+/period/ show global variables like 'slave_checkpoint_period'; +--replace_regex /[0-9]+/period/ show session variables like 'slave_checkpoint_period'; +--replace_regex /[0-9]+/period/ select * from information_schema.global_variables where variable_name='slave_checkpoint_period'; +--replace_regex /[0-9]+/period/ select * from information_schema.session_variables where variable_name='slave_checkpoint_period'; # === modified file 'sql/log_event.cc' --- a/sql/log_event.cc 2011-07-08 06:44:35 +0000 +++ b/sql/log_event.cc 2011-07-08 19:40:52 +0000 @@ -2676,8 +2676,11 @@ int Log_event::apply_event(Relay_log_inf if (rli->is_mts_recovery()) { - bool skip= bitmap_is_set(&rli->recovery_groups, rli->mts_recovery_index); - + bool skip= + bitmap_is_set(&rli->recovery_groups, rli->mts_recovery_index) && + 
(mts_execution_mode(::server_id, + rli->mts_group_status == Relay_log_info::MTS_IN_GROUP) + == EVENT_EXEC_PARALLEL); if (skip) { DBUG_RETURN(0); === modified file 'sql/rpl_rli.h' --- a/sql/rpl_rli.h 2011-07-08 06:44:35 +0000 +++ b/sql/rpl_rli.h 2011-07-08 19:40:52 +0000 @@ -439,7 +439,7 @@ public: Slave_worker *last_assigned_worker;// is set to a Worker at assigning a group /* master-binlog ordered queue of Slave_job_group descriptors of groups - that are under processing + that are under processing. The queue size is @c checkpoint_group. */ Slave_committed_queue *gaq; /* === modified file 'sql/rpl_slave.cc' --- a/sql/rpl_slave.cc 2011-07-08 06:44:35 +0000 +++ b/sql/rpl_slave.cc 2011-07-08 19:40:52 +0000 @@ -4145,6 +4145,15 @@ bool mts_checkpoint_routine(Relay_log_in #endif /* + rli->checkpoint_group can have two possible values due to + two possible status of the last (being scheduled) group. + */ + DBUG_ASSERT(!rli->gaq->full() || + ((rli->checkpoint_seqno == rli->checkpoint_group -1 && + rli->mts_group_status == Relay_log_info::MTS_IN_GROUP) || + rli->checkpoint_seqno == rli->checkpoint_group)); + + /* Currently, the checkpoint routine is being called by the SQL Thread. 
For that reason, this function is called call from appropriate points in the SQL Thread's execution path and the elapsed time is calculated @@ -4152,7 +4161,7 @@ bool mts_checkpoint_routine(Relay_log_in */ set_timespec_nsec(curr_clock, 0); ulonglong diff= diff_timespec(curr_clock, rli->last_clock); - if (!force && diff < period && !rli->gaq->full()) + if (!force && diff < period) { /* We do not need to execute the checkpoint now because @@ -4170,7 +4179,7 @@ bool mts_checkpoint_routine(Relay_log_in sql_print_error("This an error cnt != mts_checkpoint_period"); #endif } while (!sql_slave_killed(rli->info_thd, rli) && - cnt == 0 && (rli->gaq->full() || force) && + cnt == 0 && force && !DBUG_EVALUATE_IF("check_slave_debug_group", 1, 0) && (my_sleep(rli->mts_coordinator_basic_nap), 1)); /* @@ -4330,21 +4339,18 @@ int slave_start_workers(Relay_log_info * /* GAQ queue holds seqno:s of scheduled groups. C polls workers in @c lwm_checkpoint_period to update GAQ (see @c next_event()) - The length of GAQ is derived from @c opt_mts_slave_worker_queue_len_max - to guarantee each assigned job being sent to a WQ will find room in GAQ. - mts_slave_worker_queue_len_max * num-of-W:s is the max length case - all jobs contain one event. + The length of GAQ is set to be equal to checkpoint_group. + Notice, the size matters for mts_checkpoint_routine's progress loop. 
*/ - // length of WQ is actually constant though can be made configurable - rli->mts_slave_worker_queue_len_max= mts_slave_worker_queue_len_max; rli->gaq= new Slave_committed_queue(rli->get_group_master_log_name(), sizeof(Slave_job_group), - 1 + rli->opt_slave_parallel_workers * - rli->mts_slave_worker_queue_len_max, n); + rli->checkpoint_group, n); if (!rli->gaq->inited) return 1; + // length of WQ is actually constant though can be made configurable + rli->mts_slave_worker_queue_len_max= mts_slave_worker_queue_len_max; rli->mts_pending_jobs_size= 0; rli->mts_pending_jobs_size_max= ::opt_mts_pending_jobs_size_max; rli->mts_wq_underrun_w_id= MTS_WORKER_UNDEF; No bundle (reason: useless for push emails).