List:Commits« Previous MessageNext Message »
From:Andrei Elkin Date:July 8 2011 7:41pm
Subject:bzr push into mysql-next-mr-wl5569 branch (andrei.elkin:3337 to 3338) WL#5569
View as plain text  
 3338 Andrei Elkin	2011-07-08
      wl#5569 MTS
      
      The patch
      refines the logic of the applying phase of MTS-recovery to always apply events
      that are for the Coordinator;
      fixes a few tests to make them pass on PB;
      makes the GAQ size equal to the checkpoint_group value.
     @ mysql-test/suite/rpl/t/rpl_parallel_switch_sequential.test
        Attempting to decrease the execution time, which currently might be too long for some PB hosts.
     @ mysql-test/suite/rpl/t/rpl_row_crash_safe-slave.opt
        Making the test run in parallel mode with Workers using a table as their info storage.
     @ mysql-test/suite/sys_vars/r/slave_checkpoint_period_basic.result
        results updated.
     @ mysql-test/suite/sys_vars/t/slave_checkpoint_period_basic.test
        masking out the actual value of slave_checkpoint_period.
     @ sql/log_event.cc
        Never skip events that are for Coordinator as indicated by mts_execution_mode().
     @ sql/rpl_rli.h
        Improving comments.
     @ sql/rpl_slave.cc
        Simplifying the while condition of the GAQ-progress loop and deploying an
        assert ensuring that the checkpoint_group parameter and the GAQ state are combined correctly.

    modified:
      mysql-test/suite/rpl/t/rpl_parallel_switch_sequential.test
      mysql-test/suite/rpl/t/rpl_row_crash_safe-slave.opt
      mysql-test/suite/rpl/t/rpl_stm_mixed_crash_safe-slave.opt
      mysql-test/suite/sys_vars/r/slave_checkpoint_period_basic.result
      mysql-test/suite/sys_vars/t/slave_checkpoint_period_basic.test
      sql/log_event.cc
      sql/rpl_rli.h
      sql/rpl_slave.cc
 3337 Alfranio Correia	2011-07-08
      Reduced the timeout period to run the checkpoint routine by setting slave-checkpoint-period to 30.

    modified:
      mysql-test/collections/default.push
=== modified file 'mysql-test/suite/rpl/t/rpl_parallel_switch_sequential.test'
--- a/mysql-test/suite/rpl/t/rpl_parallel_switch_sequential.test	2011-07-05 17:43:04 +0000
+++ b/mysql-test/suite/rpl/t/rpl_parallel_switch_sequential.test	2011-07-08 19:40:52 +0000
@@ -39,7 +39,7 @@ while ($i) {
 let $slave_status= 0;
 let $trx= 0;
 let $alter= 0;
-let $i= 300;
+let $i= 200; # with bigger value test times out on some PB hosts 
 if (`select @@binlog_format like "STATEMENT"`) {
    # relax mtr to scan unsafe warnings
    let $i=100;

=== modified file 'mysql-test/suite/rpl/t/rpl_row_crash_safe-slave.opt'
--- a/mysql-test/suite/rpl/t/rpl_row_crash_safe-slave.opt	2011-06-27 12:12:52 +0000
+++ b/mysql-test/suite/rpl/t/rpl_row_crash_safe-slave.opt	2011-07-08 19:40:52 +0000
@@ -1 +1 @@
---skip-core-file --skip-slave-start --relay-log-info-repository=TABLE --slave-worker-info-repository=TABLE
+--skip-core-file --skip-slave-start --relay-log-info-repository=TABLE --slave-worker-info-repository=TABLE --relay-log-recovery=1

=== modified file 'mysql-test/suite/rpl/t/rpl_stm_mixed_crash_safe-slave.opt'
--- a/mysql-test/suite/rpl/t/rpl_stm_mixed_crash_safe-slave.opt	2011-06-27 12:12:52 +0000
+++ b/mysql-test/suite/rpl/t/rpl_stm_mixed_crash_safe-slave.opt	2011-07-08 19:40:52 +0000
@@ -1 +1 @@
---skip-core-file --skip-slave-start --relay-log-info-repository=TABLE --slave-worker-info-repository=TABLE
+--skip-core-file --skip-slave-start --relay-log-info-repository=TABLE --slave-worker-info-repository=TABLE --relay-log-recovery=1

=== modified file 'mysql-test/suite/sys_vars/r/slave_checkpoint_period_basic.result'
--- a/mysql-test/suite/sys_vars/r/slave_checkpoint_period_basic.result	2011-06-26 12:02:59 +0000
+++ b/mysql-test/suite/sys_vars/r/slave_checkpoint_period_basic.result	2011-07-08 19:40:52 +0000
@@ -9,16 +9,16 @@ select @@session.slave_checkpoint_period
 ERROR HY000: Variable 'slave_checkpoint_period' is a GLOBAL variable
 show global variables like 'slave_checkpoint_period';
 Variable_name	Value
-slave_checkpoint_period	300
+slave_checkpoint_period	period
 show session variables like 'slave_checkpoint_period';
 Variable_name	Value
-slave_checkpoint_period	300
+slave_checkpoint_period	period
 select * from information_schema.global_variables where variable_name='slave_checkpoint_period';
 VARIABLE_NAME	VARIABLE_VALUE
-SLAVE_CHECKPOINT_PERIOD	300
+SLAVE_CHECKPOINT_PERIOD	period
 select * from information_schema.session_variables where variable_name='slave_checkpoint_period';
 VARIABLE_NAME	VARIABLE_VALUE
-SLAVE_CHECKPOINT_PERIOD	300
+SLAVE_CHECKPOINT_PERIOD	period
 set global slave_checkpoint_period=1;
 select @@global.slave_checkpoint_period;
 @@global.slave_checkpoint_period

=== modified file 'mysql-test/suite/sys_vars/t/slave_checkpoint_period_basic.test'
--- a/mysql-test/suite/sys_vars/t/slave_checkpoint_period_basic.test	2011-06-26 12:02:59 +0000
+++ b/mysql-test/suite/sys_vars/t/slave_checkpoint_period_basic.test	2011-07-08 19:40:52 +0000
@@ -9,9 +9,13 @@ SELECT @start_global_value;
 select @@global.slave_checkpoint_period;
 --error ER_INCORRECT_GLOBAL_LOCAL_VAR
 select @@session.slave_checkpoint_period;
+--replace_regex /[0-9]+/period/
 show global variables like 'slave_checkpoint_period';
+--replace_regex /[0-9]+/period/
 show session variables like 'slave_checkpoint_period';
+--replace_regex /[0-9]+/period/
 select * from information_schema.global_variables where variable_name='slave_checkpoint_period';
+--replace_regex /[0-9]+/period/
 select * from information_schema.session_variables where variable_name='slave_checkpoint_period';
 
 #

=== modified file 'sql/log_event.cc'
--- a/sql/log_event.cc	2011-07-08 06:44:35 +0000
+++ b/sql/log_event.cc	2011-07-08 19:40:52 +0000
@@ -2676,8 +2676,11 @@ int Log_event::apply_event(Relay_log_inf
 
   if (rli->is_mts_recovery())
   {
-    bool skip= bitmap_is_set(&rli->recovery_groups, rli->mts_recovery_index);
-
+    bool skip= 
+      bitmap_is_set(&rli->recovery_groups, rli->mts_recovery_index) &&
+      (mts_execution_mode(::server_id, 
+                          rli->mts_group_status == Relay_log_info::MTS_IN_GROUP)
+       == EVENT_EXEC_PARALLEL);
     if (skip)
     {
       DBUG_RETURN(0);

=== modified file 'sql/rpl_rli.h'
--- a/sql/rpl_rli.h	2011-07-08 06:44:35 +0000
+++ b/sql/rpl_rli.h	2011-07-08 19:40:52 +0000
@@ -439,7 +439,7 @@ public:
   Slave_worker  *last_assigned_worker;// is set to a Worker at assigning a group
   /*
     master-binlog ordered queue of Slave_job_group descriptors of groups
-    that are under processing
+    that are under processing. The queue size is @c checkpoint_group.
   */
   Slave_committed_queue *gaq;
   /*

=== modified file 'sql/rpl_slave.cc'
--- a/sql/rpl_slave.cc	2011-07-08 06:44:35 +0000
+++ b/sql/rpl_slave.cc	2011-07-08 19:40:52 +0000
@@ -4145,6 +4145,15 @@ bool mts_checkpoint_routine(Relay_log_in
 #endif
 
   /*
+    rli->checkpoint_group can have two possible values due to
+    two possible status of the last (being scheduled) group. 
+  */
+  DBUG_ASSERT(!rli->gaq->full() ||
+              ((rli->checkpoint_seqno == rli->checkpoint_group -1 &&
+                rli->mts_group_status == Relay_log_info::MTS_IN_GROUP) ||
+               rli->checkpoint_seqno == rli->checkpoint_group));
+
+  /*
     Currently, the checkpoint routine is being called by the SQL Thread.
     For that reason, this function is called call from appropriate points
     in the SQL Thread's execution path and the elapsed time is calculated
@@ -4152,7 +4161,7 @@ bool mts_checkpoint_routine(Relay_log_in
   */
   set_timespec_nsec(curr_clock, 0);
   ulonglong diff= diff_timespec(curr_clock, rli->last_clock);
-  if (!force && diff < period && !rli->gaq->full())
+  if (!force && diff < period)
   {
     /*
       We do not need to execute the checkpoint now because
@@ -4170,7 +4179,7 @@ bool mts_checkpoint_routine(Relay_log_in
       sql_print_error("This an error cnt != mts_checkpoint_period");
 #endif
   } while (!sql_slave_killed(rli->info_thd, rli) &&
-           cnt == 0 && (rli->gaq->full() || force) &&
+           cnt == 0 && force &&
            !DBUG_EVALUATE_IF("check_slave_debug_group", 1, 0) &&
            (my_sleep(rli->mts_coordinator_basic_nap), 1));
   /*
@@ -4330,21 +4339,18 @@ int slave_start_workers(Relay_log_info *
   /* 
      GAQ  queue holds seqno:s of scheduled groups. C polls workers in 
      @c lwm_checkpoint_period to update GAQ (see @c next_event())
-     The length of GAQ is derived from @c opt_mts_slave_worker_queue_len_max
-     to guarantee each assigned job being sent to a WQ will find room in GAQ.
-     mts_slave_worker_queue_len_max * num-of-W:s is the max length case 
-     all jobs contain one event.
+     The length of GAQ is set to be equal to checkpoint_group.
+     Notice, the size matters for mts_checkpoint_routine's progress loop.
   */
 
-  // length of WQ is actually constant though can be made configurable
-  rli->mts_slave_worker_queue_len_max= mts_slave_worker_queue_len_max;
   rli->gaq= new Slave_committed_queue(rli->get_group_master_log_name(),
                                       sizeof(Slave_job_group),
-                                      1 + rli->opt_slave_parallel_workers *
-                                      rli->mts_slave_worker_queue_len_max, n);
+                                      rli->checkpoint_group, n);
   if (!rli->gaq->inited)
     return 1;
 
+  // length of WQ is actually constant though can be made configurable
+  rli->mts_slave_worker_queue_len_max= mts_slave_worker_queue_len_max;
   rli->mts_pending_jobs_size= 0;
   rli->mts_pending_jobs_size_max= ::opt_mts_pending_jobs_size_max;
   rli->mts_wq_underrun_w_id= MTS_WORKER_UNDEF;

No bundle (reason: useless for push emails).
Thread
bzr push into mysql-next-mr-wl5569 branch (andrei.elkin:3337 to 3338) WL#5569Andrei Elkin10 Jul