List:Commits« Previous MessageNext Message »
From:Andrei Elkin Date:December 7 2010 5:35pm
Subject:bzr commit into mysql-next-mr-wl5569 branch (andrei.elkin:3231) WL#5569
View as plain text  
#At file:///home/andrei/MySQL/BZR/2a-23May/WL/mysql-next-mr-wl5569/ based on revid:andrei.elkin@stripped

 3231 Andrei Elkin	2010-12-07
      wl#5569 MTS
      
      Testing-related fixes, incl. master_pos_wait() support, and thereafter replacing
      sleeps with the functioning sync_slave_with_master;
      
      Fixing the limited Q-log-event parallelization. After the fix, a mixture of rows- and Q- transactions can run
      concurrently. Q-transactions will be treated sequentially by default.
     @ mysql-test/suite/rpl/r/rpl_parallel.result
        results updated.
     @ mysql-test/suite/rpl/r/rpl_sequential.result
        results updated.
     @ mysql-test/suite/rpl/t/disabled.def
        a nuisance test gets disabled.
     @ mysql-test/suite/rpl/t/rpl_parallel_conf_limits.test
        sleeps go away.
     @ mysql-test/suite/rpl/t/rpl_parallel_conflicts.test
        sleeps go away.
     @ mysql-test/suite/rpl/t/rpl_parallel_start_stop.test
        sleeps go away.
     @ sql/log_event.cc
        Fulfilling long-pending todo:s wrt update_pos and delete ev;
        update_pos() is redundant, being superseded by a special commit of the Worker;
        Addressing the {B, Q, T} not-parallel case. The issue was due to the inability to support Q-log-event
        as quickly as Rows- parallelization.
     @ sql/rpl_rli_pdb.cc
        circular_buffer_queue::de_tail(), a very specific method, is motivated by
        the limited support for Q-log-ev parallelization. It may turn out to be unnecessary once
        Q has become parallel.
     @ sql/rpl_slave.cc
        Implementing CP (the MTS checkpoint) in the successful read branch.

    modified:
      mysql-test/extra/rpl_tests/rpl_parallel_load.test
      mysql-test/suite/rpl/r/rpl_parallel.result
      mysql-test/suite/rpl/r/rpl_parallel_conf_limits.result
      mysql-test/suite/rpl/r/rpl_sequential.result
      mysql-test/suite/rpl/t/disabled.def
      mysql-test/suite/rpl/t/rpl_parallel_conf_limits.test
      mysql-test/suite/rpl/t/rpl_parallel_conflicts.test
      mysql-test/suite/rpl/t/rpl_parallel_start_stop.test
      sql/log_event.cc
      sql/log_event.h
      sql/rpl_rli_pdb.cc
      sql/rpl_rli_pdb.h
      sql/rpl_slave.cc
=== modified file 'mysql-test/extra/rpl_tests/rpl_parallel_load.test'
--- a/mysql-test/extra/rpl_tests/rpl_parallel_load.test	2010-12-04 17:14:50 +0000
+++ b/mysql-test/extra/rpl_tests/rpl_parallel_load.test	2010-12-07 17:35:16 +0000
@@ -87,17 +87,8 @@ while($i)
 --enable_query_log
 
 
-#connection slave;
-
-# Exec log position is not accurate in the prototype
---sleep 2
---disable_query_log
---disable_result_log
-###select sleep(300);
---enable_result_log
---enable_query_log
-
 sync_slave_with_master;
+#connection slave;
 
 --disable_query_log
 --disable_result_log
@@ -213,11 +204,6 @@ connection slave;
 --disable_query_log
 --disable_result_log
 
-### --sleep 15  # todo: convert to wait for the last event has been applied
-
---echo *** you can connect and change the exec mode as well now ***
---echo *** and select * from benchmark before to run consistency check ***
-
 insert into test0.benchmark set state='slave is processing load';
 
 # To force filling timestamp cols with the slave local clock values
@@ -239,13 +225,6 @@ select ts from test0.benchmark where sta
 select time_to_sec(@m_1) - time_to_sec(@m_0) as 'delta_m', 
        time_to_sec(@s_1) - time_to_sec(@s_0) as 'delta_s' into outfile 'delta.out';
 
-# debug: pre diff check-out
---disable_result_log
---disable_query_log
-##select sleep(9999);
---enable_result_log
---enable_query_log
-
 let $i = $databases + 1;
 while($i)
 {
@@ -262,13 +241,6 @@ while($i)
 --enable_result_log
 --enable_query_log
 
-# debug: pre diff check-out
---disable_result_log
---disable_query_log
-##select sleep(9999);
---enable_result_log
---enable_query_log
-
 
 connection master;
 
@@ -288,12 +260,8 @@ while($i)
 --enable_result_log
 --enable_query_log
 
-connection slave;
-
-# same as above - prototype Exec pos in not accurate
---sleep 5
-
-#sync_slave_with_master;
+sync_slave_with_master;
+#connection slave;
 
 
-# End of 4.1 tests
+# End of the tests

=== modified file 'mysql-test/suite/rpl/r/rpl_parallel.result'
--- a/mysql-test/suite/rpl/r/rpl_parallel.result	2010-12-02 17:46:46 +0000
+++ b/mysql-test/suite/rpl/r/rpl_parallel.result	2010-12-07 17:35:16 +0000
@@ -12,8 +12,6 @@ call mtr.add_suppression('Slave: Error d
 include/stop_slave.inc
 start slave;
 stop slave sql_thread;
-*** you can connect and change the exec mode as well now ***
-*** and select * from benchmark before to run consistency check ***
 use test;
 select * from test0.benchmark into outfile 'benchmark.out';
 select ts from test0.benchmark where state like 'master started load' into @m_0;

=== modified file 'mysql-test/suite/rpl/r/rpl_parallel_conf_limits.result'
--- a/mysql-test/suite/rpl/r/rpl_parallel_conf_limits.result	2010-12-02 17:46:46 +0000
+++ b/mysql-test/suite/rpl/r/rpl_parallel_conf_limits.result	2010-12-07 17:35:16 +0000
@@ -4,7 +4,9 @@ reset master;
 reset slave;
 drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
 start slave;
-create view coord_wait_list  as SELECT id from Information_Schema.processlist where state like 'Waiting for Slave Worker%';
+create view coord_wait_list as
+SELECT id from Information_Schema.processlist
+where state like 'Waiting for Slave Worker%';
 include/stop_slave.inc
 set @save.slave_parallel_workers= @@global.slave_parallel_workers;
 set @@global.slave_parallel_workers= 4;
@@ -13,9 +15,6 @@ set @@global.mts_slave_worker_queue_len_
 include/start_slave.inc
 create database d0;
 create table d0.t1 (a int auto_increment primary key) engine=innodb;
-select sleep(2);
-sleep(2)
-0
 begin;
 insert into d0.t1 set a=null;
 begin;

=== modified file 'mysql-test/suite/rpl/r/rpl_sequential.result'
--- a/mysql-test/suite/rpl/r/rpl_sequential.result	2010-12-02 17:46:46 +0000
+++ b/mysql-test/suite/rpl/r/rpl_sequential.result	2010-12-07 17:35:16 +0000
@@ -12,8 +12,6 @@ call mtr.add_suppression('Slave: Error d
 include/stop_slave.inc
 start slave;
 stop slave sql_thread;
-*** you can connect and change the exec mode as well now ***
-*** and select * from benchmark before to run consistency check ***
 use test;
 select * from test0.benchmark into outfile 'benchmark.out';
 select ts from test0.benchmark where state like 'master started load' into @m_0;

=== modified file 'mysql-test/suite/rpl/t/disabled.def'
--- a/mysql-test/suite/rpl/t/disabled.def	2010-09-06 12:52:04 +0000
+++ b/mysql-test/suite/rpl/t/disabled.def	2010-12-07 17:35:16 +0000
@@ -12,3 +12,4 @@
 
 rpl_row_create_table      : Bug#51574 2010-02-27 andrei failed different way than earlier with bug#45576
 rpl_spec_variables        : BUG#47661 2009-10-27 jasonh rpl_spec_variables fails on PB2 hpux
+rpl_row_ignorable_event   : Bug#58784 7-12-2010 andrei  rpl_row_ignorable_event fails on PB2

=== modified file 'mysql-test/suite/rpl/t/rpl_parallel_conf_limits.test'
--- a/mysql-test/suite/rpl/t/rpl_parallel_conf_limits.test	2010-12-02 17:46:46 +0000
+++ b/mysql-test/suite/rpl/t/rpl_parallel_conf_limits.test	2010-12-07 17:35:16 +0000
@@ -2,8 +2,11 @@
 # WL#5569 MTS
 #
 # The test verifies correctness of MTS execution when system meets
-# various limits due to configuration options.
+# various limits due to few configuration options:
 #
+# @@global.mts_pending_jobs_size_max
+# @@global.mts_slave_worker_queue_len_max
+# @@global.mts_partition_hash_soft_max
 
 source include/master-slave.inc;
 # no support for Query-log-event in this test
@@ -11,7 +14,9 @@ source include/have_binlog_format_row.in
 
 connection slave;
 
-create view coord_wait_list  as SELECT id from Information_Schema.processlist where state like 'Waiting for Slave Worker%';
+create view coord_wait_list as
+       SELECT id from Information_Schema.processlist
+              where state like 'Waiting for Slave Worker%';
 
 # restart in Parallel
 source include/stop_slave.inc;
@@ -30,9 +35,8 @@ create database d0;
 create table d0.t1 (a int auto_increment primary key) engine=innodb;
 
 
-connection slave;
+sync_slave_with_master;
 
-select sleep(2);
 begin;
 insert into d0.t1 set a=null; # lock a row that master has inserted into
 
@@ -87,14 +91,8 @@ source include/start_slave.inc;
 connection master;
 create table d0.t2 (a int auto_increment primary key, b text null) engine=innodb;
 
-connection slave;
+sync_slave_with_master;
 
-# sync_slave_with_master
---disable_query_log
---disable_result_log
-select sleep(2);
---enable_result_log
---enable_query_log
 
 begin;
 insert into d0.t2 set a= 1;
@@ -180,17 +178,8 @@ while ($i)
   dec $i;
 }
 
-# TODO:
+sync_slave_with_master;
 
-connection slave;
-
---disable_query_log
---disable_result_log
-select sleep(1);
---enable_result_log
---enable_query_log
-
-#sync_slave_with_master
 set @@global.mts_partition_hash_soft_max= @save.mts_partition_hash_soft_max;
 
 connection master;
@@ -203,8 +192,8 @@ while ($i)
 }
 
 
-connection slave;
-#sync_slave_with_master
+#connection slave;
+sync_slave_with_master;
 
 
 #
@@ -216,13 +205,10 @@ connection master;
 drop database d0;
 
 
-# sync_slave_with_master
-connection slave;
+sync_slave_with_master;
+#connection slave;
 
 drop view coord_wait_list;
-
---sleep 2
-
 set @@global.slave_parallel_workers= @save.slave_parallel_workers;
 
 

=== modified file 'mysql-test/suite/rpl/t/rpl_parallel_conflicts.test'
--- a/mysql-test/suite/rpl/t/rpl_parallel_conflicts.test	2010-12-02 17:46:46 +0000
+++ b/mysql-test/suite/rpl/t/rpl_parallel_conflicts.test	2010-12-07 17:35:16 +0000
@@ -67,14 +67,9 @@ create table d3.t1 (a int auto_increment
 #
 # two conflicting jobs to follow
 
-# sync_slave_with_master
-
-# TODO: remove once `sync_slave_with_master' got fixed
-
---sleep 3
-
+sync_slave_with_master;
 # To be really conflicting slave needs to block commit of the first.
-connection slave;
+#connection slave;
 
 begin;
 insert into d2.t1 values (1);
@@ -210,10 +205,8 @@ drop database d1;
 drop database d2;
 drop database d3;
 
---sleep 4
-
-connection slave;
-#sync_slave_with_master;
+sync_slave_with_master;
+#connection slave;
 
 drop view coord_wait_list;
 set @@global.slave_parallel_workers= @save.slave_parallel_workers;

=== modified file 'mysql-test/suite/rpl/t/rpl_parallel_start_stop.test'
--- a/mysql-test/suite/rpl/t/rpl_parallel_start_stop.test	2010-12-02 17:46:46 +0000
+++ b/mysql-test/suite/rpl/t/rpl_parallel_start_stop.test	2010-12-07 17:35:16 +0000
@@ -53,12 +53,6 @@ source include/wait_until_rows_count.inc
 
 select id from coord_proc_list into @c_id;
 
---disable_query_log
---disable_result_log
-#select sleep(300);
---enable_query_log
---enable_result_log
-
 kill query @c_id;
 
 let $count= 0;
@@ -80,20 +74,10 @@ CREATE TABLE t1 (a int primary key);
 
 insert into t1 values (1),(2);
 
-#
-# todo: remove when recovery recovers `sync_slave_with_master'
-#
-
---sleep 3
 
---disable_result_log
---disable_query_log
-#select sleep(600);
---enable_result_log
---enable_query_log
+sync_slave_with_master;
+#connection slave;
 
-connection slave;
-# sync_slave_with_master;
 let $count= 2;
 let $table= t1;
 source include/wait_until_rows_count.inc;
@@ -112,12 +96,6 @@ let $count= 0;
 let $table= worker_proc_list;
 source include/wait_until_rows_count.inc;
 
---disable_result_log
---disable_query_log
-#select sleep(600);
---enable_result_log
---enable_query_log
-
 source include/wait_for_slave_sql_to_stop.inc;
 delete from t1;
 
@@ -128,14 +106,8 @@ source include/start_slave.inc;
 connection master;
 drop table t1;
 
-#
-# todo: remove when recovery recovers `sync_slave_with_master'
-#
-
---sleep 3
-
-connection slave;
-#sync_slave_with_master;
+sync_slave_with_master;
+#connection slave;
 
 drop view worker_proc_list;
 drop view coord_proc_list;

=== modified file 'sql/log_event.cc'
--- a/sql/log_event.cc	2010-12-05 20:04:17 +0000
+++ b/sql/log_event.cc	2010-12-07 17:35:16 +0000
@@ -2527,8 +2527,8 @@ int Log_event::apply_event(Relay_log_inf
   {
     if (parallel)
     {
-      // This case relates to Query parallel apply which breaks into
-      // DDL and {B, Q, T} group, where Q owns g-parallel property.
+      // This `only-sequential' case relates to Query parallel apply which
+      // breaks into DDL and {B, Q, T} group, where Q owns g-parallel property.
 
       // Apply possibly deferred B
       if (rli->curr_group_da.elements > 0)
@@ -2539,9 +2539,16 @@ int Log_event::apply_event(Relay_log_inf
         DBUG_ASSERT(rli->curr_group_da.elements == 0);
         DBUG_ASSERT(rli->curr_group_seen_begin);
 
-        // TODO: rollback
-        // c_rli->gaq->assigned_group_index= rli->gaq->en_queue((void *) &g);
+        // While Query-log-event is not supported GAQ needs rollback
+        if (rli->curr_group_seen_begin)
+        {
+          Slave_job_group g;
+          ulong ind= rli->gaq->de_tail((uchar *) &g);
+          const_cast<Relay_log_info*>(rli)->mts_total_groups--;
 
+          DBUG_ASSERT(rli->last_assigned_worker == NULL);
+          DBUG_ASSERT(c_rli->gaq->assigned_group_index == ind);
+        }
         res= ev_begin->do_apply_event(rli);
         delete ev_begin;
         /* B appears to be serial, reset parallel status of group 
@@ -2716,6 +2723,13 @@ int slave_worker_exec_job(Slave_worker *
     w->slave_worker_ends_group(ev, error); /* last done sets post exec */
   }
 
+    /*
+      commit_positions() fullfils group pos incr and flush
+      TODO: remove
+      if (!error)
+      ev->update_pos(w->w_rli);
+    */
+
   mysql_mutex_lock(&w->jobs_lock);
   de_queue(&w->jobs, job_item);
 
@@ -2725,17 +2739,6 @@ int slave_worker_exec_job(Slave_worker *
     w->jobs.overfill= FALSE;
     mysql_cond_signal(&w->jobs_cond);
   }
-
-  /*
-    preserving signatures of existing methods.
-    todo: convert update_pos(w->w_rli) -> update_pos(w)
-          to remove w_rli w/a
-    TODO: remove ?
-  */
-  if (!error)
-    ev->update_pos(w->w_rli);
-
-
   mysql_mutex_unlock(&w->jobs_lock);
 
   /* statistics */
@@ -2786,9 +2789,8 @@ int slave_worker_exec_job(Slave_worker *
 
 err:
 
-  // TODO!!! ANDREI to RESTORE
-  // if (!ev)
-  //    delete ev;  // after ev->update_pos() event is garbage
+  if (!ev)
+    delete ev;  // after ev->update_pos() event is garbage
 
   DBUG_RETURN(error);
 }

=== modified file 'sql/log_event.h'
--- a/sql/log_event.h	2010-11-27 15:36:50 +0000
+++ b/sql/log_event.h	2010-12-07 17:35:16 +0000
@@ -1197,7 +1197,7 @@ public:
   }
 
   /**
-     MST: some events can be applied by Coordinator concurrently with Workers.
+     MST: some events have to be applied by Coordinator concurrently with Workers.
 
      @return TRUE  if that's the case,
              FALSE otherwise.

=== modified file 'sql/rpl_rli_pdb.cc'
--- a/sql/rpl_rli_pdb.cc	2010-12-04 17:14:50 +0000
+++ b/sql/rpl_rli_pdb.cc	2010-12-07 17:35:16 +0000
@@ -590,6 +590,33 @@ ulong circular_buffer_queue::de_queue(uc
   return ret;
 }
 
+/**
+   removing an item from the tail side
+*/
+ulong circular_buffer_queue::de_tail(uchar *val)
+{
+  ulong ret;
+  if (e == s)
+  {
+    DBUG_ASSERT(len == 0);
+    return (ulong) -1;
+  }
+
+  a= (e + len - 1) % s;
+  get_dynamic(&Q, val, a);
+  len--;
+  
+  // post boundary cond
+  if (a == e)
+    e= s;
+
+  DBUG_ASSERT(e == s ||
+              (len == (a >= e)? (a - e) :
+               (s + a - e)));
+
+  return a;
+
+}
 /** 
     @return the used index at success or -1 when queue is full
 */

=== modified file 'sql/rpl_rli_pdb.h'
--- a/sql/rpl_rli_pdb.h	2010-12-04 17:14:50 +0000
+++ b/sql/rpl_rli_pdb.h	2010-12-07 17:35:16 +0000
@@ -67,10 +67,14 @@ public:
       location.
       
       @return the queue's array index that the de-queued item
-      locates at, or
+      located at, or
       an error encoded in beyond the index legacy range.
    */
   ulong de_queue(uchar *);
+  /**
+     Similar to de_queue but extracting happens from the tail side.
+  */
+  ulong de_tail(uchar *val);
 
   /**
     return the index where the arg item locates

=== modified file 'sql/rpl_slave.cc'
--- a/sql/rpl_slave.cc	2010-12-05 20:04:17 +0000
+++ b/sql/rpl_slave.cc	2010-12-07 17:35:16 +0000
@@ -168,7 +168,7 @@ static int terminate_slave_thread(THD *t
                                   bool skip_lock);
 static bool check_io_slave_killed(THD *thd, Master_info *mi, const char *info);
 int slave_worker_exec_job(Slave_worker * w, Relay_log_info *rli);
-bool mts_checkpoint_routine(Relay_log_info *rli);
+static bool mts_checkpoint_routine(Relay_log_info *rli, bool locked);
 
 /*
   Find out which replications threads are running
@@ -3617,7 +3617,7 @@ err:
 
    @return FALSE success, TRUE otherwise
 */
-bool mts_checkpoint_routine(Relay_log_info *rli)
+bool mts_checkpoint_routine(Relay_log_info *rli, bool locked)
 {
   bool error= FALSE;
   ulong cnt;
@@ -3638,7 +3638,8 @@ bool mts_checkpoint_routine(Relay_log_in
   };
   sort_dynamic(&rli->least_occupied_workers, (qsort_cmp) ulong_cmp);
 
-  mysql_mutex_lock(&rli->data_lock);
+  if (!locked)
+    mysql_mutex_lock(&rli->data_lock);
 
   // Coordinator::commit_positions() {
 
@@ -3647,6 +3648,9 @@ bool mts_checkpoint_routine(Relay_log_in
   // group_master_log_name is updated only by Coordinator and it can't change
   // within checkpoint interval because Coordinator flushes the updated value
   // at once.
+  // Note, unlike group_master_log_name, event_relay_log_pos is updated solely 
+  // within Coordinator read loop context. Hence, it's possible at times 
+  // event_rlp > group_rlp.
 
   rli->set_group_master_log_pos(rli->gaq->lwm.group_master_log_pos);
   rli->set_group_relay_log_pos(rli->gaq->lwm.group_relay_log_pos);
@@ -3654,15 +3658,22 @@ bool mts_checkpoint_routine(Relay_log_in
   if (rli->gaq->lwm.group_relay_log_name[0] != 0)
     rli->set_group_relay_log_name(rli->gaq->lwm.group_relay_log_name);
 
-  error= rli->flush_info(TRUE);
+  //todo: uncomment notifies when UNTIL will be supported
 
-  // end of commit_positions
+  //rli->notify_group_master_log_name_update();
+  //rli->notify_group_relay_log_name_update();
 
-  mysql_mutex_unlock(&rli->data_lock);
+  // todo: optimize with if (wait_flag) broadcast
+  //       waiter: set wait_flag; waits....; drops wait_flag;
+  mysql_cond_broadcast(&rli->data_cond);
+  if (!locked)
+    mysql_mutex_unlock(&rli->data_lock);
+
+  error= rli->flush_info(TRUE);
+  // end of commit_positions
 
 end:
   
-  // ANDREI NOTIFICATIONS?
   DBUG_RETURN(error);
 }
 
@@ -5235,6 +5246,23 @@ static Log_event* next_event(Relay_log_i
       ev->future_event_relay_log_pos= rli->get_future_event_relay_log_pos();
       if (hot_log)
         mysql_mutex_unlock(log_lock);
+      /* 
+         MTS checkpoint in the successful read branch 
+      */
+      if (rli->is_parallel_exec() && rli->lwm_period != 0.0)
+      {
+        int ret= 0;
+        struct timespec waittime;
+        ulong period= rli->lwm_period * 1000000000UL;
+        set_timespec_nsec(rli->curr_clock, 0);
+        ulong diff= diff_timespec(rli->curr_clock, rli->last_clock);
+        if (diff > period)
+        {
+          mts_checkpoint_routine(rli, TRUE);
+          set_timespec_nsec(rli->last_clock, 0);
+        }
+      }
+
       DBUG_RETURN(ev);
     }
     DBUG_ASSERT(thd==rli->info_thd);
@@ -5363,8 +5391,8 @@ static Log_event* next_event(Relay_log_i
             ulong diff= diff_timespec(rli->curr_clock, rli->last_clock);
             if (diff > period)
             {
-               mts_checkpoint_routine(rli);
-               set_timespec_nsec(rli->last_clock, 0);
+              mts_checkpoint_routine(rli, FALSE);
+              set_timespec_nsec(rli->last_clock, 0);
             }
             set_timespec_nsec(waittime, period);
             thd->enter_cond(log_cond, log_lock,


Attachment: [text/bzr-bundle] bzr/andrei.elkin@oracle.com-20101207173516-rqf0s7jiwjny97by.bundle
Thread
bzr commit into mysql-next-mr-wl5569 branch (andrei.elkin:3231) WL#5569Andrei Elkin7 Dec