MySQL Lists are EOL. Please join:

List:Commits« Previous MessageNext Message »
From:Alfranio Correia Date:February 17 2009 10:23pm
Subject:Re: bzr commit into mysql-6.0-rpl branch (alfranio.correia:2811)
Bug#40337
View as plain text  
Hi Mats,

I've fixed what you requested to.
See in-line comments.

Cheers.

Mats Kindahl wrote:
> Hi Alfranio,
>
> I see no problems with the patch, so I approve it. However, have a look at the
> comments inline below.
>
> Just my few cents,
> Mats Kindahl
>
> Alfranio Correia wrote:
>   
>> #At
> file:///home/acorreia/workspace.sun/repository.mysql/bzrwork/bug-40337/mysql-6.0-rpl/
> based on revid:alfranio.correia@stripped
>>
>>  2811 Alfranio Correia	2009-02-12
>>       BUG#40337  Fsyncing master and relay log to disk after every event is too
> slow
>>       
>>       Post-fix.
>> added:
>>   mysql-test/suite/rpl/r/rpl_sync.result
>>   mysql-test/suite/rpl/t/rpl_sync-slave.opt
>>   mysql-test/suite/rpl/t/rpl_sync.test
>> modified:
>>   sql/rpl_mi.cc
>>   sql/rpl_mi.h
>>   sql/rpl_rli.cc
>>   sql/rpl_rli.h
>>   sql/slave.cc
>>   sql/sql_binlog.cc
>>
>> === added file 'mysql-test/suite/rpl/r/rpl_sync.result'
>> --- a/mysql-test/suite/rpl/r/rpl_sync.result	1970-01-01 00:00:00 +0000
>> +++ b/mysql-test/suite/rpl/r/rpl_sync.result	2009-02-12 09:13:57 +0000
>> @@ -0,0 +1,75 @@
>> +=====Configuring the enviroment=======;
>> +stop slave;
>> +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
>> +reset master;
>> +reset slave;
>> +drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
>> +start slave;
>> +CREATE TABLE t1(a INT, PRIMARY KEY(a)) engine=innodb;
>> +insert into t1(a) values(1);
>> +insert into t1(a) values(2);
>> +insert into t1(a) values(3);
>> +=====Inserting data on the master but without the SQL Thread being
> running=======;
>> +stop slave SQL_THREAD;
>> +insert into t1(a) values(4);
>> +insert into t1(a) values(5);
>> +insert into t1(a) values(6);
>> +=====Removing relay log files and crashing/recoverying the slave=======;
>> +stop slave IO_THREAD;
>> +failure
>> +SET SESSION debug="d,crash_before_rotate_relaylog";
>> +FLUSH LOGS;
>> +ERROR HY000: Lost connection to MySQL server during query
>> +=====Dumping and comparing tables=======;
>> +start slave;
>> +select * from t1;
>> +a
>> +1
>> +2
>> +3
>> +4
>> +5
>> +6
>> +select * from t1;
>> +a
>> +1
>> +2
>> +3
>> +4
>> +5
>> +6
>>     
>
> Could you use diff_tables.inc instead? It works since the tables are supposed to
> be equal, but otherwise, it doesn't work.
>   

done.
>   
>> +=====Corrupting the master.info=======;
>> +stop slave;
>> +FLUSH LOGS;
>> +insert into t1(a) values(7);
>> +insert into t1(a) values(8);
>> +insert into t1(a) values(9);
>> +SET SESSION debug="d,crash_before_rotate_relaylog";
>> +FLUSH LOGS;
>> +ERROR HY000: Lost connection to MySQL server during query
>> +=====Dumping and comparing tables=======;
>> +start slave;
>> +select * from t1;
>> +a
>> +1
>> +2
>> +3
>> +4
>> +5
>> +6
>> +7
>> +8
>> +9
>> +select * from t1;
>> +a
>> +1
>> +2
>> +3
>> +4
>> +5
>> +6
>> +7
>> +8
>> +9
>>     
>
> Same here.
>   

done.
>   
>> +=====Clean up=======;
>> +drop table t1;
>>
>> === added file 'mysql-test/suite/rpl/t/rpl_sync-slave.opt'
>> --- a/mysql-test/suite/rpl/t/rpl_sync-slave.opt	1970-01-01 00:00:00 +0000
>> +++ b/mysql-test/suite/rpl/t/rpl_sync-slave.opt	2009-02-12 09:13:57 +0000
>> @@ -0,0 +1 @@
>> +--sync-relay-log-info=1 --relay-log-recovery=1
>>
>> === added file 'mysql-test/suite/rpl/t/rpl_sync.test'
>> --- a/mysql-test/suite/rpl/t/rpl_sync.test	1970-01-01 00:00:00 +0000
>> +++ b/mysql-test/suite/rpl/t/rpl_sync.test	2009-02-12 09:13:57 +0000
>> @@ -0,0 +1,143 @@
>>
> +########################################################################################
>> +# This test verifies the options --sync-relay-log-info and --relay-log-recovery
> by 
>> +# crashing the slave in two different situations:
>> +#  (case-1) - Corrupt the relay log with changes which were not processed by
>> +#  the SQL Thread and crashes it.
>> +#  (case-2) - Corrupt the master.info with wrong coordinates and crashes it.
>> +#
>> +#  Case 1:
>> +#    1 - Stops the SQL Thread
>> +#    2 - Inserts new records into the master.
>> +#    3 - Corrupts the relay-log.bin* which most likely has such changes.
>> +#    4 - Crashes the slave
>> +#    5 - Verifies if the slave is sync with the master which means that the
> information
>> +#    loss was circumvented by the recovery process.
>> +#
>> +#  Case 2:
>> +#    1 - Stops the SQL/IO Threads
>> +#    2 - Inserts new records into the master.
>> +#    3 - Corrupts the master.info with wrong coordinates.
>> +#    4 - Crashes the slave
>> +#    5 - Verifies if the slave is sync with the master which means that the
> information
>> +#    loss was circumvented by the recovery process.
>>
> +########################################################################################
>> +
>>
> +########################################################################################
>> +#                                Configuring the environment
>>
> +########################################################################################
>> +--echo =====Configuring the enviroment=======;
>> +--source include/master-slave.inc
>> +--source include/not_embedded.inc
>> +--source include/not_valgrind.inc
>> +
>> +--source include/have_debug.inc
>> +
>> +--source include/have_innodb.inc
>> +
>> +CREATE TABLE t1(a INT, PRIMARY KEY(a)) engine=innodb;
>> +
>> +insert into t1(a) values(1);
>> +insert into t1(a) values(2);
>> +insert into t1(a) values(3);
>> +
>>
> +########################################################################################
>> +#                             Case 1: Corrupt a relay-log.bin*
>>
> +########################################################################################
>> +--echo =====Inserting data on the master but without the SQL Thread being
> running=======;
>> +save_master_pos;
>> +connection slave;
>> +sync_with_master;
>>     
>
> These three can be replaced with "sync_slave_with_master;"
>   
done.
>   
>> +
>> +connection slave;
>> +let $MYSQLD_SLAVE_DATADIR= `select @@datadir`;
>> +--replace_result $MYSQLD_SLAVE_DATADIR MYSQLD_SLAVE_DATADIR
>> +--copy_file $MYSQLD_SLAVE_DATADIR/master.info
> $MYSQLD_SLAVE_DATADIR/master.backup
>> +stop slave SQL_THREAD;
>> +source include/wait_for_slave_sql_to_stop.inc;
>> +
>> +connection master;
>> +insert into t1(a) values(4);
>> +insert into t1(a) values(5);
>> +insert into t1(a) values(6);
>> +
>> +--echo =====Removing relay log files and crashing/recoverying the slave=======;
>> +connection slave;
>> +stop slave IO_THREAD;
>> +source include/wait_for_slave_io_to_stop.inc;
>> +
>> +let $file= query_get_value("SHOW SLAVE STATUS", Relay_Log_File, 1);
>> +--replace_result $MYSQLD_SLAVE_DATADIR MYSQLD_SLAVE_DATADIR
>> +--exec echo "failure" > $MYSQLD_SLAVE_DATADIR/$file
>> +--exec cat  $MYSQLD_SLAVE_DATADIR/$file
>> +
>> +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect
>> +SET SESSION debug="d,crash_before_rotate_relaylog";
>> +--error 2013
>> +FLUSH LOGS;
>> +
>> +--enable_reconnect
>> +--source include/wait_until_connected_again.inc
>> +
>> +--echo =====Dumping and comparing tables=======;
>> +start slave;
>> +source include/wait_for_slave_to_start.inc;
>> +
>> +connection master;
>> +save_master_pos;
>> +connection slave;
>> +sync_with_master;
>>     
>
> sync_slave_with_master;
>   
done.
>   
>> +
>> +connection master;
>> +select * from t1;
>> +
>> +connection slave;
>> +select * from t1;
>> +
>>
> +########################################################################################
>> +#                             Case 2: Corrupt a master.info
>>
> +########################################################################################
>> +--echo =====Corrupting the master.info=======;
>> +connection slave;
>> +stop slave;
>> +source include/wait_for_slave_to_stop.inc;
>> +
>> +connection master;
>> +FLUSH LOGS;
>> +
>> +insert into t1(a) values(7);
>> +insert into t1(a) values(8);
>> +insert into t1(a) values(9);
>> +
>> +connection slave;
>> +--replace_result $MYSQLD_SLAVE_DATADIR MYSQLD_SLAVE_DATADIR
>> +--exec cat $MYSQLD_SLAVE_DATADIR/master.backup >
> $MYSQLD_SLAVE_DATADIR/master.info
>> +
>> +--exec echo "restart" > $MYSQLTEST_VARDIR/tmp/mysqld.2.expect
>> +SET SESSION debug="d,crash_before_rotate_relaylog";
>> +--error 2013
>> +FLUSH LOGS;
>> +
>> +--enable_reconnect
>> +--source include/wait_until_connected_again.inc
>> +
>> +--echo =====Dumping and comparing tables=======;
>> +start slave;
>> +source include/wait_for_slave_to_start.inc;
>> +
>> +connection master;
>> +save_master_pos;
>> +connection slave;
>> +sync_with_master;
>>     
>
> sync_slave_with_master;
>   
done.
>   
>> +
>> +connection master;
>> +select * from t1;
>> +
>> +connection slave;
>> +select * from t1;
>> +
>>
> +########################################################################################
>> +#                                      Clean up
>>
> +########################################################################################
>> +--echo =====Clean up=======;
>> +connection master;
>> +drop table t1;
>>
>> === modified file 'sql/rpl_mi.cc'
>> --- a/sql/rpl_mi.cc	2009-02-04 16:11:20 +0000
>> +++ b/sql/rpl_mi.cc	2009-02-12 09:13:57 +0000
>> @@ -30,13 +30,14 @@ int init_strvar_from_file(char *var, int
>>  int init_floatvar_from_file(float* var, IO_CACHE* f, float default_val);
>>  int init_dynarray_intvar_from_file(DYNAMIC_ARRAY* arr, IO_CACHE* f);
>>  
>> -Master_info::Master_info()
>> +Master_info::Master_info(bool is_slave_recovery)
>>    :Slave_reporting_capability("I/O"),
>>     ssl(0), ssl_verify_server_cert(0), fd(-1),  io_thd(0), port(MYSQL_PORT),
>>     connect_retry(DEFAULT_CONNECT_RETRY), heartbeat_period(0),
>>     received_heartbeats(0), inited(0), master_id(0),
>>     abort_slave(0), slave_running(0), slave_run_id(0),
>> -   sync_counter(0)
>> +   sync_counter(0),
>> +   rli(is_slave_recovery) 
>>  {
>>    host[0] = 0; user[0] = 0; password[0] = 0;
>>    ssl_ca[0]= 0; ssl_capath[0]= 0; ssl_cert[0]= 0;
>>
>> === modified file 'sql/rpl_mi.h'
>> --- a/sql/rpl_mi.h	2009-02-04 16:11:20 +0000
>> +++ b/sql/rpl_mi.h	2009-02-12 09:13:57 +0000
>> @@ -59,7 +59,7 @@
>>  class Master_info : public Slave_reporting_capability
>>  {
>>   public:
>> -  Master_info();
>> +  Master_info(bool is_slave_recovery);
>>    ~Master_info();
>>    bool shall_ignore_server_id(ulong s_id);
>>  
>>
>> === modified file 'sql/rpl_rli.cc'
>> --- a/sql/rpl_rli.cc	2009-02-04 16:11:20 +0000
>> +++ b/sql/rpl_rli.cc	2009-02-12 09:13:57 +0000
>> @@ -30,7 +30,7 @@ int init_strvar_from_file(char *var, int
>>  			  const char *default_val);
>>  
>>  
>> -Relay_log_info::Relay_log_info()
>> +Relay_log_info::Relay_log_info(bool is_slave_recovery)
>>    :Slave_reporting_capability("SQL"),
>>     no_storage(FALSE), replicate_same_server_id(::replicate_same_server_id),
>>     info_fd(-1), cur_log_fd(-1), 
>> @@ -50,6 +50,8 @@ Relay_log_info::Relay_log_info()
>>  {
>>    DBUG_ENTER("Relay_log_info::Relay_log_info");
>>  
>> +  is_relay_log_recovery= is_slave_recovery;
>> +
>>     
>
> Why don't you add this to the initializer list above?
>   
done.
>   
>>    group_relay_log_name[0]= event_relay_log_name[0]=
>>      group_master_log_name[0]= 0;
>>    until_log_name[0]= ign_master_log_name_end[0]= 0;
>> @@ -265,14 +267,14 @@ Failed to open the existing relay log in
>>    }
>>  
>>  #ifndef DBUG_OFF
>> +  if (!(rli->is_relay_log_recovery))
>>    {
>>      char llbuf1[22], llbuf2[22];
>>      DBUG_PRINT("info", ("my_b_tell(rli->cur_log)=%s
> rli->event_relay_log_pos=%s",
>>                          llstr(my_b_tell(rli->cur_log),llbuf1),
>>                          llstr(rli->event_relay_log_pos,llbuf2)));
>>      DBUG_ASSERT(rli->event_relay_log_pos >= BIN_LOG_HEADER_SIZE);
>> -    DBUG_ASSERT(rli->is_relay_log_recovery || 
>> -                (my_b_tell(rli->cur_log) == rli->event_relay_log_pos));
>> +    DBUG_ASSERT((my_b_tell(rli->cur_log) == rli->event_relay_log_pos));
>>    }
>>  #endif
>>  
>>
>> === modified file 'sql/rpl_rli.h'
>> --- a/sql/rpl_rli.h	2009-02-04 16:11:20 +0000
>> +++ b/sql/rpl_rli.h	2009-02-12 09:13:57 +0000
>> @@ -273,7 +273,7 @@ public:
>>    char ign_master_log_name_end[FN_REFLEN];
>>    ulonglong ign_master_log_pos_end;
>>  
>> -  Relay_log_info();
>> +  Relay_log_info(bool is_slave_recovery);
>>    ~Relay_log_info();
>>  
>>    /*
>>
>> === modified file 'sql/slave.cc'
>> --- a/sql/slave.cc	2009-02-04 16:11:20 +0000
>> +++ b/sql/slave.cc	2009-02-12 09:13:57 +0000
>> @@ -220,6 +220,7 @@ void unlock_slave_threads(Master_info* m
>>  int init_slave()
>>  {
>>    DBUG_ENTER("init_slave");
>> +  int error= 0;
>>  
>>    /*
>>      This is called when mysqld starts. Before client connections are
>> @@ -233,9 +234,12 @@ int init_slave()
>>    */
>>  
>>    if (pthread_key_create(&RPL_MASTER_INFO, NULL))
>> +  {
>> +    error= 1;
>>      goto err;
>> +  }
>>  
>> -  active_mi= new Master_info;
>> +  active_mi= new Master_info(relay_log_recovery);
>>     
>
> Hmmm... OK.
>
> Caravaning through the memory allocator does not scale very well, but this is a
> single thread and it is not very commonly executed.
>   
ok. This is executed in the startup.
>   
>>  
>>    /*
>>      If master_host is not specified, try to read it from the master_info file.
>> @@ -245,6 +249,7 @@ int init_slave()
>>    if (!active_mi)
>>    {
>>      sql_print_error("Failed to allocate memory for the master info structure");
>> +    error= 1;
>>      goto err;
>>    }
>>  
>> @@ -252,12 +257,15 @@ int init_slave()
>>                         1, (SLAVE_IO | SLAVE_SQL)))
>>    {
>>      sql_print_error("Failed to initialize the master info structure");
>> +    error= 1;
>>      goto err;
>>    }
>>  
>> -  active_mi->rli.is_relay_log_recovery= relay_log_recovery;
>>    if (active_mi->rli.is_relay_log_recovery &&
> init_recovery(active_mi))
>> +  {
>> +    error= 1;
>>      goto err;
>> +  }
>>  
>>    /* If server id is not set, start_slave_thread() will say it */
>>  
>> @@ -271,43 +279,43 @@ int init_slave()
>>                              SLAVE_IO | SLAVE_SQL))
>>      {
>>        sql_print_error("Failed to create slave threads");
>> +      error= 1;
>>        goto err;
>>      }
>>    }
>> -  active_mi->rli.is_relay_log_recovery= 0;
>> -  pthread_mutex_unlock(&LOCK_active_mi);
>> -  DBUG_RETURN(0);
>>  
>>  err:
>> -  active_mi->rli.is_relay_log_recovery= 0;
>> +  active_mi->rli.is_relay_log_recovery= false;
>>     
>
> We use FALSE instead of false.
>   
fixed.
>   
>>    pthread_mutex_unlock(&LOCK_active_mi);
>> -  DBUG_RETURN(1);
>> +  DBUG_RETURN(error);
>>  }
>>  
>>  /*
>> - * Updates the master info based on the information stored in the
>> - * relay info and ignores relay logs previously retrieved by the IO 
>> - * thread, which thus starts fetching again based on to the  
>> - * group_master_log_pos and group_master_log_name. Eventually, the old
>> - * relay logs will be purged by the normal purge mechanism.
>> - * 
>> - * In the feature, we can improve this routine in order to avoid throwing
>> - * away logs that are safely stored in the disk.
>> - * 
>> - * There is no need for a mutex as the caller (i.e. init_slave) already 
>> - * has one acquired.
>> - * 
>> - * Specifically, the following structures are updated:
>> - * 
>> - * 1 - mi->master_log_pos  <-- rli->group_master_log_pos
>> - * 2 - mi->master_log_name <-- rli->group_master_log_name
>> - * 3 - It moves the relay log to the new relay log file, by
>> - *     rli->group_relay_log_pos  <-- BIN_LOG_HEADER_SIZE;
>> - *     rli->event_relay_log_pos  <-- BIN_LOG_HEADER_SIZE;
>> - *     rli->group_relay_log_name <-- rli->relay_log.get_log_fname();
>> - *     rli->event_relay_log_name <-- rli->relay_log.get_log_fname();
>> - * 
>> - *  If there is an error, it returns (1), otherwise returns (0).
>> +  Updates the master info based on the information stored in the
>> +  relay info and ignores relay logs previously retrieved by the IO 
>> +  thread, which thus starts fetching again based on to the  
>> +  group_master_log_pos and group_master_log_name. Eventually, the old
>> +  relay logs will be purged by the normal purge mechanism.
>> +
>> +  In the feature, we should improve this routine in order to avoid throwing
>> +  away logs that are safely stored in the disk. Note also that this recovery 
>> +  routine relies on the correctness of the relay-log.info and only tolerates 
>> +  coordinate problems in master.info.
>> +  
>> +  In this function, there is no need for a mutex as the caller 
>> +  (i.e. init_slave) already has one acquired.
>> +  
>> +  Specifically, the following structures are updated:
>> + 
>> +  1 - mi->master_log_pos  <-- rli->group_master_log_pos
>> +  2 - mi->master_log_name <-- rli->group_master_log_name
>> +  3 - It moves the relay log to the new relay log file, by
>> +      rli->group_relay_log_pos  <-- BIN_LOG_HEADER_SIZE;
>> +      rli->event_relay_log_pos  <-- BIN_LOG_HEADER_SIZE;
>> +      rli->group_relay_log_name <-- rli->relay_log.get_log_fname();
>> +      rli->event_relay_log_name <-- rli->relay_log.get_log_fname();
>> +  
>> +   If there is an error, it returns (1), otherwise returns (0).
>>   */
>>  static int init_recovery(Master_info* mi)
>>  {
>> @@ -4316,6 +4324,8 @@ void rotate_relay_log(Master_info* mi)
>>    DBUG_ENTER("rotate_relay_log");
>>    Relay_log_info* rli= &mi->rli;
>>  
>> +  DBUG_EXECUTE_IF("crash_before_rotate_relaylog", exit(1););
>> +
>>    /* We don't lock rli->run_lock. This would lead to deadlocks. */
>>    pthread_mutex_lock(&mi->run_lock);
>>  
>>
>> === modified file 'sql/sql_binlog.cc'
>> --- a/sql/sql_binlog.cc	2009-01-26 16:03:39 +0000
>> +++ b/sql/sql_binlog.cc	2009-02-12 09:13:57 +0000
>> @@ -58,7 +58,7 @@ void mysql_client_binlog_statement(THD* 
>>    my_bool have_fd_event= TRUE;
>>    if (!thd->rli_fake)
>>    {
>> -    thd->rli_fake= new Relay_log_info;
>> +    thd->rli_fake= new Relay_log_info(false);
>>     
>
> We use 'FALSE' instead of 'false'.
>   
fixed.
>   
>>  #ifdef HAVE_purify
>>      thd->rli_fake->is_fake= TRUE;
>>  #endif
>>
>>
>>     
>
>   
> ------------------------------------------------------------------------
>
>
>   
Thank you.

Thread
bzr commit into mysql-6.0-rpl branch (alfranio.correia:2811) Bug#40337Alfranio Correia12 Feb
  • Re: bzr commit into mysql-6.0-rpl branch (alfranio.correia:2811)Bug#40337Mats Kindahl16 Feb
    • Re: bzr commit into mysql-6.0-rpl branch (alfranio.correia:2811)Bug#40337Alfranio Correia17 Feb