Hi Alfranio
Patch looks OK, have some minor comments, OK to push after fix them!
Alfranio Correia wrote:
> #At file:///home/acorreia/workspace.sun/playground.mysql/mysql-5.0.68-custom_build/
>
> 2704 Alfranio Correia 2008-11-13
> BUG#40337 Fsyncing master and relay log to disk after every
> event is too slow.
>
> Introduced a recovery mechanism that throws away relay-log.bin* retrieved
> from a master in case of a crash and updates the master.info based on
> the relay.info. This is done because such files may be corrupted after
> a crash.
>
> It is advisalbe to use this feature, when the parameter --syn-relay-log-info =
> 1.
typo: advisable
How about revise this to:
Note: If relay-log.info is corrupted or incorrect, this recovery
mechanism could fail. So it is advised to use this feature with
parameter --sync-relay-log-info=1 to minimize the chance of
relay-log.info corruption.
You can decide on the wording, English is not my mother tongue :-)
> modified:
> mysql-test/r/rpl_flush_log_loop.result
> sql/mysql_priv.h
> sql/mysqld.cc
> sql/set_var.cc
> sql/slave.cc
>
> === modified file 'mysql-test/r/rpl_flush_log_loop.result'
> --- a/mysql-test/r/rpl_flush_log_loop.result 2007-10-10 07:21:11 +0000
> +++ b/mysql-test/r/rpl_flush_log_loop.result 2008-11-13 20:19:09 +0000
> @@ -10,6 +10,7 @@ relay_log MYSQLTEST_VARDIR/master-data/r
> relay_log_index
> relay_log_info_file relay-log.info
> relay_log_purge ON
> +relay_log_recovery OFF
> relay_log_space_limit 0
> stop slave;
> change master to master_host='127.0.0.1',master_user='root',
>
> === modified file 'sql/mysql_priv.h'
> --- a/sql/mysql_priv.h 2008-10-02 11:57:52 +0000
> +++ b/sql/mysql_priv.h 2008-11-13 20:19:09 +0000
> @@ -1314,6 +1314,7 @@ extern ulong expire_logs_days, sync_binl
> extern ulong opt_tc_log_size, tc_log_max_pages_used, tc_log_page_size;
> extern ulong tc_log_page_waits;
> extern my_bool relay_log_purge, opt_innodb_safe_binlog, opt_innodb;
> +extern my_bool relay_log_recovery;
> extern uint test_flags,select_errors,ha_open_options;
> extern uint protocol_version, mysqld_port, dropping_tables;
> extern uint delay_key_write_options, lower_case_table_names;
>
> === modified file 'sql/mysqld.cc'
> --- a/sql/mysqld.cc 2008-08-26 08:32:43 +0000
> +++ b/sql/mysqld.cc 2008-11-13 20:19:09 +0000
> @@ -404,7 +404,8 @@ ulong opt_ndb_cache_check_time;
> const char *opt_ndb_mgmd;
> ulong opt_ndb_nodeid;
> #endif
> -my_bool opt_readonly, use_temp_pool, relay_log_purge;
> +my_bool opt_readonly, use_temp_pool, relay_log_purge;
> +my_bool relay_log_recovery;
> my_bool opt_sync_frm, opt_allow_suspicious_udfs;
> my_bool opt_secure_auth= 0;
> char* opt_secure_file_priv= 0;
> @@ -4918,6 +4919,7 @@ enum options_mysqld
> OPT_QUERY_CACHE_TYPE, OPT_QUERY_CACHE_WLOCK_INVALIDATE, OPT_RECORD_BUFFER,
> OPT_RECORD_RND_BUFFER, OPT_DIV_PRECINCREMENT, OPT_RELAY_LOG_SPACE_LIMIT,
> OPT_RELAY_LOG_PURGE,
> + OPT_RELAY_LOG_RECOVERY,
> OPT_SLAVE_NET_TIMEOUT, OPT_SLAVE_COMPRESSED_PROTOCOL, OPT_SLOW_LAUNCH_TIME,
> OPT_SLAVE_TRANS_RETRIES, OPT_READONLY, OPT_DEBUGGING,
> OPT_SORT_BUFFER, OPT_TABLE_CACHE,
> @@ -6302,6 +6304,11 @@ The minimum value for this variable is 4
> (gptr*) &relay_log_purge,
> (gptr*) &relay_log_purge, 0, GET_BOOL, NO_ARG,
> 1, 0, 1, 0, 1, 0},
> + {"relay_log_recovery", OPT_RELAY_LOG_RECOVERY,
> + "Enables automatic relay log recovery right after the database startup, which
> means that the IO Thread starts re-fetching from the master right after the last
> transaction processed.",
> + (gptr*) &relay_log_recovery,
> + (gptr*) &relay_log_recovery, 0, GET_BOOL, NO_ARG,
> + 0, 0, 1, 0, 1, 0},
> {"relay_log_space_limit", OPT_RELAY_LOG_SPACE_LIMIT,
> "Maximum space to use for all relay logs.",
> (gptr*) &relay_log_space_limit,
>
> === modified file 'sql/set_var.cc'
> --- a/sql/set_var.cc 2008-08-25 12:11:59 +0000
> +++ b/sql/set_var.cc 2008-11-13 20:19:09 +0000
> @@ -323,6 +323,8 @@ sys_var_thd_ulong sys_div_precincrement(
> #ifdef HAVE_REPLICATION
> sys_var_bool_ptr sys_relay_log_purge("relay_log_purge",
> &relay_log_purge);
> +sys_var_bool_ptr sys_relay_log_recovery("relay_log_recovery",
> + &relay_log_recovery);
> #endif
> sys_var_long_ptr sys_rpl_recovery_rank("rpl_recovery_rank",
> &rpl_recovery_rank);
> @@ -733,6 +735,7 @@ sys_var *sys_variables[]=
> &sys_read_rnd_buff_size,
> #ifdef HAVE_REPLICATION
> &sys_relay_log_purge,
> + &sys_relay_log_recovery,
> #endif
> &sys_rpl_recovery_rank,
> &sys_safe_updates,
> @@ -1052,6 +1055,7 @@ struct show_var_st init_vars[]= {
> {"relay_log_index", (char*) &opt_relaylog_index_name, SHOW_CHAR_PTR},
> {"relay_log_info_file", (char*) &relay_log_info_file, SHOW_CHAR_PTR},
> {sys_relay_log_purge.name, (char*) &sys_relay_log_purge, SHOW_SYS},
> + {sys_relay_log_recovery.name, (char*) &sys_relay_log_recovery, SHOW_SYS},
> {"relay_log_space_limit", (char*) &relay_log_space_limit,
> SHOW_LONGLONG},
> #endif
> {sys_rpl_recovery_rank.name,(char*) &sys_rpl_recovery_rank, SHOW_SYS},
>
> === modified file 'sql/slave.cc'
> --- a/sql/slave.cc 2008-03-28 20:01:05 +0000
> +++ b/sql/slave.cc 2008-11-13 20:19:09 +0000
> @@ -67,6 +67,7 @@ static inline bool io_slave_killed(THD*
> static inline bool sql_slave_killed(THD* thd,RELAY_LOG_INFO* rli);
> static int count_relay_log_space(RELAY_LOG_INFO* rli);
> static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type);
> +static int init_recovery(MASTER_INFO* mi);
> static int safe_connect(THD* thd, MYSQL* mysql, MASTER_INFO* mi);
> static int safe_reconnect(THD* thd, MYSQL* mysql, MASTER_INFO* mi,
> bool suppress_warnings);
> @@ -174,6 +175,9 @@ int init_slave()
> if (server_id && !master_host && active_mi->host[0])
> master_host= active_mi->host;
>
> + if (init_recovery(active_mi))
> + goto err;
> +
> /* If server id is not set, start_slave_thread() will say it */
>
> if (master_host && !opt_skip_slave_start)
> @@ -197,6 +201,73 @@ err:
> DBUG_RETURN(1);
> }
>
> +/*
> +* Updates the master info based on the information stored in the relay info and
> +* ignores relay logs previously retrieved by the IO thread starting fetching again
> +* based on to the group_master_log_pos and group_master_log_name.
> +*
> +* There is no need for a mutex
> +* as the caller (i.e. init_slave) already has one acquired.
> +*
> +* The recovery is done just after the database startup which means that
> +* start slave and stop slave commands do not throw away any relay log
> +* retrieved in the meantime.
> +*
> +* In the feature, we can improve this routine in order to avoid throwing
> +* away logs that are safely stored in the disk. Eventually, the old
> +* relay logs will be purged by the normal purge mechanism.
> +*
> +* Specifically,
> +*
> +* 1 - mi->master_log_pos <-- rli->group_master_log_pos
> +* 2 - mi->master_log_name <-- rli->group_master_log_name
> +* 3 - It moves the relay info to the new relay log file, by
> +* rli->group_relay_log_pos <-- rli->group_relay_log_pos <--
> BIN_LOG_HEADER_SIZE;
> +* rli->group_relay_log_name, rli->group_relay_log_pos
> +*
> +* If there is an error, it returns (1), otherwise returns (0).
> +*/
> +static int init_recovery(MASTER_INFO* mi)
> +{
> + const char* errmsg=0;
> + DBUG_ENTER("init_recovery");
> +
> + RELAY_LOG_INFO* rli = &mi->rli;
Please following the coding style,
use rli= &mi->rli;
Also please fix the coding style violations below.
> + if (relay_log_recovery && rli->group_master_log_name[0])
> + {
> + mi->master_log_pos = max(BIN_LOG_HEADER_SIZE,
> + rli->group_master_log_pos);
Here too.
> + strmake(mi->master_log_name, rli->group_master_log_name,
> + sizeof(mi->master_log_name)-1);
Fix indent
> +
> + strmake(rli->group_relay_log_name, rli->relay_log.get_log_fname(),
> + sizeof(rli->group_relay_log_name)-1);
> + strmake(rli->event_relay_log_name, rli->relay_log.get_log_fname(),
> + sizeof(mi->rli.event_relay_log_name)-1);
> +
> + rli->group_relay_log_pos = rli->group_relay_log_pos =
> BIN_LOG_HEADER_SIZE;
> +
Here
> + if (init_relay_log_pos(rli,
> + rli->group_relay_log_name,
> + rli->group_relay_log_pos,
> + 0 /*no data lock*/,
> + &errmsg, 0))
> + DBUG_RETURN(1);
> +
> + if (flush_master_info(mi, 0))
> + {
> + sql_print_error("Failed to flush master info file");
> + DBUG_RETURN(1);
> + }
> + if (flush_relay_log_info(rli))
> + {
> + sql_print_error("Failed to flush master info file");
> + DBUG_RETURN(1);
> + }
> + }
> +
> + DBUG_RETURN(0);
> +}
>
> static void free_table_ent(TABLE_RULE_ENT* e)
> {
>