Thanks for this fix! Great to fix several races in one patch. I have
nothing to add, the patch is good to push.
/Sven
Luis Soares wrote:
> #At file:///home/lsoares/Workspace/bzr/work/bugfixing/12416611/mysql-trunk/ based on
> revid:alexander.nozdrin@stripped
>
> 3364 Luis Soares 2011-05-13
> BUG#12416611: VALGRIND ERRORS IN SHOW SLAVE STATUS, RPL_CROSS_VERSION
>
> There are two problems found: (i) potential race between user
> thread and SQL thread; (ii) Create_file_log_event and
> Append_block_log_event's do_apply function set thd->proc_info to
> local memory which is freed when the function returns.
>
> In problem #1, there could be a concurrent access to
> mi->rli->info_thd while it was not yet set, so better to play
> safe and synchronize on the run_lock.
>
> In problem #2, thd->proc_info remains pointing to bogus memory
> after applying Append_block and Create_file events. If (un)lucky
> enough the user might get unintended output for
> Slave_SQL_Running_State field while showing slave status.
>
> To fix problem #1, we deploy a synchronization on
> mi->rli->run_lock. To fix problem #2, we change the status
> message showing up in SHOW SLAVE STATUS. It does not show the
> filename anymore in the status message, instead it shows a static
> sentence (thanks Sven for the suggestion):
>
> - Append_block events
> "Making temporary file (append) before replaying LOAD DATA
> INFILE."
>
> - Create_file events:
> "Making temporary file (create) before replaying LOAD DATA
> INFILE."
>
> Additionally, we are also fixing the remaining parts in the
> replication code that are using thd_proc_info. They now use
> THD_STAGE_INFO instead.
>
> modified:
> sql/log_event.cc
> sql/mysqld.cc
> sql/mysqld.h
> sql/rpl_rli.cc
> sql/rpl_rli.h
> sql/rpl_slave.cc
> === modified file 'sql/log_event.cc'
> --- a/sql/log_event.cc 2011-03-25 12:15:56 +0000
> +++ b/sql/log_event.cc 2011-05-13 13:59:55 +0000
> @@ -6753,16 +6753,15 @@ void Create_file_log_event::pack_info(Pr
> #if defined(HAVE_REPLICATION) && !defined(MYSQL_CLIENT)
> int Create_file_log_event::do_apply_event(Relay_log_info const *rli)
> {
> - char proc_info[17+FN_REFLEN+10], *fname_buf;
> + char fname_buf[FN_REFLEN+10];
> char *ext;
> int fd = -1;
> IO_CACHE file;
> int error = 1;
>
> + THD_STAGE_INFO(thd, stage_making_temp_file_create_before_load_data);
> bzero((char*)&file, sizeof(file));
> - fname_buf= strmov(proc_info, "Making temp file ");
> ext= slave_load_file_stem(fname_buf, file_id, server_id, ".info");
> - thd_proc_info(thd, proc_info);
> /* old copy may exist already */
> mysql_file_delete(key_file_log_event_info, fname_buf, MYF(0));
> if ((fd= mysql_file_create(key_file_log_event_info,
> @@ -6932,14 +6931,13 @@ int Append_block_log_event::get_create_o
>
> int Append_block_log_event::do_apply_event(Relay_log_info const *rli)
> {
> - char proc_info[17+FN_REFLEN+10], *fname= proc_info+17;
> + char fname[FN_REFLEN+10];
> int fd;
> int error = 1;
> DBUG_ENTER("Append_block_log_event::do_apply_event");
>
> - fname= strmov(proc_info, "Making temp file ");
> + THD_STAGE_INFO(thd, stage_making_temp_file_append_before_load_data);
> slave_load_file_stem(fname, file_id, server_id, ".data");
> - thd_proc_info(thd, proc_info);
> if (get_create_or_append())
> {
> /*
>
> === modified file 'sql/mysqld.cc'
> --- a/sql/mysqld.cc 2011-04-28 16:50:10 +0000
> +++ b/sql/mysqld.cc 2011-05-13 13:59:55 +0000
> @@ -8363,7 +8363,7 @@ PSI_stage_info stage_end= { 0, "end", 0}
> PSI_stage_info stage_executing= { 0, "executing", 0};
> PSI_stage_info stage_execution_of_init_command= { 0, "Execution of init_command",
> 0};
> PSI_stage_info stage_finished_reading_one_binlog_switching_to_next_binlog= { 0,
> "Finished reading one binlog; switching to next binlog", 0};
> -PSI_stage_info stage_flushing_relay_log_and_master_info_files= { 0, "Flushing relay
> log and master info files.", 0};
> +PSI_stage_info stage_flushing_relay_log_and_master_info_repository= { 0, "Flushing
> relay log and master info repository.", 0};
> PSI_stage_info stage_flushing_relay_log_info_file= { 0, "Flushing relay-log info
> file.", 0};
> PSI_stage_info stage_freeing_items= { 0, "freeing items", 0};
> PSI_stage_info stage_fulltext_initialization= { 0, "FULLTEXT initialization", 0};
> @@ -8375,6 +8375,8 @@ PSI_stage_info stage_invalidating_query_
> PSI_stage_info stage_invalidating_query_cache_entries_table_list= { 0, "invalidating
> query cache entries (table list)", 0};
> PSI_stage_info stage_killing_slave= { 0, "Killing slave", 0};
> PSI_stage_info stage_logging_slow_query= { 0, "logging slow query", 0};
> +PSI_stage_info stage_making_temp_file_append_before_load_data= { 0, "Making
> temporary file (append) before replaying LOAD DATA INFILE.", 0};
> +PSI_stage_info stage_making_temp_file_create_before_load_data= { 0, "Making
> temporary file (create) before replaying LOAD DATA INFILE.", 0};
> PSI_stage_info stage_manage_keys= { 0, "manage keys", 0};
> PSI_stage_info stage_opening_tables= { 0, "Opening tables", 0};
> PSI_stage_info stage_optimizing= { 0, "optimizing", 0};
> @@ -8399,6 +8401,7 @@ PSI_stage_info stage_sorting_for_group=
> PSI_stage_info stage_sorting_for_order= { 0, "Sorting for order", 0};
> PSI_stage_info stage_sorting_result= { 0, "Sorting result", 0};
> PSI_stage_info stage_statistics= { 0, "statistics", 0};
> +PSI_stage_info stage_sql_thd_waiting_until_delay= { 0, "Waiting until MASTER_DELAY
> seconds after master executed event", 0 };
> PSI_stage_info stage_storing_result_in_query_cache= { 0, "storing result in query
> cache", 0};
> PSI_stage_info stage_storing_row_into_queue= { 0, "storing row into queue", 0};
> PSI_stage_info stage_system_lock= { 0, "System lock", 0};
> @@ -8452,7 +8455,7 @@ PSI_stage_info *all_server_stages[]=
> & stage_executing,
> & stage_execution_of_init_command,
> & stage_finished_reading_one_binlog_switching_to_next_binlog,
> - & stage_flushing_relay_log_and_master_info_files,
> + & stage_flushing_relay_log_and_master_info_repository,
> & stage_flushing_relay_log_info_file,
> & stage_freeing_items,
> & stage_fulltext_initialization,
> @@ -8464,6 +8467,8 @@ PSI_stage_info *all_server_stages[]=
> & stage_invalidating_query_cache_entries_table_list,
> & stage_killing_slave,
> & stage_logging_slow_query,
> + & stage_making_temp_file_append_before_load_data,
> + & stage_making_temp_file_create_before_load_data,
> & stage_manage_keys,
> & stage_opening_tables,
> & stage_optimizing,
> @@ -8487,6 +8492,7 @@ PSI_stage_info *all_server_stages[]=
> & stage_sorting_for_group,
> & stage_sorting_for_order,
> & stage_sorting_result,
> + & stage_sql_thd_waiting_until_delay,
> & stage_statistics,
> & stage_storing_result_in_query_cache,
> & stage_storing_row_into_queue,
>
> === modified file 'sql/mysqld.h'
> --- a/sql/mysqld.h 2011-04-28 16:50:10 +0000
> +++ b/sql/mysqld.h 2011-05-13 13:59:55 +0000
> @@ -334,7 +334,7 @@ extern PSI_stage_info stage_end;
> extern PSI_stage_info stage_executing;
> extern PSI_stage_info stage_execution_of_init_command;
> extern PSI_stage_info stage_finished_reading_one_binlog_switching_to_next_binlog;
> -extern PSI_stage_info stage_flushing_relay_log_and_master_info_files;
> +extern PSI_stage_info stage_flushing_relay_log_and_master_info_repository;
> extern PSI_stage_info stage_flushing_relay_log_info_file;
> extern PSI_stage_info stage_freeing_items;
> extern PSI_stage_info stage_fulltext_initialization;
> @@ -346,6 +346,8 @@ extern PSI_stage_info stage_invalidating
> extern PSI_stage_info stage_invalidating_query_cache_entries_table_list;
> extern PSI_stage_info stage_killing_slave;
> extern PSI_stage_info stage_logging_slow_query;
> +extern PSI_stage_info stage_making_temp_file_append_before_load_data;
> +extern PSI_stage_info stage_making_temp_file_create_before_load_data;
> extern PSI_stage_info stage_manage_keys;
> extern PSI_stage_info stage_opening_tables;
> extern PSI_stage_info stage_optimizing;
> @@ -369,6 +371,7 @@ extern PSI_stage_info stage_setup;
> extern PSI_stage_info stage_sorting_for_group;
> extern PSI_stage_info stage_sorting_for_order;
> extern PSI_stage_info stage_sorting_result;
> +extern PSI_stage_info stage_sql_thd_waiting_until_delay;
> extern PSI_stage_info stage_statistics;
> extern PSI_stage_info stage_storing_result_in_query_cache;
> extern PSI_stage_info stage_storing_row_into_queue;
>
> === modified file 'sql/rpl_rli.cc'
> --- a/sql/rpl_rli.cc 2011-04-28 16:50:10 +0000
> +++ b/sql/rpl_rli.cc 2011-05-13 13:59:55 +0000
> @@ -43,8 +43,6 @@ const char* info_rli_fields[]=
> "sql_delay"
> };
>
> -const char *const Relay_log_info::state_delaying_string = "Waiting until
> MASTER_DELAY seconds after master executed event";
> -
> Relay_log_info::Relay_log_info(bool is_slave_recovery
> #ifdef HAVE_PSI_INTERFACE
> ,PSI_mutex_key *param_key_info_run_lock,
>
> === modified file 'sql/rpl_rli.h'
> --- a/sql/rpl_rli.h 2011-04-28 16:50:10 +0000
> +++ b/sql/rpl_rli.h 2011-05-13 13:59:55 +0000
> @@ -551,11 +551,6 @@ public:
> size_t get_number_info_rli_fields();
>
> /**
> - Text used in THD::proc_info when the slave SQL thread is delaying.
> - */
> - static const char *const state_delaying_string;
> -
> - /**
> Indicate that a delay starts.
>
> This does not actually sleep; it only sets the state of this
> @@ -570,7 +565,7 @@ public:
> {
> mysql_mutex_assert_owner(&data_lock);
> sql_delay_end= delay_end;
> - thd_proc_info(info_thd, state_delaying_string);
> + THD_STAGE_INFO(info_thd, stage_sql_thd_waiting_until_delay);
> }
>
> int32 get_sql_delay() { return sql_delay; }
>
> === modified file 'sql/rpl_slave.cc'
> --- a/sql/rpl_slave.cc 2011-04-28 16:50:10 +0000
> +++ b/sql/rpl_slave.cc 2011-05-13 13:59:55 +0000
> @@ -634,7 +634,7 @@ int terminate_slave_threads(Master_info*
>
> DBUG_PRINT("info",("Flushing relay-log info file."));
> if (current_thd)
> - thd_proc_info(current_thd, "Flushing relay-log info file.");
> + THD_STAGE_INFO(current_thd, stage_flushing_relay_log_info_file);
>
> /*
> Flushes the relay log info regardles of the sync_relay_log_info option.
> @@ -659,7 +659,7 @@ int terminate_slave_threads(Master_info*
>
> DBUG_PRINT("info",("Flushing relay log and master info repository."));
> if (current_thd)
> - thd_proc_info(current_thd, "Flushing relay log and master info repository.");
> + THD_STAGE_INFO(current_thd,
> stage_flushing_relay_log_and_master_info_repository);
>
> /*
> Flushes the master info regardles of the sync_master_info option.
> @@ -2080,6 +2080,10 @@ bool show_master_info(THD* thd, Master_i
> protocol->store(mi->info_thd ? mi->info_thd->proc_info : "",
> &my_charset_bin);
> mysql_mutex_unlock(&mi->run_lock);
>
> + mysql_mutex_lock(&mi->rli->run_lock);
> + const char *slave_sql_running_state= mi->rli->info_thd ?
> mi->rli->info_thd->proc_info : "";
> + mysql_mutex_unlock(&mi->rli->run_lock);
> +
> mysql_mutex_lock(&mi->data_lock);
> mysql_mutex_lock(&mi->rli->data_lock);
> mysql_mutex_lock(&mi->err_lock);
> @@ -2216,10 +2220,7 @@ bool show_master_info(THD* thd, Master_i
> // SQL_Delay
> protocol->store((uint32) mi->rli->get_sql_delay());
> // SQL_Remaining_Delay
> - // THD::proc_info is not protected by any lock, so we read it once
> - // to ensure that we use the same value throughout this function.
> - const char *slave_sql_running_state= mi->rli->info_thd ?
> mi->rli->info_thd->proc_info : "";
> - if (slave_sql_running_state == Relay_log_info::state_delaying_string)
> + if (slave_sql_running_state == stage_sql_thd_waiting_until_delay.m_name)
> {
> time_t t= my_time(0), sql_delay_end= mi->rli->get_sql_delay_end();
> protocol->store((uint32)(t < sql_delay_end ? sql_delay_end - t : 0));
> @@ -3693,7 +3694,7 @@ log '%s' at position %s, relay log '%s'
>
> while (!sql_slave_killed(thd,rli))
> {
> - thd_proc_info(thd, "Reading event from the relay log");
> + THD_STAGE_INFO(thd, stage_reading_event_from_the_relay_log);
> DBUG_ASSERT(rli->info_thd == thd);
> THD_CHECK_SENTRY(thd);
>
> @@ -5975,7 +5976,7 @@ bool change_master(THD* thd, Master_info
> if (need_relay_log_purge)
> {
> relay_log_purge= 1;
> - thd_proc_info(thd, "Purging old relay logs");
> + THD_STAGE_INFO(thd, stage_purging_old_relay_logs);
> if (mi->rli->purge_relay_logs(thd,
> 0 /* not only reset, but also reinit */,
> &errmsg))
>
>
>
> ------------------------------------------------------------------------
>
>