Hej Andrei,
The fix is good. I only have some minor comments:
- The result file is missing.
- See comments below.
/Sven
Andrei Elkin wrote:
> Below is the list of changes that have just been committed into a local
> 5.0 repository of aelkin. When aelkin does a push these changes
> will be propagated to the main repository and, within 24 hours after the
> push, to the public repository.
> For information on how to access the public repository
> see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
>
> ChangeSet@stripped, 2008-02-04 23:57:56+02:00, aelkin@stripped +3 -0
> Bug #33931 assertion at write_ignored_events_info_to_relay_log if
> init_slave_thread() fails
> and
> bug#33932 assertion at handle_slave_sql if init_slave_thread() fails
>
> the asserts were caused by
> bug33931: having thd deleted at time of executing err: code plus
> a missed initialization;
> bug33932: initialization of slave_is_running member was missed;
>
> fixed with relocating mi members initialization and removing delete thd
> It is safe to do as deletion happens later.
>
> Todo: at merging the test is better to be moved into suite/bugs for 5.x (when
> x>0).
>
> mysql-test/t/rpl_bug33931-slave.opt@stripped, 2008-02-04 23:57:50+02:00,
> aelkin@stripped +1 -0
> option to spark the simulation code
>
> mysql-test/t/rpl_bug33931-slave.opt@stripped, 2008-02-04 23:57:50+02:00,
> aelkin@stripped +0 -0
>
> mysql-test/t/rpl_bug33931.test@stripped, 2008-02-04 23:57:54+02:00,
> aelkin@stripped +33 -0
> tests check that slave does not crash as before.
> Slave threads must be in NO running state in the end.
>
>
> mysql-test/t/rpl_bug33931.test@stripped, 2008-02-04 23:57:54+02:00,
> aelkin@stripped +0 -0
>
> sql/slave.cc@stripped, 2008-02-04 23:57:49+02:00, aelkin@stripped +12 -4
> adding the bugs simulating code;
> relocating some assignments to satisfy the asserts;
>
> diff -Nrup a/mysql-test/t/rpl_bug33931-slave.opt
> b/mysql-test/t/rpl_bug33931-slave.opt
> --- /dev/null Wed Dec 31 16:00:00 196900
> +++ b/mysql-test/t/rpl_bug33931-slave.opt 2008-02-04 23:57:50 +02:00
> @@ -0,0 +1 @@
> +--loose-debug=d,simulate_io_slave_error,simulate_sql_slave_error
I think "simulate_slave_io_error" is a too general name. Something like
"simulate_slave_io_error_during_startup" would be more clear. Similar
for "simulate_slave_sql_error".
> diff -Nrup a/mysql-test/t/rpl_bug33931.test b/mysql-test/t/rpl_bug33931.test
> --- /dev/null Wed Dec 31 16:00:00 196900
> +++ b/mysql-test/t/rpl_bug33931.test 2008-02-04 23:57:54 +02:00
> @@ -0,0 +1,33 @@
> +# Test for
> +# Bug #33931 assertion at write_ignored_events_info_to_relay_log if
> init_slave_thread() fails
> +# Bug #33932 assertion at handle_slave_sql if init_slave_thread() fails
Could you please comment the test more elaborately? I think the
following explanations are needed:
- What is expected from the test (slave acts correctly despite the
simulated error on startup, and slave status is non-running afterwards).
- What the test does (starts and stops a slave).
- What the "simulate_slave_io_error" flag is for.
> +
> +source include/have_log_bin.inc;
> +
> +connect (master,127.0.0.1,root,,test,$MASTER_MYPORT,);
> +connect (slave,127.0.0.1,root,,test,$SLAVE_MYPORT,);
> +
> +
> +connection master;
> +reset master;
> +
> +connection slave;
> +--disable_warnings
> +stop slave;
> +--enable_warnings
> +reset slave;
> +start slave;
> +
> +connection master;
> +save_master_pos;
> +connection slave;
> +
> +source include/wait_for_slave_to_stop.inc;
> +
> +--replace_result $MASTER_MYPORT MASTER_PORT
> +--replace_column 1 # 8 # 9 # 23 # 33 #
> +--vertical_results
> +show slave status;
> +
> +# no clean-up is needed
> +
> diff -Nrup a/sql/slave.cc b/sql/slave.cc
> --- a/sql/slave.cc 2007-12-20 17:07:52 +02:00
> +++ b/sql/slave.cc 2008-02-04 23:57:49 +02:00
> @@ -2895,6 +2895,9 @@ void set_slave_thread_default_charset(TH
> static int init_slave_thread(THD* thd, SLAVE_THD_TYPE thd_type)
> {
> DBUG_ENTER("init_slave_thread");
> +#if !defined(DBUG_OFF)
> + int simulate_error= 0;
> +#endif
> thd->system_thread = (thd_type == SLAVE_THD_SQL) ?
> SYSTEM_THREAD_SLAVE_SQL : SYSTEM_THREAD_SLAVE_IO;
> thd->security_ctx->skip_grants();
> @@ -2914,10 +2917,15 @@ static int init_slave_thread(THD* thd, S
> thd->thread_id = thread_id++;
> pthread_mutex_unlock(&LOCK_thread_count);
>
> + DBUG_EXECUTE_IF("simulate_io_slave_error", simulate_error|= (1 <<
> SLAVE_THD_IO););
> + DBUG_EXECUTE_IF("simulate_sql_slave_error", simulate_error|= (1 <<
> SLAVE_THD_SQL););
Why use these bit tricks, and not just:
DBUG_EXECUTE_IF("simulate_io_slave_error_during_startup",
{
if (thd_type == SLAVE_THD_IO)
simulte_error= 1;
});
DBUG_EXECUTE_IF("simulate_sql_slave_error_during_startup",
{
if (thd_type == SLAVE_THD_SQL)
simulte_error= 1;
});
#if !defined(DBUG_OFF)
if (init_thr_lock() || thd->store_globals() || simulate_error)
? It's a minor simplification, but I think complex constructions like
bit fields should be avoided when possible.
> +#if !defined(DBUG_OFF)
> + if (init_thr_lock() || thd->store_globals() || simulate_error & (1<<
> thd_type))
> +#else
> if (init_thr_lock() || thd->store_globals())
> +#endif
> {
> thd->cleanup();
> - delete thd;
> DBUG_RETURN(-1);
> }
>
> @@ -3515,6 +3523,7 @@ slave_begin:
>
> thd= new THD; // note that contructor of THD uses DBUG_ !
> THD_CHECK_SENTRY(thd);
> + mi->io_thd = thd;
>
> pthread_detach_this_thread();
> thd->thread_stack= (char*) &thd; // remember where our stack is
> @@ -3525,7 +3534,6 @@ slave_begin:
> sql_print_error("Failed during slave I/O thread initialization");
> goto err;
> }
> - mi->io_thd = thd;
> pthread_mutex_lock(&LOCK_thread_count);
> threads.append(thd);
> pthread_mutex_unlock(&LOCK_thread_count);
> @@ -3865,9 +3873,11 @@ slave_begin:
>
> thd = new THD; // note that contructor of THD uses DBUG_ !
> thd->thread_stack = (char*)&thd; // remember where our stack is
> + rli->sql_thd= thd;
>
> /* Inform waiting threads that slave has started */
> rli->slave_run_id++;
> + rli->slave_running = 1;
>
> pthread_detach_this_thread();
> if (init_slave_thread(thd, SLAVE_THD_SQL))
> @@ -3882,7 +3892,6 @@ slave_begin:
> goto err;
> }
> thd->init_for_queries();
> - rli->sql_thd= thd;
> thd->temporary_tables = rli->save_temporary_tables; // restore temp tables
> pthread_mutex_lock(&LOCK_thread_count);
> threads.append(thd);
> @@ -3895,7 +3904,6 @@ slave_begin:
> start receiving data so we realize we are not caught up and
> Seconds_Behind_Master grows. No big deal.
> */
> - rli->slave_running = 1;
> rli->abort_slave = 0;
> pthread_mutex_unlock(&rli->run_lock);
> pthread_cond_broadcast(&rli->start_cond);
>
--
Sven Sandberg, Software Engineer
MySQL AB, www.mysql.com