#At file:///home/daogangqu/mysql/bzrwork/wl344/mysql-5.1-bugteam/ based on revid:luis.soares@stripped
3379 Dao-Gang.Qu@stripped 2010-03-15
WL#344: Time-delayed replication
Make replication slave to lag a specified amount of time behind the master.
This feature can be used for several purposes:
(P1) Protect against user mistakes on master. A DBA that makes a disaster on a
master that has a delayed slave can roll back to the time just before the
disaster.
Example: assume there is one master and one slave, the slave is running
with a delay, and the DBA accidentally drops all tables on the master. The
mistake can be undone with minimal downtime, as follows:
slave> STOP SLAVE
Replace the master by the slave and shut down the old master.
slave> CHANGE MASTER TO MASTER_DELAY = 0
slave> START SLAVE SQL_THREAD UNTIL <binlog position before the disaster>
Variations of the above scheme can be used depending on the precise setup
and the application's requirements.
Cf. BUG#21639, BUG#22072
(P2) Test how the system behaves when there is a big load that causes the slave
to lag, without having to generate the load.
(P3) Debug conditions related to a lagging slave
Cf. BUG#28760.
(P4) Inspect what the database looked like long ago, without having to reload a
backup. For instance, if the delay is one week, and the DBA did development
for a couple of days, and the DBA needs to go back and see what the
database used to look like before the development, then the delayed slave
can be inspected.
Cf. BUG#22072 ("how to repeat")
@ sql/lex.h
Added MASTER_DELAY parser symbol.
@ sql/log.cc
Clarified comments and added assertion.
@ sql/log_event.cc
Clarified comments.
@ sql/rpl_rli.cc
- Added delay parameter to relay_log_info file.
- Made is_fake available for all builds. We now need is_fake in
apply_event_and_update_pos() [slave.cc]
@ sql/rpl_rli.h
Made is_fake available for all builds, and documented it.
@ sql/share/errmsg.txt
Added new error codes.
@ sql/slave.cc
- Added SQL_Delay and SQL_Remaining_Delay to SHOW SLAVE STATUS.
- Updated apply_event_and_update_pos so that it takes sleeping into
account. The signature of apply_event_and_update_pos has been changed: the
"skip" parameter has been removed. Instead, the rli->is_fake flag is used
to determine if skipping and/or sleeping should be done.
- Cleaned up some debug printout code in apply_event_and_update_pos
(since it is related to having rli->is_fake under all builds)
- Improved documentation.
@ sql/slave.h
Added MASTER_DELAY_MAX.
Clarified comments.
@ sql/sql_binlog.cc
- Removed preprocessor conditional guarding initialization of
rli->is_fake, so is_fake is now unconditionally a part of the rli
structure.
- Removed initialization of ev->thd from mysql_client_binlog_statement(),
since ev->thd is initialized by apply_event_and_update_pos().
- Added code to check that BINLOG statements only contain row events and FD
events, to avoid deadlock.
@ sql/sql_lex.cc
Added implementation of new function
st_lex_master_info::set_unspecified()
@ sql/sql_lex.h
Added delay to st_lex_master_info.
Added set_unspecified()
@ sql/sql_repl.cc
Initialize Master_info.delay from Lex_master_info.delay.
@ sql/sql_yacc.yy
- Added code to parse "CHANGE MASTER TO MASTER_DELAY = X"
- Use new function st_lex_master_info::set_unspecified
added:
mysql-test/suite/rpl/t/rpl_delayed_slave.test
modified:
sql/lex.h
sql/log.cc
sql/log_event.cc
sql/rpl_rli.cc
sql/rpl_rli.h
sql/share/errmsg.txt
sql/slave.cc
sql/slave.h
sql/sql_binlog.cc
sql/sql_lex.cc
sql/sql_lex.h
sql/sql_repl.cc
sql/sql_yacc.yy
=== added file 'mysql-test/suite/rpl/t/rpl_delayed_slave.test'
--- a/mysql-test/suite/rpl/t/rpl_delayed_slave.test 1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/rpl/t/rpl_delayed_slave.test 2010-03-15 07:02:17 +0000
@@ -0,0 +1,257 @@
+# ==== Purpose ====
+#
+# Test the time-delayed replication feature, i.e.,
+# CHANGE MASTER TO MASTER_DELAY=X:
+#
+# - Verify that slave has executed the events after but not before the
+# delay timeout.
+#
+# - The same when master and slave have different timestamps.
+#
+# - Verify that delay is correct works when slave is lagging for
+# "natural" reasons (i.e., slow queries).
+#
+# - Verify that STOP SLAVE works instantly even during a delay, and
+# that it does not cause the waited-for event to be executed too
+# early on slave.
+#
+# - Verify that changing back to no delay works.
+#
+# - Verify that RESET SLAVE sets the delay to 0.
+#
+# - Verify that setting a bad value for the delay gives an error.
+#
+# ==== Method ====
+#
+# We run the slave with 10 seconds lag.
+#
+# In general, to test that a query has not been executed by the slave
+# before this time, we wait until the slave IO thread has received the
+# event, and then 5 seconds more, and check that the table has not
+# been updated. To test taht a query has been executed after this
+# time, we wait 10 seconds more.
+#
+# To simulate that the slave lags "naturally", we do "select
+# sleep(15)".
+#
+# ==== Related Bugs and Worklogs ====
+#
+# WL#344: Time-delayed replication
+# BUG#28760: Simulating a replication lag
+# [duplicate] BUG#22072: configurable delayed replication
+# [duplicate] BUG#21639: Add Replication Delay parameter
+#
+# ==== Issues with this Test Case ====
+#
+# The test is inherently timing-sensitive (i.e., contains races) and
+# is likely to fail sporadically on a loaded host.
+#
+# The test takes a long time; it sleeps for around 20*10 seconds.
+
+--source include/master-slave.inc
+--source include/have_binlog_format_statement.inc
+
+
+# We assume that any simple operation take zero time, with an error
+# margin of $time1 seconds. Hence, if we run with a delay of $time2
+# seconds, we expect that:
+# - If we execute a query on master and wait $time1 seconds, then the
+# query has been copied to slave but not yet executed.
+# - If we execute a query on master and wait $time3 seconds, then the
+# query has been executed.
+--let $time1= 10
+if (`SELECT '$max_query_execution_time' > 0`) {
+ --let $time1= $max_query_execution_time
+}
+--let $time2= `SELECT 2 * $time1`
+--let $time3= `SELECT 3 * $time1`
+
+
+--echo [on master]
+CREATE TABLE t1 (a VARCHAR(100), b INT AUTO_INCREMENT PRIMARY KEY);
+
+
+--echo ==== Normal setup ====
+
+--echo [on slave]
+--sync_slave_with_master
+
+--source include/stop_slave.inc
+eval CHANGE MASTER TO MASTER_DELAY = $time2;
+--echo # Checking that delay is what we set it to
+--let $delay= query_get_value(SHOW SLAVE STATUS, SQL_Delay, 1)
+if (`SELECT $delay != $time2`) {
+ --echo Delay is wrong! Expected $time2, got $delay
+ --die wrong delay
+}
+--echo # Expect status to be ''
+#--let $status= query_get_value(SHOW SLAVE STATUS, Slave_SQL_Running_State, 1)
+#--echo Slave_SQL_Running_State='$status'
+SHOW PROCESSLIST;
+--source include/start_slave.inc
+
+--echo [on master]
+--connection master
+INSERT INTO t1(a) VALUES ('normal setup');
+
+--source extra/rpl_tests/delayed_slave_wait_on_query.inc
+
+
+--echo ==== Slave lags "naturally" after master ====
+
+--echo [on master]
+--connection master
+
+--eval CREATE FUNCTION delay_on_slave(time INT) RETURNS INT BEGIN IF @@server_id = 2 THEN RETURN SLEEP(time); ELSE RETURN 0; END IF; END
+
+eval INSERT INTO t1(a) SELECT delay_on_slave($time3);
+--save_master_pos
+INSERT INTO t1(a) VALUES ('slave is already lagging: this statement should execute immediately');
+eval INSERT INTO t1(a) SELECT delay_on_slave($time2);
+
+--echo [on slave]
+--source include/sync_slave_io_with_master.inc
+--echo <sleep 1>
+--sleep $time1
+
+--echo # Expect no query executed and status is Waiting
+SELECT * FROM t1 ORDER BY b DESC LIMIT 1;
+#--let $status= query_get_value(SHOW SLAVE STATUS, Slave_SQL_Running_State, 1)
+#--echo Slave_SQL_Running_State='$status'
+SHOW PROCESSLIST;
+--let $delay= query_get_value(SHOW SLAVE STATUS, SQL_Remaining_Delay, 1)
+--echo SQL_Remaining_Delay='$delay'
+
+--echo <wait for first query to execute>
+--sync_with_master
+
+--source extra/rpl_tests/delayed_slave_wait_on_query.inc
+
+
+--echo ==== STOP SLAVE and START SLAVE ====
+
+# Set up a longer delay.
+--source include/stop_slave.inc
+eval CHANGE MASTER TO MASTER_DELAY = $time3;
+--source include/start_slave.inc
+
+--echo # Checking that delay is what we set it to
+--let $delay= query_get_value(SHOW SLAVE STATUS, SQL_Delay, 1)
+if (`SELECT $delay != $time3`) {
+ --echo Delay is wrong! Expected $time2, got $delay
+ --die wrong delay
+}
+
+--echo [on master]
+--connection master
+INSERT INTO t1(a) VALUES ('stop slave and start slave');
+
+--echo [on slave]
+--connection slave
+--echo <sleep 1>
+--sleep $time1
+--source include/stop_slave.inc
+
+--echo # Expect query not executed and status is Waiting
+SELECT * FROM t1 ORDER BY b DESC LIMIT 1;
+#--let $status= query_get_value(SHOW SLAVE STATUS, Slave_SQL_Running_State, 1)
+#--echo Slave_SQL_Running_State='$status'
+SHOW PROCESSLIST;
+
+--source include/start_slave.inc
+
+--source extra/rpl_tests/delayed_slave_wait_on_query.inc
+
+
+--echo ==== Change back to no delay ====
+
+--echo [on master]
+--connection master
+SET TIMESTAMP = 0;
+
+--echo [on slave]
+--connection slave
+SET TIMESTAMP = 0;
+--source include/stop_slave.inc
+eval CHANGE MASTER TO MASTER_DELAY = 0;
+
+--echo # Expect delay is 0.
+--let $delay= query_get_value(SHOW SLAVE STATUS, SQL_Delay, 1)
+--echo SQL_Delay='$delay'
+
+--source include/start_slave.inc
+
+--echo [on master]
+--connection master
+INSERT INTO t1(a) VALUES ('change back to no delay');
+
+--echo [on slave]
+--source include/sync_slave_io_with_master.inc
+--echo <sleep 1>
+--sleep $time1
+
+--echo # Expect query executed.
+SELECT * FROM t1 ORDER BY b DESC LIMIT 1;
+#--let $status= query_get_value(SHOW SLAVE STATUS, Slave_SQL_Running_State, 1)
+#--echo Slave_SQL_Running_State='$status'
+SHOW PROCESSLIST;
+
+
+--echo ==== Reset delay with RESET SLAVE ====
+
+--source include/stop_slave.inc
+CHANGE MASTER TO MASTER_DELAY = 71;
+--source include/start_slave.inc
+
+--echo # Expect delay is 71
+--let $delay= query_get_value(SHOW SLAVE STATUS, SQL_Delay, 1)
+--echo SQL_Delay='$delay'
+
+--source include/stop_slave.inc
+RESET SLAVE;
+--echo [on master]
+--connection master
+RESET MASTER;
+--echo [on slave]
+--connection slave
+--source include/start_slave.inc
+
+--echo # Expect delay is 0
+--let $delay= query_get_value(SHOW SLAVE STATUS, SQL_Delay, 1)
+--echo SQL_Delay='$delay'
+
+
+--echo ==== Set a bad value for the delay ====
+
+--source include/stop_slave.inc
+
+--echo # Expect error for setting negative delay
+--error ER_PARSE_ERROR
+CHANGE MASTER TO MASTER_DELAY = -1;
+
+--echo # Expect error for setting delay between 2^31 and 2^32
+--error ER_MASTER_DELAY_VALUE_OUT_OF_RANGE
+CHANGE MASTER TO MASTER_DELAY = 3000000000;
+
+--echo # Expect error for setting delay to nonsense
+--error ER_PARSE_ERROR
+CHANGE MASTER TO MASTER_DELAY = blah;
+
+# todo: CHANGE MASTER TO MASTER_DELAY = 999999999999999999999999999
+# should give error
+
+CHANGE MASTER TO MASTER_DELAY = 0;
+--source include/start_slave.inc
+
+
+--echo ==== Clean up ====
+
+--echo [on master]
+--connection master
+DROP TABLE t1;
+DROP FUNCTION delay_on_slave;
+
+--echo [on slave]
+--sync_slave_with_master
+
+--source include/master-slave-end.inc
=== modified file 'sql/lex.h'
--- a/sql/lex.h 2009-07-29 08:54:20 +0000
+++ b/sql/lex.h 2010-03-15 07:02:17 +0000
@@ -306,6 +306,7 @@ static SYMBOL symbols[] = {
{ "LOW_PRIORITY", SYM(LOW_PRIORITY)},
{ "MASTER", SYM(MASTER_SYM)},
{ "MASTER_CONNECT_RETRY", SYM(MASTER_CONNECT_RETRY_SYM)},
+ { "MASTER_DELAY", SYM(MASTER_DELAY_SYM)},
{ "MASTER_HOST", SYM(MASTER_HOST_SYM)},
{ "MASTER_LOG_FILE", SYM(MASTER_LOG_FILE_SYM)},
{ "MASTER_LOG_POS", SYM(MASTER_LOG_POS_SYM)},
=== modified file 'sql/log.cc'
--- a/sql/log.cc 2010-02-05 17:01:09 +0000
+++ b/sql/log.cc 2010-03-15 07:02:17 +0000
@@ -3078,6 +3078,10 @@ err:
relay log.
IMPLEMENTATION
+
+ - You must hold rli->data_lock before calling this function, since
+ it writes group_relay_log_pos and similar fields of
+ Relay_log_info.
- Protects index file with LOCK_index
- Delete relevant relay log files
- Copy all file names after these ones to the front of the index file
@@ -3091,7 +3095,7 @@ err:
read by the SQL slave thread are deleted).
@note
- - This is only called from the slave-execute thread when it has read
+ - This is only called from the slave SQL thread when it has read
all commands from a relay log and want to switch to a new relay log.
- When this happens, we can be in an active transaction as
a transaction can span over two relay logs
@@ -3120,6 +3124,8 @@ int MYSQL_BIN_LOG::purge_first_log(Relay
DBUG_ASSERT(rli->slave_running == 1);
DBUG_ASSERT(!strcmp(rli->linfo.log_file_name,rli->event_relay_log_name));
+ safe_mutex_assert_owner(&rli->data_lock);
+
pthread_mutex_lock(&LOCK_index);
to_purge_if_included= my_strdup(rli->group_relay_log_name, MYF(0));
=== modified file 'sql/log_event.cc'
--- a/sql/log_event.cc 2010-03-08 23:55:19 +0000
+++ b/sql/log_event.cc 2010-03-15 07:02:17 +0000
@@ -4935,6 +4935,9 @@ bool Rotate_log_event::write(IO_CACHE* f
in a A -> B -> A setup.
The NOTES below is a wrong comment which will disappear when 4.1 is merged.
+ This must only be called from the Slave SQL thread, since it calls
+ flush_relay_log_info().
+
@retval
0 ok
*/
@@ -5947,6 +5950,9 @@ void Stop_log_event::print(FILE* file, P
were we must do this cleaning is in
Start_log_event_v3::do_apply_event(), not here. Because if we come
here, the master was sane.
+
+ This must only be called from the Slave SQL thread, since it calls
+ flush_relay_log_info().
*/
int Stop_log_event::do_update_pos(Relay_log_info *rli)
{
=== modified file 'sql/rpl_rli.cc'
--- a/sql/rpl_rli.cc 2009-12-14 16:50:22 +0000
+++ b/sql/rpl_rli.cc 2010-03-15 07:02:17 +0000
@@ -34,16 +34,15 @@ Relay_log_info::Relay_log_info()
no_storage(FALSE), replicate_same_server_id(::replicate_same_server_id),
info_fd(-1), cur_log_fd(-1), save_temporary_tables(0),
cur_log_old_open_count(0), group_relay_log_pos(0), event_relay_log_pos(0),
-#if HAVE_purify
is_fake(FALSE),
-#endif
group_master_log_pos(0), log_space_total(0), ignore_log_space_limit(0),
last_master_timestamp(0), slave_skip_counter(0),
abort_pos_wait(0), slave_run_id(0), sql_thd(0),
inited(0), abort_slave(0), slave_running(0), until_condition(UNTIL_NONE),
until_log_pos(0), retried_trans(0),
tables_to_lock(0), tables_to_lock_count(0),
- last_event_start_time(0), m_flags(0)
+ last_event_start_time(0), m_flags(0),
+ delay(0), delay_end(0)
{
DBUG_ENTER("Relay_log_info::Relay_log_info");
@@ -223,7 +222,7 @@ file '%s', errno %d)", fname, my_errno);
if (init_relay_log_pos(rli,NullS,BIN_LOG_HEADER_SIZE,0 /* no data lock */,
&msg, 0))
{
- sql_print_error("Failed to open the relay log 'FIRST' (relay_log_pos 4)");
+ sql_print_warning("Failed to open the relay log 'FIRST' (relay_log_pos 4)");
goto err;
}
rli->group_master_log_name[0]= 0;
@@ -263,20 +262,52 @@ Failed to open the existing relay log in
}
rli->info_fd = info_fd;
- int relay_log_pos, master_log_pos;
+ int relay_log_pos, master_log_pos, lines;
+ char *first_non_digit;
+
if (init_strvar_from_file(rli->group_relay_log_name,
sizeof(rli->group_relay_log_name),
- &rli->info_file, "") ||
- init_intvar_from_file(&relay_log_pos,
- &rli->info_file, BIN_LOG_HEADER_SIZE) ||
- init_strvar_from_file(rli->group_master_log_name,
- sizeof(rli->group_master_log_name),
- &rli->info_file, "") ||
- init_intvar_from_file(&master_log_pos, &rli->info_file, 0))
+ &rli->info_file, ""))
{
msg="Error reading slave log configuration";
goto err;
}
+
+ lines= strtoul(rli->group_relay_log_name, &first_non_digit, 10);
+
+ if (rli->group_relay_log_name[0]!='\0' && lines == 5)
+ {
+ /* Seems to be new format => read relay log name from next line */
+ if (init_strvar_from_file(rli->group_relay_log_name,
+ sizeof(rli->group_relay_log_name),
+ &rli->info_file, "") ||
+ init_intvar_from_file(&relay_log_pos,
+ &rli->info_file, BIN_LOG_HEADER_SIZE) ||
+ init_strvar_from_file(rli->group_master_log_name,
+ sizeof(rli->group_master_log_name),
+ &rli->info_file, "") ||
+ init_intvar_from_file(&master_log_pos, &rli->info_file, 0) ||
+ init_intvar_from_file(&rli->delay, &rli->info_file, 0))
+ {
+ msg="Error reading slave log configuration";
+ goto err;
+ }
+ }
+ else
+ {
+ if (init_intvar_from_file(&relay_log_pos,
+ &rli->info_file, BIN_LOG_HEADER_SIZE) ||
+ init_strvar_from_file(rli->group_master_log_name,
+ sizeof(rli->group_master_log_name),
+ &rli->info_file, "") ||
+ init_intvar_from_file(&master_log_pos, &rli->info_file, 0) ||
+ init_intvar_from_file(&rli->delay, &rli->info_file, 0))
+ {
+ msg="Error reading slave log configuration";
+ goto err;
+ }
+ }
+
strmake(rli->event_relay_log_name,rli->group_relay_log_name,
sizeof(rli->event_relay_log_name)-1);
rli->group_relay_log_pos= rli->event_relay_log_pos= relay_log_pos;
@@ -825,7 +856,9 @@ void Relay_log_info::inc_group_relay_log
{
DBUG_ENTER("Relay_log_info::inc_group_relay_log_pos");
- if (!skip_lock)
+ if (skip_lock)
+ safe_mutex_assert_owner(&data_lock);
+ else
pthread_mutex_lock(&data_lock);
inc_event_relay_log_pos();
group_relay_log_pos= event_relay_log_pos;
@@ -1166,7 +1199,8 @@ void Relay_log_info::stmt_done(my_off_t
else
{
inc_group_relay_log_pos(event_master_log_pos);
- flush_relay_log_info(this);
+ if (!is_fake)
+ flush_relay_log_info(this);
/*
Note that Rotate_log_event::do_apply_event() does not call this
function, so there is no chance that a fake rotate event resets
=== modified file 'sql/rpl_rli.h'
--- a/sql/rpl_rli.h 2009-12-14 16:32:22 +0000
+++ b/sql/rpl_rli.h 2010-03-15 07:02:17 +0000
@@ -147,6 +147,10 @@ public:
relay log and finishing (commiting) on another relay log. Case which can
happen when, for example, the relay log gets rotated because of
max_binlog_size.
+
+ Note: group_relay_log_name, group_relay_log_pos must only be
+ written from the thread owning the Relay_log_info (SQL thread if
+ !is_fake; client thread executing BINLOG statement if is_fake).
*/
char group_relay_log_name[FN_REFLEN];
ulonglong group_relay_log_pos;
@@ -154,9 +158,13 @@ public:
ulonglong event_relay_log_pos;
ulonglong future_event_relay_log_pos;
-#ifdef HAVE_purify
- bool is_fake; /* Mark that this is a fake relay log info structure */
-#endif
+ /*
+ False for the Relay_log_info object that belongs to the sql
+ thread; true for the Relay_log_info that belongs to a client. In
+ other words, non-fake rli's are used for replication, whereas fake
+ ones are used to execute BINLOG statements.
+ */
+ bool is_fake;
/*
Original log name and position of the group we're currently executing
@@ -164,6 +172,10 @@ public:
in the master's binlog. These concern the *group*, because in the master's
binlog the log_pos that comes with each event is the position of the
beginning of the group.
+
+ Note: group_master_log_name, group_master_log_pos must only be
+ written from the thread owning the Relay_log_info (SQL thread if
+ !is_fake; client thread executing BINLOG statement if is_fake).
*/
char group_master_log_name[FN_REFLEN];
volatile my_off_t group_master_log_pos;
@@ -296,7 +308,7 @@ public:
}
void inc_group_relay_log_pos(ulonglong log_pos,
- bool skip_lock=0);
+ bool skip_lock=0);
int wait_for_pos(THD* thd, String* log_name, longlong log_pos,
longlong timeout);
@@ -345,6 +357,34 @@ public:
time_t last_event_start_time;
/**
+ Delay replication slave by this amount, compared to master (in
+ seconds). This is set with CHANGE MASTER TO MASTER_DELAY=X.
+
+ Guarded by data_lock. Initialized by the client thread executing
+ START SLAVE. Written by client threads executing CHANGE MASTER TO
+ MASTER_DELAY=X. Read by SQL thread and by client threads
+ executing SHOW SLAVE STATUS. Note: must not be written while the
+ slave SQL thread is running, since the SQL thread reads it without
+ a lock when executing flush_relay_log_info().
+ */
+ int delay;
+ /**
+ During a delay, specifies the point in time when the delay ends.
+
+ This is used for the SQL_Remaining_Delay column in SHOW SLAVE STATUS.
+ Special values:
+
+ - 0 when the sql thread executes an event.
+
+ - 1 when the sql thread is waiting for events to show up in the
+ binlog.
+
+ Guarded by data_lock. Written by the sql thread. Read by client
+ threads executing SHOW SLAVE STATUS.
+ */
+ time_t delay_end;
+
+ /**
Helper function to do after statement completion.
This function is called from an event to complete the group by
=== modified file 'sql/share/errmsg.txt'
--- a/sql/share/errmsg.txt 2010-02-09 10:30:50 +0000
+++ b/sql/share/errmsg.txt 2010-03-15 07:02:17 +0000
@@ -6213,3 +6213,8 @@ ER_DEBUG_SYNC_TIMEOUT
ER_DEBUG_SYNC_HIT_LIMIT
eng "debug sync point hit limit reached"
ger "Debug Sync Point Hit Limit erreicht"
+
+ER_MASTER_DELAY_VALUE_OUT_OF_RANGE
+ eng "The requested value %u for the master delay exceeds the maximum %u"
+ER_ONLY_FD_AND_RBR_EVENTS_ALLOWED_IN_BINLOG_STATEMENT
+ eng "Only Format_description_log_event and row events are allowed in BINLOG statements (but %s was provided)"
=== modified file 'sql/slave.cc'
--- a/sql/slave.cc 2010-02-25 10:39:43 +0000
+++ b/sql/slave.cc 2010-03-15 07:02:17 +0000
@@ -987,8 +987,10 @@ static int get_master_version_and_clock(
(master_res= mysql_store_result(mysql)) &&
(master_row= mysql_fetch_row(master_res)))
{
+ pthread_mutex_lock(&mi->data_lock);
mi->clock_diff_with_master=
(long) (time((time_t*) 0) - strtoul(master_row[0], 0, 10));
+ pthread_mutex_unlock(&mi->data_lock);
}
else if (is_network_error(mysql_errno(mysql)))
{
@@ -998,7 +1000,9 @@ static int get_master_version_and_clock(
}
else
{
+ pthread_mutex_lock(&mi->data_lock);
mi->clock_diff_with_master= 0; /* The "most sensible" value */
+ pthread_mutex_unlock(&mi->data_lock);
sql_print_warning("\"SELECT UNIX_TIMESTAMP()\" failed on master, "
"do not trust column Seconds_Behind_Master of SHOW "
"SLAVE STATUS. Error: %s (%d)",
@@ -1605,6 +1609,8 @@ bool show_master_info(THD* thd, Master_i
field_list.push_back(new Item_empty_string("Last_IO_Error", 20));
field_list.push_back(new Item_return_int("Last_SQL_Errno", 4, MYSQL_TYPE_LONG));
field_list.push_back(new Item_empty_string("Last_SQL_Error", 20));
+ field_list.push_back(new Item_return_int("SQL_Delay", 10, MYSQL_TYPE_LONG));
+ field_list.push_back(new Item_return_int("SQL_Remaining_Delay", 8, MYSQL_TYPE_LONG));
if (protocol->send_fields(&field_list,
Protocol::SEND_NUM_ROWS | Protocol::SEND_EOF))
@@ -1726,6 +1732,14 @@ bool show_master_info(THD* thd, Master_i
protocol->store(mi->rli.last_error().number);
// Last_SQL_Error
protocol->store(mi->rli.last_error().message, &my_charset_bin);
+ // Delay
+ protocol->store((uint32) mi->rli.delay);
+ // Remaining_Delay
+ float remaining_time= mi->rli.delay_end - my_time(0);
+ if (remaining_time > 0.01 )
+ protocol->store((uint32)remaining_time);
+ else
+ protocol->store_null();
pthread_mutex_unlock(&mi->rli.err_lock);
pthread_mutex_unlock(&mi->err_lock);
@@ -2058,23 +2072,102 @@ static int has_temporary_error(THD *thd)
/**
+ If this is a lagging slave (specified with CHANGE MASTER TO MASTER_DELAY = X), delays accordingly. Also unlocks rli->data_lock.
+
+ Design note: this is the place to unlock rli->data_lock here since
+ it should be held when reading delay info from rli, but it should
+ not be held while sleeping.
+
+ @param ev Event that is about to be executed.
+
+ @param thd The sql thread's THD object.
+
+ @param rli The sql thread's Relay_log_info structure.
+*/
+static void delay_event(Log_event *ev, THD *thd, Relay_log_info *rli)
+{
+ long delay= rli->delay;
+
+ DBUG_ENTER("delay_event");
+ safe_mutex_assert_owner(&rli->data_lock);
+ DBUG_ASSERT(!rli->is_fake);
+
+ if (delay && ev->get_type_code() != ROTATE_EVENT &&
+ ev->get_type_code() != FORMAT_DESCRIPTION_EVENT)
+ {
+ time_t delay_end= rli->delay_end=
+ ev->when + rli->mi->clock_diff_with_master + delay;
+
+ time_t now= my_time(0);
+
+ unsigned long nap_time= 0;
+ if (delay_end > now)
+ nap_time= delay_end - now;
+
+ DBUG_PRINT("info", ("delay= %lu "
+ "ev->when= %lu "
+ "rli->mi->clock_diff_with_master= %lu "
+ "now= %lu "
+ "delay_end= %lu "
+ "nap_time= %u",
+ delay, (long)ev->when, rli->mi->clock_diff_with_master,
+ (long)now, delay_end, (long)nap_time));
+
+ if (delay_end > now)
+ {
+ DBUG_PRINT("info", ("delaying replication event %lu secs",
+ nap_time));
+ thd_proc_info(rli->sql_thd, "Waiting until MASTER_DELAY seconds after master executed event");
+ pthread_mutex_unlock(&rli->data_lock);
+ safe_sleep(thd, nap_time, (CHECK_KILLED_FUNC)sql_slave_killed,
+ (void*)rli);
+ DBUG_VOID_RETURN;
+ }
+ }
+
+ pthread_mutex_unlock(&rli->data_lock);
+
+ DBUG_VOID_RETURN;
+}
+
+
+/**
Applies the given event and advances the relay log position.
- In essence, this function does:
+ This is needed by the sql thread to execute events from the binlog,
+ and by clients executing BINLOG statements. Conceptually, this
+ function does:
@code
ev->apply_event(rli);
ev->update_pos(rli);
@endcode
- But it also does some maintainance, such as skipping events if
- needed and reporting errors.
+ It also does the following maintainance:
- If the @c skip flag is set, then it is tested whether the event
- should be skipped, by looking at the slave_skip_counter and the
- server id. The skip flag should be set when calling this from a
- replication thread but not set when executing an explicit BINLOG
- statement.
+ - Initializes the thread's server_id and time; and the event's
+ thread.
+
+ - If rli is not fake (i.e., if it belongs to the slave sql thread
+ instead of being used for executing BINLOG statements), it does
+ the following things: (1) skips events if it is needed according
+ to the server id or slave_skip_counter; (2) unlocks
+ rli->data_lock; (3) sleeps appropriately if this is a
+ time-delayed slave (specified by CHANGE MASTER TO
+ MASTER_DELAY=X); (4) maintains the running state of the sql
+ thread (rli->thread_state).
+
+ - Reports errors as needed.
+
+ @param ev The event to apply.
+
+ @param thd The client thread that executes the event (i.e., the
+ slave sql thread if called from a replication slave, or the client
+ thread if called to execute a BINLOG statement).
+
+ @param rli The relay log info (i.e., the slave's rli if called from
+ a replication slave, or the client's thd->rli_fake if called to
+ execute a BINLOG statement).
@retval 0 OK.
@@ -2131,9 +2224,17 @@ int apply_event_and_update_pos(Log_event
int reason= ev->shall_skip(rli);
if (reason == Log_event::EVENT_SKIP_COUNT)
--rli->slave_skip_counter;
- pthread_mutex_unlock(&rli->data_lock);
+
if (reason == Log_event::EVENT_SKIP_NOT)
+ {
+ // Sleeps if needed, and unlocks rli->data_lock.
+ delay_event(ev, thd, rli);
+ thd_proc_info(rli->sql_thd, "Executing event");
exec_res= ev->apply_event(rli);
+ thd_proc_info(rli->sql_thd, "Waiting to read more events from relay log");
+ }
+ else
+ pthread_mutex_unlock(&rli->data_lock);
#ifndef DBUG_OFF
/*
@@ -2160,14 +2261,11 @@ int apply_event_and_update_pos(Log_event
if (exec_res == 0)
{
int error= ev->update_pos(rli);
-#ifdef HAVE_purify
+#ifndef DBUG_OFF
+ DBUG_PRINT("info", ("update_pos error = %d", error));
if (!rli->is_fake)
-#endif
{
-#ifndef DBUG_OFF
char buf[22];
-#endif
- DBUG_PRINT("info", ("update_pos error = %d", error));
DBUG_PRINT("info", ("group %s %s",
llstr(rli->group_relay_log_pos, buf),
rli->group_relay_log_name));
@@ -2175,6 +2273,7 @@ int apply_event_and_update_pos(Log_event
llstr(rli->event_relay_log_pos, buf),
rli->event_relay_log_name));
}
+#endif
/*
The update should not fail, so print an error message and
return an error code.
@@ -2201,7 +2300,8 @@ int apply_event_and_update_pos(Log_event
/**
- Top-level function for executing the next event from the relay log.
+ Top-level function for executing the next event in the relay log
+ from the SQL thread.
This function reads the event from the relay log, executes it, and
advances the relay log position. It also handles errors, etc.
@@ -2558,8 +2658,10 @@ pthread_handler_t handle_slave_io(void *
connected:
DBUG_SYNC_POINT("debug_lock.before_get_running_status_yes", 10);
- // TODO: the assignment below should be under mutex (5.0)
+ pthread_mutex_lock(&mi->run_lock);
mi->slave_running= MYSQL_SLAVE_RUN_CONNECT;
+ pthread_mutex_unlock(&mi->run_lock);
+
thd->slave_net = &mysql->net;
thd_proc_info(thd, "Checking master version");
ret= get_master_version_and_clock(mysql, mi);
@@ -3010,6 +3112,8 @@ log '%s' at position %s, relay log '%s'
pthread_mutex_unlock(&rli->data_lock);
goto err;
}
+
+ thd_proc_info(rli->sql_thd, "Waiting to read more events from relay log");
pthread_mutex_unlock(&rli->data_lock);
/* Read queries from the IO/THREAD until this thread is killed */
@@ -3908,55 +4012,74 @@ static int safe_reconnect(THD* thd, MYSQ
}
-/*
- Store the file and position where the execute-slave thread are in the
+/**
+ Store the file and position where the slave's SQL thread are in the
relay log.
- SYNOPSIS
- flush_relay_log_info()
- rli Relay log information
+ Notes:
- NOTES
- - As this is only called by the slave thread, we don't need to
- have a lock on this.
- - If there is an active transaction, then we don't update the position
- in the relay log. This is to ensure that we re-execute statements
- if we die in the middle of an transaction that was rolled back.
- - As a transaction never spans binary logs, we don't have to handle the
- case where we do a relay-log-rotation in the middle of the transaction.
- If this would not be the case, we would have to ensure that we
- don't delete the relay log file where the transaction started when
- we switch to a new relay log file.
-
- TODO
- - Change the log file information to a binary format to avoid calling
- longlong2str.
+ - This function should be called either from the slave SQL thread,
+ or when the slave thread is not running. (It reads the
+ group_{relay|master}_log_{pos|name} and delay fields in the rli
+ object. These may only be modified by the slave SQL thread or by
+ a client thread when the slave SQL thread is not running.)
+
+ - If there is an active transaction, then we do not update the
+ position in the relay log. This is to ensure that we re-execute
+ statements if we die in the middle of an transaction that was
+ rolled back.
+
+ - As a transaction never spans binary logs, we don't have to handle
+ the case where we do a relay-log-rotation in the middle of the
+ transaction. If transactions could span several binlogs, we would
+ have to ensure that we do not delete the relay log file where the
+ transaction started before switching to a new relay log file.
+
+ - Error can happen if writing to file fails or if flushing the file
+ fails.
+
+ @param rli The object representing the Relay_log_info.
+
+ @param unlock_data_lock If set, rli->data_lock will be unlocked by
+ this function, otherwise it will be kept. This is useful if the
+ lock is not needed after the call to this function, because this
+ function does expensive disk operations after it stopped depending
+ on the lock.
- RETURN VALUES
- 0 ok
- 1 write error
-*/
+ @todo Change the log file information to a binary format to avoid
+ calling longlong2str.
+ @todo Refactor this to become a member function of Relay_log_info.
+
+ @return 0 on success, 1 on error.
+*/
bool flush_relay_log_info(Relay_log_info* rli)
{
bool error=0;
+ unsigned long lines= 5;
DBUG_ENTER("flush_relay_log_info");
+ DBUG_ASSERT(!rli->is_fake);
+
if (unlikely(rli->no_storage))
DBUG_RETURN(0);
IO_CACHE *file = &rli->info_file;
- char buff[FN_REFLEN*2+22*2+4], *pos;
-
+ // 2*file name, 2*long long, 2*unsigned long, 6*'\n'
+ char buff[FN_REFLEN*2+22*2+10*2+6], *pos;
my_b_seek(file, 0L);
- pos=strmov(buff, rli->group_relay_log_name);
+ pos= longlong2str(lines, buff, 10);
+ *pos++='\n';
+ pos=strmov(pos, rli->group_relay_log_name);
*pos++='\n';
pos=longlong2str(rli->group_relay_log_pos, pos, 10);
*pos++='\n';
pos=strmov(pos, rli->group_master_log_name);
*pos++='\n';
pos=longlong2str(rli->group_master_log_pos, pos, 10);
- *pos='\n';
+ *pos++= '\n';
+ pos= longlong2str(rli->delay, pos, 10);
+ *pos= '\n';
if (my_b_write(file, (uchar*) buff, (size_t) (pos-buff)+1))
error=1;
if (flush_io_cache(file))
=== modified file 'sql/slave.h'
--- a/sql/slave.h 2009-12-14 16:32:22 +0000
+++ b/sql/slave.h 2010-03-15 07:02:17 +0000
@@ -17,6 +17,11 @@
#define SLAVE_H
/**
+ MASTER_DELAY can be at most INT32_MAX.
+*/
+#define MASTER_DELAY_MAX (0X7FFFFFFF)
+
+/**
@defgroup Replication Replication
@{
@@ -81,12 +86,14 @@ class Master_info;
In Master_info: run_lock, data_lock
run_lock protects all information about the run state: slave_running, thd
- and the existence of the I/O thread to stop/start it, you need this mutex).
+ and the existence of the I/O thread (to stop/start it, you need this mutex).
data_lock protects some moving members of the struct: counters (log name,
position) and relay log (MYSQL_BIN_LOG object).
In Relay_log_info: run_lock, data_lock
see Master_info
+ However, note that run_lock does not protect
+ Relay_log_info.run_state; that is protected by data_lock.
Order of acquisition: if you want to have LOCK_active_mi and a run_lock, you
must acquire LOCK_active_mi first.
=== modified file 'sql/sql_binlog.cc'
--- a/sql/sql_binlog.cc 2009-10-14 01:39:05 +0000
+++ b/sql/sql_binlog.cc 2010-03-15 07:02:17 +0000
@@ -62,9 +62,7 @@ void mysql_client_binlog_statement(THD*
if (!rli)
{
rli= thd->rli_fake= new Relay_log_info;
-#ifdef HAVE_purify
rli->is_fake= TRUE;
-#endif
have_fd_event= FALSE;
}
if (rli && !rli->relay_log.description_event_for_exec)
@@ -159,9 +157,9 @@ void mysql_client_binlog_statement(THD*
see one; it is the only statement that can be read in base64
without a prior Format_description_event.
*/
+ int type = bufptr[EVENT_TYPE_OFFSET];
if (!have_fd_event)
{
- int type = bufptr[EVENT_TYPE_OFFSET];
if (type == FORMAT_DESCRIPTION_EVENT || type == START_EVENT_V3)
have_fd_event= TRUE;
else
@@ -172,6 +170,29 @@ void mysql_client_binlog_statement(THD*
}
}
+ /*
+ Do not execute other events than row-events and FD events. It
+ is crucial to avoid executing Stop_log_event and
+ Rotate_log_event since they call flush_relay_log_info, which
+ is not allowed to call by other threads than the slave SQL
+ thread when the slave SQL thread is running.
+ */
+ switch (type) {
+ case FORMAT_DESCRIPTION_EVENT:
+ case TABLE_MAP_EVENT:
+ case WRITE_ROWS_EVENT:
+ case UPDATE_ROWS_EVENT:
+ case DELETE_ROWS_EVENT:
+ case PRE_GA_WRITE_ROWS_EVENT:
+ case PRE_GA_UPDATE_ROWS_EVENT:
+ case PRE_GA_DELETE_ROWS_EVENT:
+ /* only the above event types are allowed */
+ break;
+ default:
+ my_error(ER_ONLY_FD_AND_RBR_EVENTS_ALLOWED_IN_BINLOG_STATEMENT,
+ MYF(0), Log_event::get_type_str((Log_event_type)type));
+ }
+
ev= Log_event::read_log_event(bufptr, event_len, &error,
rli->relay_log.description_event_for_exec);
@@ -201,7 +222,7 @@ void mysql_client_binlog_statement(THD*
bytes_decoded, (long) bufptr,
(ulong) uint4korr(bufptr+EVENT_LEN_OFFSET)));
#endif
- ev->thd= thd;
+
/*
We go directly to the application phase, since we don't need
to check if the event shall be skipped or not.
=== modified file 'sql/sql_lex.cc'
--- a/sql/sql_lex.cc 2010-02-06 19:54:30 +0000
+++ b/sql/sql_lex.cc 2010-03-15 07:02:17 +0000
@@ -3007,3 +3007,12 @@ bool st_lex::is_partition_management() c
alter_info.flags == ALTER_REORGANIZE_PARTITION));
}
+
+/**
+ Set all fields to their "unspecified" value.
+*/
+void st_lex_master_info::set_unspecified()
+{
+ bzero((char*) this, sizeof(*this));
+ delay= -1;
+}
=== modified file 'sql/sql_lex.h'
--- a/sql/sql_lex.h 2010-02-06 19:54:30 +0000
+++ b/sql/sql_lex.h 2010-03-15 07:02:17 +0000
@@ -203,17 +203,20 @@ typedef struct st_lex_master_info
{
char *host, *user, *password, *log_file_name;
uint port, connect_retry;
+ int delay;
ulonglong pos;
ulong server_id;
/*
Enum is used for making it possible to detect if the user
changed variable or if it should be left at old value
*/
- enum {SSL_UNCHANGED, SSL_DISABLE, SSL_ENABLE}
+ enum {SSL_UNCHANGED= 0, SSL_DISABLE, SSL_ENABLE}
ssl, ssl_verify_server_cert;
char *ssl_key, *ssl_cert, *ssl_ca, *ssl_capath, *ssl_cipher;
char *relay_log_name;
ulong relay_log_pos;
+
+ void set_unspecified();
} LEX_MASTER_INFO;
=== modified file 'sql/sql_repl.cc'
--- a/sql/sql_repl.cc 2010-02-05 17:51:55 +0000
+++ b/sql/sql_repl.cc 2010-03-15 07:02:17 +0000
@@ -1193,6 +1193,9 @@ bool change_master(THD* thd, Master_info
if (lex_mi->connect_retry)
mi->connect_retry = lex_mi->connect_retry;
+ if (lex_mi->delay != -1)
+ mi->rli.delay= lex_mi->delay;
+
if (lex_mi->ssl != LEX_MASTER_INFO::SSL_UNCHANGED)
mi->ssl= (lex_mi->ssl == LEX_MASTER_INFO::SSL_ENABLE);
=== modified file 'sql/sql_yacc.yy'
--- a/sql/sql_yacc.yy 2010-02-26 13:16:46 +0000
+++ b/sql/sql_yacc.yy 2010-03-15 07:02:17 +0000
@@ -960,6 +960,7 @@ bool my_yyoverflow(short **a, YYSTYPE **
%token LOW_PRIORITY
%token LT /* OPERATOR */
%token MASTER_CONNECT_RETRY_SYM
+%token MASTER_DELAY_SYM
%token MASTER_HOST_SYM
%token MASTER_LOG_FILE_SYM
%token MASTER_LOG_POS_SYM
@@ -1721,7 +1722,7 @@ change:
{
LEX *lex = Lex;
lex->sql_command = SQLCOM_CHANGE_MASTER;
- bzero((char*) &lex->mi, sizeof(lex->mi));
+ lex->mi.set_unspecified();
}
master_defs
{}
@@ -1753,6 +1754,16 @@ master_def:
{
Lex->mi.connect_retry = $3;
}
+ | MASTER_DELAY_SYM EQ ulong_num
+ {
+ if ($3 > MASTER_DELAY_MAX)
+ {
+ my_error(ER_MASTER_DELAY_VALUE_OUT_OF_RANGE, MYF(0),
+ $3, MASTER_DELAY_MAX);
+ }
+ else
+ Lex->mi.delay = $3;
+ }
| MASTER_SSL_SYM EQ ulong_num
{
Lex->mi.ssl= $3 ?
@@ -6219,7 +6230,7 @@ slave:
lex->sql_command = SQLCOM_SLAVE_START;
lex->type = 0;
/* We'll use mi structure for UNTIL options */
- bzero((char*) &lex->mi, sizeof(lex->mi));
+ lex->mi.set_unspecified();
/* If you change this code don't forget to update SLAVE START too */
}
slave_until
@@ -6237,7 +6248,8 @@ slave:
lex->sql_command = SQLCOM_SLAVE_START;
lex->type = 0;
/* We'll use mi structure for UNTIL options */
- bzero((char*) &lex->mi, sizeof(lex->mi));
+ lex->mi.set_unspecified();
+ /* If you change this code don't forget to update START SLAVE too */
}
slave_until
{}
@@ -6246,6 +6258,7 @@ slave:
LEX *lex=Lex;
lex->sql_command = SQLCOM_SLAVE_STOP;
lex->type = 0;
+ /* If you change this code don't forget to update STOP SLAVE too */
}
;
@@ -11724,6 +11737,7 @@ keyword_sp:
| MASTER_PASSWORD_SYM {}
| MASTER_SERVER_ID_SYM {}
| MASTER_CONNECT_RETRY_SYM {}
+ | MASTER_DELAY_SYM {}
| MASTER_SSL_SYM {}
| MASTER_SSL_CA_SYM {}
| MASTER_SSL_CAPATH_SYM {}
Attachment: [text/bzr-bundle] bzr/dao-gang.qu@sun.com-20100315070217-88s7cyxvg7d3koxh.bundle
| Thread |
|---|
| • bzr commit into mysql-5.1-bugteam branch (Dao-Gang.Qu:3379) WL#344 | Dao-Gang.Qu | 15 Mar |