Below is the list of changes that have just been committed into a local
5.0 repository of elkin. When elkin does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2007-01-15 19:18:48+02:00, aelkin@stripped
+3 -0
Bug #20435 Relay logs are rotated at slave_net_timeout when there's no activity
When no events arrive from the master for longer than slave_net_timeout, the
slave's replication I/O thread disconnects and reconnects. Reconnecting causes rotation
of the relay log. That is unnecessary work, and it also causes inconvenience because
the relay log file names change: old files are removed and new ones are created.
The behaviour of the slave is refined so that it does not rotate relay log files when
the master has not rotated its own binlog. Locally generated slave rotate events, or
master events that carry the same binlog position the slave already knows, can be ignored.
This remains valid even if the master was stopped and downgraded. After
reconnecting to a downgraded master, the slave would first receive the rotate, FD, and
other events of the last binlog it was reading when it was interrupted, and only
after that the rotate and FD of the new binlog in the downgraded format version.
If the slave reconnects to a master that has been online the whole time and the rotate
event carries the same position the slave already knows, then the rotate event is
discarded: relay log files remain untouched, and the event is not written to the current log.
The latter applies to reconnecting after slave_net_timeout, which fixes
the bug.
mysql-test/r/rpl_relayrotate.result@stripped, 2007-01-15 19:18:46+02:00,
aelkin@stripped +73 -0
changed
mysql-test/t/rpl_relayrotate.test@stripped, 2007-01-15 19:18:46+02:00,
aelkin@stripped +28 -0
a blank rotate event does not change the slave status; a check is added.
sql/slave.cc@stripped, 2007-01-15 19:18:46+02:00,
aelkin@stripped +39 -4
do not call process_io_rotate if the master is sending a `fake' rotate event
on reconnect. Effective for all 3 binlog versions.
# This is a BitKeeper patch. What follows are the unified diffs for the
# set of deltas contained in the patch. The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User: aelkin
# Host: dsl-hkibras-fe36f900-97.dhcp.inet.fi
# Root: /home/elkin/MySQL/TEAM/FIXES/5.0/bug20435_relay_rot_reconn_fix2
--- 1.286/sql/slave.cc 2007-01-15 19:18:56 +02:00
+++ 1.287/sql/slave.cc 2007-01-15 19:18:56 +02:00
@@ -4173,6 +4173,14 @@ static int process_io_rotate(MASTER_INFO
DBUG_RETURN(0);
}
+
+/* slave ignores the event if master reported the current pos */
+inline bool ignore_rotate_event(MASTER_INFO *mi, Rotate_log_event *rev)
+{
+ return (strcmp(mi->master_log_name, rev->new_log_ident) == 0) &&
+ (mi->master_log_pos == rev->pos);
+}
+
/*
Reads a 3.23 event and converts it to the slave's format. This code was
copied from MySQL 4.0.
@@ -4234,7 +4242,13 @@ static int queue_binlog_ver_1_event(MAST
inc_pos= event_len;
break;
case ROTATE_EVENT:
- if (unlikely(process_io_rotate(mi,(Rotate_log_event*)ev)))
+ if (ignore_rotate_event(mi, (Rotate_log_event*) ev))
+ {
+ delete ev;
+ pthread_mutex_unlock(&mi->data_lock);
+ DBUG_RETURN(0);
+ }
+ if (unlikely(process_io_rotate(mi, (Rotate_log_event*) ev)))
{
delete ev;
pthread_mutex_unlock(&mi->data_lock);
@@ -4318,7 +4332,13 @@ static int queue_binlog_ver_3_event(MAST
case STOP_EVENT:
goto err;
case ROTATE_EVENT:
- if (unlikely(process_io_rotate(mi,(Rotate_log_event*)ev)))
+ if (ignore_rotate_event(mi, (Rotate_log_event*) ev))
+ {
+ delete ev;
+ pthread_mutex_unlock(&mi->data_lock);
+ DBUG_RETURN(0);
+ }
+ if (unlikely(process_io_rotate(mi, (Rotate_log_event*) ev)))
{
delete ev;
pthread_mutex_unlock(&mi->data_lock);
@@ -4414,8 +4434,23 @@ int queue_event(MASTER_INFO* mi,const ch
goto err;
case ROTATE_EVENT:
{
- Rotate_log_event rev(buf,event_len,mi->rli.relay_log.description_event_for_queue);
- if (unlikely(process_io_rotate(mi,&rev)))
+ Rotate_log_event
+ rev(buf, event_len, mi->rli.relay_log.description_event_for_queue);
+ if (ignore_rotate_event(mi, &rev))
+ {
+ /*
+ Master is telling us a position which we already know (this
+ can happen when we just reconnected, asking for some position,
+ then master always sends us a "fake" Rotate event
+ containing this same position). So, this event is useless. We
+ don't need to queue it to the relay log (saves space), to
+ rotate relay logs (releases from new files creating work).
+ */
+ pthread_mutex_unlock(&mi->data_lock);
+ DBUG_RETURN(0);
+ }
+
+ if (unlikely(process_io_rotate(mi, &rev)))
{
error= 1;
goto err;
--- 1.20/mysql-test/r/rpl_relayrotate.result 2007-01-15 19:18:56 +02:00
+++ 1.21/mysql-test/r/rpl_relayrotate.result 2007-01-15 19:18:56 +02:00
@@ -13,4 +13,77 @@ start slave;
select max(a) from t1;
max(a)
8000
+reset master;
+set @@global.slave_net_timeout=3;;
+stop slave;
+reset slave;
+start slave;
+show slave status;;
+Slave_IO_State #
+Master_Host 127.0.0.1
+Master_User root
+Master_Port MASTER_PORT
+Connect_Retry 1
+Master_Log_File master-bin.000001
+Read_Master_Log_Pos 98
+Relay_Log_File slave-relay-bin.000003
+Relay_Log_Pos 236
+Relay_Master_Log_File master-bin.000001
+Slave_IO_Running Yes
+Slave_SQL_Running Yes
+Replicate_Do_DB
+Replicate_Ignore_DB
+Replicate_Do_Table
+Replicate_Ignore_Table
+Replicate_Wild_Do_Table
+Replicate_Wild_Ignore_Table
+Last_Errno 0
+Last_Error
+Skip_Counter 0
+Exec_Master_Log_Pos 98
+Relay_Log_Space #
+Until_Condition None
+Until_Log_File
+Until_Log_Pos 0
+Master_SSL_Allowed No
+Master_SSL_CA_File
+Master_SSL_CA_Path
+Master_SSL_Cert
+Master_SSL_Cipher
+Master_SSL_Key
+Seconds_Behind_Master #
+show slave status;;
+Slave_IO_State #
+Master_Host 127.0.0.1
+Master_User root
+Master_Port MASTER_PORT
+Connect_Retry 1
+Master_Log_File master-bin.000001
+Read_Master_Log_Pos 98
+Relay_Log_File slave-relay-bin.000003
+Relay_Log_Pos 518
+Relay_Master_Log_File master-bin.000001
+Slave_IO_Running Yes
+Slave_SQL_Running Yes
+Replicate_Do_DB
+Replicate_Ignore_DB
+Replicate_Do_Table
+Replicate_Ignore_Table
+Replicate_Wild_Do_Table
+Replicate_Wild_Ignore_Table
+Last_Errno 0
+Last_Error
+Skip_Counter 0
+Exec_Master_Log_Pos 98
+Relay_Log_Space #
+Until_Condition None
+Until_Log_File
+Until_Log_Pos 0
+Master_SSL_Allowed No
+Master_SSL_CA_File
+Master_SSL_CA_Path
+Master_SSL_Cert
+Master_SSL_Cipher
+Master_SSL_Key
+Seconds_Behind_Master #
drop table t1;
--- 1.17/mysql-test/t/rpl_relayrotate.test 2007-01-15 19:18:56 +02:00
+++ 1.18/mysql-test/t/rpl_relayrotate.test 2007-01-15 19:18:56 +02:00
@@ -55,6 +55,34 @@ start slave;
# reading:
sync_with_master;
select max(a) from t1;
+
+
+
+# BUG#20435 Relay logs are rotated at slave_net_timeout when there's no activity
+# compare two results of slave's status separated by couple of rotate events.
+# Particularly, the relay log file's index must stay
+connection master;
+reset master;
+
+connection slave;
+let $slave_net_timeout=3;
+--eval set @@global.slave_net_timeout=$slave_net_timeout;
+stop slave; # reset and restart slave
+reset slave; # to get deterministic relay log index
+start slave;
+# wait for slave started
+sleep 2;
+--replace_result $MASTER_MYPORT MASTER_PORT
+--replace_column 1 # 23 # 33 #
+--query_vertical show slave status;
+sleep 9; # to get passed surely couple of rotate events
+--replace_result $MASTER_MYPORT MASTER_PORT
+--replace_column 1 # 23 # 33 #
+--query_vertical show slave status;
+
+
+#cleanup
+
connection master;
# The following DROP is a very important cleaning task:
| Thread |
|---|
| • bk commit into 5.0 tree (aelkin:1.2347) BUG#20435 | Andrei Elkin | 15 Jan |