List:Commits« Previous MessageNext Message »
From:Andrei Elkin Date:January 25 2007 1:03pm
Subject:bk commit into 5.0 tree (aelkin:1.2347) BUG#20435
View as plain text  
Below is the list of changes that have just been committed into a local
5.0 repository of elkin. When elkin does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-01-25 14:03:08+02:00, aelkin@stripped
+6 -0
  Bug #20435 Relay logs are rotated at slave_net_timeout when there's no activity
  
  When receiving no events from its master longer than slave_net_timeout
  slave's replication io thread disconnects and reconnects. Reconnecting causes rotation
  of the relay log. That is unnecessary work which also causes inconviniece because
  of changes of the relay log files names: old files are removed, new are created.
  
  The behavour of slave is refined not to rotate relay log files when master does not
  rotate itself. Locally generated slave rotate events or master's events
  that bring the binlog postion the same as slave already knows are ignorable.
  Also ignorable are FD events what have artificial mark with the exception of
  very first FD event slave's io thread has to accept even if one is artificial.
  This remains valid even though master was stopped and downgraded. After
  reconnecting to a downgraded master, slave would receive first rotate and FD and
  other events of the last binlog where it was interupped to receive from, and only
  after that rotate and FD of new binlog of downgranded format version.
  If slave reconnects to all time online master and gets with rotate
  the same position it knows, then rotate event is discarded, relay log files remain
  untouched also the event is not put into the current log.
  
  The latter applies to reconnecting after slave_net_timeout which repairs
  the bug.
  
  disconnect_slave_event_counter appears to to be affected since the corresponding running
  counter was adjusted at process_io_rotate.
  no such issue is noticed for abort_slave_event_counter.

  mysql-test/r/rpl_relayrotate.result@stripped, 2007-01-25 14:03:04+02:00,
aelkin@stripped +73 -0
    changed

  mysql-test/r/rpl_until.result@stripped, 2007-01-25 14:03:04+02:00,
aelkin@stripped +7 -7
    changed

  mysql-test/t/rpl_relayrotate.test@stripped, 2007-01-25 14:03:05+02:00,
aelkin@stripped +30 -0
    blank rotate event does not changed slave status, a check added.

  mysql-test/t/rpl_until.test@stripped, 2007-01-25 14:03:05+02:00,
aelkin@stripped +2 -2
    fixing coordinates that are alterd due to implemented ignoring

  sql/slave.cc@stripped, 2007-01-25 14:03:05+02:00,
aelkin@stripped +86 -10
    do not call process_io_rotate if master is sending `fake' reconnecting
    rotate event. Effective for all 3 binlog versions.
    An immediately following Format Description (FD) event is ignored, or rather is not
put
    into relaylog, except the very first FD event from master that has to be accepted.
    This fact is flagged, the flag is on until non-fake real rotate event drops it.
    The successive FD won't be artificial and will be recorded.
    Part of process_io_rotate duty is moved into ignore_rotate_event to execute when
    the former function won't be called (ignored as well as the event).

  sql/slave.h@stripped, 2007-01-25 14:03:05+02:00, aelkin@stripped
+1 -0
    the flag to designate that master's FD is written in the current relaylog.

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	aelkin
# Host:	dsl-hkibras-fe36f900-97.dhcp.inet.fi
# Root:	/home/elkin/MySQL/TEAM/FIXES/5.0/bug20435_relay_rot_reconn_fix2

--- 1.286/sql/slave.cc	2007-01-25 14:03:16 +02:00
+++ 1.287/sql/slave.cc	2007-01-25 14:03:16 +02:00
@@ -1314,7 +1314,7 @@ static int get_master_version_and_clock(
   */
   delete mi->rli.relay_log.description_event_for_queue;
   mi->rli.relay_log.description_event_for_queue= 0;
-  
+
   if (!my_isdigit(&my_charset_bin,*mysql->server_version))
     errmsg = "Master reported unrecognized MySQL version";
   else
@@ -3496,11 +3496,13 @@ slave_begin:
 			  mi->host, mi->port,
 			  IO_RPL_LOG_NAME,
 			  llstr(mi->master_log_pos,llbuff));
-  /*
-    Adding MAX_LOG_EVENT_HEADER_LEN to the max_packet_size on the I/O
-    thread, since a replication event can become this much larger than
-    the corresponding packet (query) sent from client to master.
-  */
+    /* the flag is off until 1st master's fd stored into relaylog */
+    mi->queued_master_fd_for_exec= FALSE;
+    /*
+      Adding MAX_LOG_EVENT_HEADER_LEN to the max_packet_size on the I/O
+      thread, since a replication event can become this much larger than
+      the corresponding packet (query) sent from client to master.
+    */
     mysql->net.max_packet_size= thd->net.max_packet_size+= MAX_LOG_EVENT_HEADER;
   }
   else
@@ -4173,6 +4175,37 @@ static int process_io_rotate(MASTER_INFO
   DBUG_RETURN(0);
 }
 
+
+/* slave ignores the event if master reported the current pos */
+/* side effect calculation for future desicion on the successive FD event */
+static inline bool ignore_rotate_event(MASTER_INFO *mi, Rotate_log_event *rev)
+{
+  bool res=
+    (strcmp(mi->master_log_name, rev->new_log_ident) == 0) &&
+    (mi->master_log_pos == rev->pos);
+    /*
+      when res false, meaning to accept, the succesive event
+      is either one more rotate of non-artificial FD and it has to be accepted too.
+      The coming FD will replace  _for_queue and _for_exec
+      of mi->rli->relay_log.descriptions and be stored in new relay log file.
+    */
+  if (!res) /* flag designating, if on, that there is master's FD in the current
+	       relaylog is dropped since the current rotates */
+    mi->queued_master_fd_for_exec= !res;
+  else
+  {
+#ifndef DBUG_OFF
+    /*
+      part of ignoring (res is true) process_io_rotate that needs executing
+    */
+    if (disconnect_slave_event_count)
+      events_till_disconnect++;
+#endif
+  }
+  
+  return res;
+}
+
 /*
   Reads a 3.23 event and converts it to the slave's format. This code was
   copied from MySQL 4.0.
@@ -4234,7 +4267,13 @@ static int queue_binlog_ver_1_event(MAST
     inc_pos= event_len;
     break;
   case ROTATE_EVENT:
-    if (unlikely(process_io_rotate(mi,(Rotate_log_event*)ev)))
+    if (ignore_rotate_event(mi, (Rotate_log_event*) ev))
+    {
+      delete ev;
+      pthread_mutex_unlock(&mi->data_lock);
+      DBUG_RETURN(0);
+    }
+    if (unlikely(process_io_rotate(mi, (Rotate_log_event*) ev)))
     {
       delete ev;
       pthread_mutex_unlock(&mi->data_lock);
@@ -4318,7 +4357,13 @@ static int queue_binlog_ver_3_event(MAST
   case STOP_EVENT:
     goto err;
   case ROTATE_EVENT:
-    if (unlikely(process_io_rotate(mi,(Rotate_log_event*)ev)))
+    if (ignore_rotate_event(mi, (Rotate_log_event*) ev))
+    {
+      delete ev;
+      pthread_mutex_unlock(&mi->data_lock);
+      DBUG_RETURN(0);
+    }
+    if (unlikely(process_io_rotate(mi, (Rotate_log_event*) ev)))
     {
       delete ev;
       pthread_mutex_unlock(&mi->data_lock);
@@ -4414,8 +4459,23 @@ int queue_event(MASTER_INFO* mi,const ch
     goto err;
   case ROTATE_EVENT:
   {
-    Rotate_log_event rev(buf,event_len,mi->rli.relay_log.description_event_for_queue);

-    if (unlikely(process_io_rotate(mi,&rev)))
+    Rotate_log_event
+      rev(buf, event_len, mi->rli.relay_log.description_event_for_queue);
+    if (ignore_rotate_event(mi, &rev))
+    {
+     /*
+       Master is telling us a position which we already know (this
+       can happen when we just reconnected, asking for some position,
+       then master always sends us a "fake" Rotate event
+       containing this same position).  So, this event is useless. We
+       don't need to queue it to the relay log (saves space), to
+       rotate relay logs (releases from new files creating work).
+     */
+      pthread_mutex_unlock(&mi->data_lock);
+      DBUG_RETURN(0);
+    }
+
+    if (unlikely(process_io_rotate(mi, &rev)))
     {
       error= 1;
       goto err;
@@ -4449,6 +4509,22 @@ int queue_event(MASTER_INFO* mi,const ch
     }
     delete mi->rli.relay_log.description_event_for_queue;
     mi->rli.relay_log.description_event_for_queue= tmp;
+    /*
+      Only artificial FD events can be thrown away. They follow fake
+      rotates, which are ignored. However, still not the first such
+      artifical FD. When slave server starts the new relaylog is
+      created. In the head of the file we need to record master's FD
+      that can be artificial. That recording makes the relaylog
+      *complete*, i.e all future events in it will be interpreted
+      correctly according to master's FD.  Wheather to ignore of not
+      we look also at the flag mi->queued_master_fd_for_exec.
+     */
+    if (tmp->artificial_event && mi->queued_master_fd_for_exec)
+    {
+      pthread_mutex_unlock(&mi->data_lock);
+      DBUG_RETURN(0);
+    }
+    mi->queued_master_fd_for_exec= TRUE; // store FD to complete relaylog head
     /* 
        Though this does some conversion to the slave's format, this will
        preserve the master's binlog format version, and number of event types. 

--- 1.24/mysql-test/r/rpl_until.result	2007-01-25 14:03:16 +02:00
+++ 1.25/mysql-test/r/rpl_until.result	2007-01-25 14:03:16 +02:00
@@ -37,7 +37,7 @@ Master_Port	MASTER_MYPORT
 Connect_Retry	1
 Master_Log_File	master-bin.000001
 Read_Master_Log_Pos	776
-Relay_Log_File	slave-relay-bin.000004
+Relay_Log_File	slave-relay-bin.000003
 Relay_Log_Pos	#
 Relay_Master_Log_File	master-bin.000001
 Slave_IO_Running	#
@@ -78,7 +78,7 @@ Master_Port	MASTER_MYPORT
 Connect_Retry	1
 Master_Log_File	master-bin.000001
 Read_Master_Log_Pos	776
-Relay_Log_File	slave-relay-bin.000004
+Relay_Log_File	slave-relay-bin.000003
 Relay_Log_Pos	#
 Relay_Master_Log_File	master-bin.000001
 Slave_IO_Running	#
@@ -104,7 +104,7 @@ Master_SSL_Cert	
 Master_SSL_Cipher	
 Master_SSL_Key	
 Seconds_Behind_Master	#
-start slave until relay_log_file='slave-relay-bin.000004', relay_log_pos=746;
+start slave until relay_log_file='slave-relay-bin.000003', relay_log_pos=840;
 select * from t2;
 n
 1
@@ -117,7 +117,7 @@ Master_Port	MASTER_MYPORT
 Connect_Retry	1
 Master_Log_File	master-bin.000001
 Read_Master_Log_Pos	776
-Relay_Log_File	slave-relay-bin.000004
+Relay_Log_File	slave-relay-bin.000003
 Relay_Log_Pos	#
 Relay_Master_Log_File	master-bin.000001
 Slave_IO_Running	#
@@ -134,8 +134,8 @@ Skip_Counter	0
 Exec_Master_Log_Pos	608
 Relay_Log_Space	#
 Until_Condition	Relay
-Until_Log_File	slave-relay-bin.000004
-Until_Log_Pos	746
+Until_Log_File	slave-relay-bin.000003
+Until_Log_Pos	840
 Master_SSL_Allowed	No
 Master_SSL_CA_File	
 Master_SSL_CA_Path	
@@ -154,7 +154,7 @@ Master_Port	MASTER_MYPORT
 Connect_Retry	1
 Master_Log_File	master-bin.000001
 Read_Master_Log_Pos	776
-Relay_Log_File	slave-relay-bin.000004
+Relay_Log_File	slave-relay-bin.000003
 Relay_Log_Pos	#
 Relay_Master_Log_File	master-bin.000001
 Slave_IO_Running	Yes

--- 1.23/mysql-test/t/rpl_until.test	2007-01-25 14:03:16 +02:00
+++ 1.24/mysql-test/t/rpl_until.test	2007-01-25 14:03:16 +02:00
@@ -44,7 +44,7 @@ wait_for_slave_to_stop;
 --query_vertical SHOW SLAVE STATUS
 
 # try replicate all until second insert to t2;
-start slave until relay_log_file='slave-relay-bin.000004', relay_log_pos=746;
+start slave until relay_log_file='slave-relay-bin.000003', relay_log_pos=840;
 sleep 2;
 wait_for_slave_to_stop;
 select * from t2;
@@ -65,7 +65,7 @@ start slave until master_log_file='maste
 sleep 2;
 wait_for_slave_to_stop;
 # here the sql slave thread should be stopped
---replace_result $MASTER_MYPORT MASTER_MYPORT bin.000005 bin.000004 bin.000006 bin.000004
bin.000007 bin.000004
+--replace_result $MASTER_MYPORT MASTER_MYPORT bin.000005 bin.000003 bin.000006 bin.000003
bin.000007 bin.000003
 --replace_column 1 # 9 # 23 # 33 #
 --query_vertical SHOW SLAVE STATUS
 

--- 1.20/mysql-test/r/rpl_relayrotate.result	2007-01-25 14:03:16 +02:00
+++ 1.21/mysql-test/r/rpl_relayrotate.result	2007-01-25 14:03:16 +02:00
@@ -13,4 +13,77 @@ start slave;
 select max(a) from t1;
 max(a)
 8000
+set @@global.slave_net_timeout=3;
+stop slave;
+reset slave;
+reset master;
+start slave;
+show slave status;;
+Slave_IO_State	#
+Master_Host	127.0.0.1
+Master_User	root
+Master_Port	#
+Connect_Retry	1
+Master_Log_File	master-bin.000001
+Read_Master_Log_Pos	98
+Relay_Log_File	slave-relay-bin.000003
+Relay_Log_Pos	236
+Relay_Master_Log_File	master-bin.000001
+Slave_IO_Running	Yes
+Slave_SQL_Running	Yes
+Replicate_Do_DB	
+Replicate_Ignore_DB	
+Replicate_Do_Table	
+Replicate_Ignore_Table	
+Replicate_Wild_Do_Table	
+Replicate_Wild_Ignore_Table	
+Last_Errno	0
+Last_Error	
+Skip_Counter	0
+Exec_Master_Log_Pos	98
+Relay_Log_Space	#
+Until_Condition	None
+Until_Log_File	
+Until_Log_Pos	0
+Master_SSL_Allowed	No
+Master_SSL_CA_File	
+Master_SSL_CA_Path	
+Master_SSL_Cert	
+Master_SSL_Cipher	
+Master_SSL_Key	
+Seconds_Behind_Master	#
+show slave status;;
+Slave_IO_State	#
+Master_Host	127.0.0.1
+Master_User	root
+Master_Port	#
+Connect_Retry	1
+Master_Log_File	master-bin.000001
+Read_Master_Log_Pos	98
+Relay_Log_File	slave-relay-bin.000003
+Relay_Log_Pos	236
+Relay_Master_Log_File	master-bin.000001
+Slave_IO_Running	Yes
+Slave_SQL_Running	Yes
+Replicate_Do_DB	
+Replicate_Ignore_DB	
+Replicate_Do_Table	
+Replicate_Ignore_Table	
+Replicate_Wild_Do_Table	
+Replicate_Wild_Ignore_Table	
+Last_Errno	0
+Last_Error	
+Skip_Counter	0
+Exec_Master_Log_Pos	98
+Relay_Log_Space	#
+Until_Condition	None
+Until_Log_File	
+Until_Log_Pos	0
+Master_SSL_Allowed	No
+Master_SSL_CA_File	
+Master_SSL_CA_Path	
+Master_SSL_Cert	
+Master_SSL_Cipher	
+Master_SSL_Key	
+Seconds_Behind_Master	#
 drop table t1;

--- 1.17/mysql-test/t/rpl_relayrotate.test	2007-01-25 14:03:16 +02:00
+++ 1.18/mysql-test/t/rpl_relayrotate.test	2007-01-25 14:03:16 +02:00
@@ -55,6 +55,36 @@ start slave;
 # reading:
 sync_with_master;
 select max(a) from t1;
+
+
+
+# BUG#20435 Relay logs are rotated at slave_net_timeout when there's no activity
+# compare two results of slave's status separated by couple of rotate events.
+# Particularly, the relay log file's index must stay
+
+connection slave;
+set @@global.slave_net_timeout=3;
+stop slave;  # reset and restart slave
+reset slave; # to get deterministic relay log index
+
+connection master;
+reset master;
+
+connection slave;
+start slave; 
+#  wait for slave started
+sleep 2;
+--replace_result $MASTER_MYPORT MASTER_PORT
+--replace_column 1 # 4 # 23 # 33 #
+--query_vertical show slave status;
+sleep 9; # to get passed surely couple of rotate events
+--replace_result $MASTER_MYPORT MASTER_PORT
+--replace_column 1 # 4 # 23 # 33 #
+--query_vertical show slave status;
+
+
+#cleanup
+
 connection master;
 
 # The following DROP is a very important cleaning task:

--- 1.98/sql/slave.h	2007-01-25 14:03:16 +02:00
+++ 1.99/sql/slave.h	2007-01-25 14:03:16 +02:00
@@ -441,6 +441,7 @@ typedef struct st_master_info
 
   */
   long clock_diff_with_master;
+  bool queued_master_fd_for_exec;  // the marker to help ignoring artificial FDs
 
   st_master_info()
     :ssl(0), fd(-1),  io_thd(0), inited(0),
Thread
bk commit into 5.0 tree (aelkin:1.2347) BUG#20435Andrei Elkin25 Jan