List:Commits« Previous MessageNext Message »
From:Andrei Elkin Date:January 24 2007 9:55pm
Subject:bk commit into 5.0 tree (aelkin:1.2347) BUG#20435
View as plain text  
Below is the list of changes that have just been committed into a local
5.0 repository of elkin. When elkin does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-01-24 22:55:09+02:00, aelkin@stripped
+4 -0
  Bug #20435 Relay logs are rotated at slave_net_timeout when there's no activity
  
  When receiving no events from its master longer than slave_net_timeout
  slave's replication io thread disconnects and reconnects. Reconnecting causes rotation
  of the relay log. That is unnecessary work which also causes inconviniece because
  of changes of the relay log files names: old files are removed, new are created.
  
  The behavour of slave is refined not to rotate relay log files when master does not
  rotate itself. Locally generated slave rotate events or master's events
  that bring the binlog postion the same as slave already knows are ignorable.
  Also ignorable are FD events what have artificial mark with the exception of
  very first FD event slave's io thread has to accept even if one is artificial.
  This remains valid even though master was stopped and downgraded. After
  reconnecting to a downgraded master, slave would receive first rotate and FD and
  other events of the last binlog where it was interupped to receive from, and only
  after that rotate and FD of new binlog of downgranded format version.
  If slave reconnects to all time online master and gets with rotate
  the same position it knows, then rotate event is discarded, relay log files remain
  untouched also the event is not put into the current log.
  
  The latter applies to reconnecting after slave_net_timeout which repairs
  the bug.

  mysql-test/r/rpl_relayrotate.result@stripped, 2007-01-24 22:55:06+02:00,
aelkin@stripped +73 -0
    changed

  mysql-test/t/rpl_relayrotate.test@stripped, 2007-01-24 22:55:06+02:00,
aelkin@stripped +30 -0
    blank rotate event does not changed slave status, a check added.

  sql/slave.cc@stripped, 2007-01-24 22:55:06+02:00,
aelkin@stripped +76 -10
    do not call process_io_rotate if master is sending `fake' reconnecting
    rotate event. Effective for all 3 binlog versions.
    An immediately following Format Description (FD) event is ignored, or rather is not
put
    into relaylog, except the very first FD event from master that has to be accepted.
    This fact is flagged, the flag is on until non-fake real rotate event drops it.
    The successive FD won't be artificial and will be recorded.

  sql/slave.h@stripped, 2007-01-24 22:55:06+02:00, aelkin@stripped
+1 -0
    the flag to designate that master's FD is written in the current relaylog.

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	aelkin
# Host:	dsl-hkibras-fe36f900-97.dhcp.inet.fi
# Root:	/home/elkin/MySQL/TEAM/FIXES/5.0/bug20435_relay_rot_reconn_fix2

--- 1.286/sql/slave.cc	2007-01-24 22:55:16 +02:00
+++ 1.287/sql/slave.cc	2007-01-24 22:55:16 +02:00
@@ -1314,7 +1314,7 @@ static int get_master_version_and_clock(
   */
   delete mi->rli.relay_log.description_event_for_queue;
   mi->rli.relay_log.description_event_for_queue= 0;
-  
+
   if (!my_isdigit(&my_charset_bin,*mysql->server_version))
     errmsg = "Master reported unrecognized MySQL version";
   else
@@ -3496,11 +3496,13 @@ slave_begin:
 			  mi->host, mi->port,
 			  IO_RPL_LOG_NAME,
 			  llstr(mi->master_log_pos,llbuff));
-  /*
-    Adding MAX_LOG_EVENT_HEADER_LEN to the max_packet_size on the I/O
-    thread, since a replication event can become this much larger than
-    the corresponding packet (query) sent from client to master.
-  */
+    /* the flag is off until 1st master's fd stored into relaylog */
+    mi->queued_master_fd_for_exec= FALSE;
+    /*
+      Adding MAX_LOG_EVENT_HEADER_LEN to the max_packet_size on the I/O
+      thread, since a replication event can become this much larger than
+      the corresponding packet (query) sent from client to master.
+    */
     mysql->net.max_packet_size= thd->net.max_packet_size+= MAX_LOG_EVENT_HEADER;
   }
   else
@@ -4173,6 +4175,27 @@ static int process_io_rotate(MASTER_INFO
   DBUG_RETURN(0);
 }
 
+
+/* slave ignores the event if master reported the current pos */
+/* side effect calculation for future desicion on the successive FD event */
+static inline bool ignore_rotate_event(MASTER_INFO *mi, Rotate_log_event *rev)
+{
+  bool res=
+    (strcmp(mi->master_log_name, rev->new_log_ident) == 0) &&
+    (mi->master_log_pos == rev->pos);
+    /*
+      when res false, meaning to accept, the succesive event
+      is either one more rotate of non-artificial FD and it has to be accepted too.
+      The coming FD will replace  _for_queue and _for_exec
+      of mi->rli->relay_log.descriptions and be stored in new relay log file.
+    */
+  if (!res) /* flag designating, if on, that there is master's FD in the current
+	       relaylog is dropped since the current rotates */
+    mi->queued_master_fd_for_exec= !res;
+  
+  return res;
+}
+
 /*
   Reads a 3.23 event and converts it to the slave's format. This code was
   copied from MySQL 4.0.
@@ -4234,7 +4257,13 @@ static int queue_binlog_ver_1_event(MAST
     inc_pos= event_len;
     break;
   case ROTATE_EVENT:
-    if (unlikely(process_io_rotate(mi,(Rotate_log_event*)ev)))
+    if (ignore_rotate_event(mi, (Rotate_log_event*) ev))
+    {
+      delete ev;
+      pthread_mutex_unlock(&mi->data_lock);
+      DBUG_RETURN(0);
+    }
+    if (unlikely(process_io_rotate(mi, (Rotate_log_event*) ev)))
     {
       delete ev;
       pthread_mutex_unlock(&mi->data_lock);
@@ -4318,7 +4347,13 @@ static int queue_binlog_ver_3_event(MAST
   case STOP_EVENT:
     goto err;
   case ROTATE_EVENT:
-    if (unlikely(process_io_rotate(mi,(Rotate_log_event*)ev)))
+    if (ignore_rotate_event(mi, (Rotate_log_event*) ev))
+    {
+      delete ev;
+      pthread_mutex_unlock(&mi->data_lock);
+      DBUG_RETURN(0);
+    }
+    if (unlikely(process_io_rotate(mi, (Rotate_log_event*) ev)))
     {
       delete ev;
       pthread_mutex_unlock(&mi->data_lock);
@@ -4414,8 +4449,23 @@ int queue_event(MASTER_INFO* mi,const ch
     goto err;
   case ROTATE_EVENT:
   {
-    Rotate_log_event rev(buf,event_len,mi->rli.relay_log.description_event_for_queue);

-    if (unlikely(process_io_rotate(mi,&rev)))
+    Rotate_log_event
+      rev(buf, event_len, mi->rli.relay_log.description_event_for_queue);
+    if (ignore_rotate_event(mi, &rev))
+    {
+     /*
+       Master is telling us a position which we already know (this
+       can happen when we just reconnected, asking for some position,
+       then master always sends us a "fake" Rotate event
+       containing this same position).  So, this event is useless. We
+       don't need to queue it to the relay log (saves space), to
+       rotate relay logs (releases from new files creating work).
+     */
+      pthread_mutex_unlock(&mi->data_lock);
+      DBUG_RETURN(0);
+    }
+
+    if (unlikely(process_io_rotate(mi, &rev)))
     {
       error= 1;
       goto err;
@@ -4449,6 +4499,22 @@ int queue_event(MASTER_INFO* mi,const ch
     }
     delete mi->rli.relay_log.description_event_for_queue;
     mi->rli.relay_log.description_event_for_queue= tmp;
+    /*
+      Only artificial FD events can be thrown away. They follow fake
+      rotates, which are ignored. However, still not the first such
+      artifical FD. When slave server starts the new relaylog is
+      created. In the head of the file we need to record master's FD
+      that can be artificial. That recording makes the relaylog
+      *complete*, i.e all future events in it will be interpreted
+      correctly according to master's FD.  Wheather to ignore of not
+      we look also at the flag mi->queued_master_fd_for_exec.
+     */
+    if (tmp->artificial_event && mi->queued_master_fd_for_exec)
+    {
+      pthread_mutex_unlock(&mi->data_lock);
+      DBUG_RETURN(0);
+    }
+    mi->queued_master_fd_for_exec= TRUE; // store FD to complete relaylog head
     /* 
        Though this does some conversion to the slave's format, this will
        preserve the master's binlog format version, and number of event types. 

--- 1.20/mysql-test/r/rpl_relayrotate.result	2007-01-24 22:55:16 +02:00
+++ 1.21/mysql-test/r/rpl_relayrotate.result	2007-01-24 22:55:16 +02:00
@@ -13,4 +13,77 @@ start slave;
 select max(a) from t1;
 max(a)
 8000
+set @@global.slave_net_timeout=3;
+stop slave;
+reset slave;
+reset master;
+start slave;
+show slave status;;
+Slave_IO_State	#
+Master_Host	127.0.0.1
+Master_User	root
+Master_Port	#
+Connect_Retry	1
+Master_Log_File	master-bin.000001
+Read_Master_Log_Pos	98
+Relay_Log_File	slave-relay-bin.000003
+Relay_Log_Pos	236
+Relay_Master_Log_File	master-bin.000001
+Slave_IO_Running	Yes
+Slave_SQL_Running	Yes
+Replicate_Do_DB	
+Replicate_Ignore_DB	
+Replicate_Do_Table	
+Replicate_Ignore_Table	
+Replicate_Wild_Do_Table	
+Replicate_Wild_Ignore_Table	
+Last_Errno	0
+Last_Error	
+Skip_Counter	0
+Exec_Master_Log_Pos	98
+Relay_Log_Space	#
+Until_Condition	None
+Until_Log_File	
+Until_Log_Pos	0
+Master_SSL_Allowed	No
+Master_SSL_CA_File	
+Master_SSL_CA_Path	
+Master_SSL_Cert	
+Master_SSL_Cipher	
+Master_SSL_Key	
+Seconds_Behind_Master	#
+show slave status;;
+Slave_IO_State	#
+Master_Host	127.0.0.1
+Master_User	root
+Master_Port	#
+Connect_Retry	1
+Master_Log_File	master-bin.000001
+Read_Master_Log_Pos	98
+Relay_Log_File	slave-relay-bin.000003
+Relay_Log_Pos	236
+Relay_Master_Log_File	master-bin.000001
+Slave_IO_Running	Yes
+Slave_SQL_Running	Yes
+Replicate_Do_DB	
+Replicate_Ignore_DB	
+Replicate_Do_Table	
+Replicate_Ignore_Table	
+Replicate_Wild_Do_Table	
+Replicate_Wild_Ignore_Table	
+Last_Errno	0
+Last_Error	
+Skip_Counter	0
+Exec_Master_Log_Pos	98
+Relay_Log_Space	#
+Until_Condition	None
+Until_Log_File	
+Until_Log_Pos	0
+Master_SSL_Allowed	No
+Master_SSL_CA_File	
+Master_SSL_CA_Path	
+Master_SSL_Cert	
+Master_SSL_Cipher	
+Master_SSL_Key	
+Seconds_Behind_Master	#
 drop table t1;

--- 1.17/mysql-test/t/rpl_relayrotate.test	2007-01-24 22:55:16 +02:00
+++ 1.18/mysql-test/t/rpl_relayrotate.test	2007-01-24 22:55:16 +02:00
@@ -55,6 +55,36 @@ start slave;
 # reading:
 sync_with_master;
 select max(a) from t1;
+
+
+
+# BUG#20435 Relay logs are rotated at slave_net_timeout when there's no activity
+# compare two results of slave's status separated by couple of rotate events.
+# Particularly, the relay log file's index must stay
+
+connection slave;
+set @@global.slave_net_timeout=3;
+stop slave;  # reset and restart slave
+reset slave; # to get deterministic relay log index
+
+connection master;
+reset master;
+
+connection slave;
+start slave; 
+#  wait for slave started
+sleep 2;
+--replace_result $MASTER_MYPORT MASTER_PORT
+--replace_column 1 # 4 # 23 # 33 #
+--query_vertical show slave status;
+sleep 9; # to get passed surely couple of rotate events
+--replace_result $MASTER_MYPORT MASTER_PORT
+--replace_column 1 # 4 # 23 # 33 #
+--query_vertical show slave status;
+
+
+#cleanup
+
 connection master;
 
 # The following DROP is a very important cleaning task:

--- 1.98/sql/slave.h	2007-01-24 22:55:16 +02:00
+++ 1.99/sql/slave.h	2007-01-24 22:55:16 +02:00
@@ -441,6 +441,7 @@ typedef struct st_master_info
 
   */
   long clock_diff_with_master;
+  bool queued_master_fd_for_exec;  // the marker to help ignoring artificial FDs
 
   st_master_info()
     :ssl(0), fd(-1),  io_thd(0), inited(0),
Thread
bk commit into 5.0 tree (aelkin:1.2347) BUG#20435Andrei Elkin24 Jan