From: Date: October 12 2005 1:29pm Subject: bk commit into 4.1 tree (guilhem:1.2471) BUG#13023 List-Archive: http://lists.mysql.com/internals/30955 X-Bug: 13023 Message-Id: <200510121129.j9CBTw8w005495@gbichot3.local> Below is the list of changes that have just been committed into a local 4.1 repository of guilhem. When guilhem does a push these changes will be propagated to the main repository and, within 24 hours after the push, to the public repository. For information on how to access the public repository see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html ChangeSet 1.2471 05/10/12 13:29:55 guilhem@stripped +8 -0 Fix for BUG#13023: "SQL Thread is up but doesn't move forward". Details in slave.cc; in short we now record whenever the slave I/O thread ignores a master's event because of its server id, and use this info in the slave SQL thread to advance Exec_master_log_pos. Because if we do not, this variable stays at the position of the last executed event, i.e. the last *non-ignored* executed one, which may not be the last of the master's binlog (and so the slave *looks* behind the master though it's data-wise it's not). mysql-test/t/rpl_dual_pos_advance.test 1.1 05/10/12 13:29:51 guilhem@stripped +108 -0 Test for BUG#13023 (with a part, disabled, to test BUG#13861 when I fix it). Before the fix, this test used to hang. mysql-test/r/rpl_dual_pos_advance.result 1.1 05/10/12 13:29:51 guilhem@stripped +22 -0 New BitKeeper file ``mysql-test/r/rpl_dual_pos_advance.result'' sql/slave.h 1.88 05/10/12 13:29:51 guilhem@stripped +11 -0 A couple of coordinates in RELAY_LOG_INFO to keep track of the last ignored events received by the slave I/O thread (ignored because of the server id). sql/slave.cc 1.277 05/10/12 13:29:51 guilhem@stripped +109 -19 A comment for BUG#13861 (to be fixed). llstr() instead of %ld as the number is ulonglong. mi->rli becomes rli in some places. Fix for BUG#13023: - in the slave I/O thread, whenever we ignore an event because of its server id we update a couple of coordinates in memory - in the slave SQL thread, whenever we bump into the end of the latest relay log, we check this couple of coordinates to see if we should advance our Exec_master_log_pos. - when the slave I/O thread terminates it saves these in-memory coordinates into a Rotate event in the relay log, so that they are durable. sql/log_event.h 1.99 05/10/12 13:29:51 guilhem@stripped +18 -10 We now have a case where a Rotate_log_event is executed by the slave SQL thread while not being in the relay log, so it needs to pretend its length is 0: a ZERO_LEN flag for that; a flag DUP_NAME (replaces "bool alloced") to be able to choose if we want the constructor to duplicate the string argument or not. sql/log_event.cc 1.186 05/10/12 13:29:51 guilhem@stripped +33 -9 Moving one Rotate_log_event constructor from log_event.h. Support for on-demand choice of duplicating the string argument of the constructor or not (because there now are needs for both alternatives, see slave.cc). mysql-test/t/rpl_dual_pos_advance.test 1.0 05/10/12 13:29:51 guilhem@stripped +0 -0 BitKeeper file /home/mysql_src/mysql-4.1/mysql-test/t/rpl_dual_pos_advance.test mysql-test/r/rpl_dual_pos_advance.result 1.0 05/10/12 13:29:51 guilhem@stripped +0 -0 BitKeeper file /home/mysql_src/mysql-4.1/mysql-test/r/rpl_dual_pos_advance.result sql/log.cc 1.164 05/10/12 13:29:50 guilhem@stripped +3 -3 No more default arguments for Rotate_log_event constructor. MYSQL_LOG::appendv() is now called without mutex. mysql-test/t/rpl_dual_pos_advance-master.opt 1.1 05/10/10 23:09:11 guilhem@stripped +0 -0 empty; its goal is just to trigger a server restart after running the test, so that the master forgets that it was a slave (otherwise it affects the following tests). mysql-test/t/rpl_dual_pos_advance-master.opt 1.0 05/10/10 23:09:11 guilhem@stripped +0 -0 BitKeeper file /home/mysql_src/mysql-4.1/mysql-test/t/rpl_dual_pos_advance-master.opt # This is a BitKeeper patch. What follows are the unified diffs for the # set of deltas contained in the patch. The rest of the patch, the part # that BitKeeper cares about, is below these diffs. # User: guilhem # Host: gbichot3.local # Root: /home/mysql_src/mysql-4.1 --- 1.163/sql/log.cc 2005-07-12 05:04:54 +02:00 +++ 1.164/sql/log.cc 2005-10-12 13:29:50 +02:00 @@ -1028,7 +1028,8 @@ to change base names at some point. */ THD *thd = current_thd; /* may be 0 if we are reacting to SIGHUP */ - Rotate_log_event r(thd,new_name+dirname_length(new_name)); + Rotate_log_event r(thd,new_name+dirname_length(new_name), + 0, LOG_EVENT_OFFSET, 0); r.set_log_pos(this); r.write(&log_file); bytes_written += r.get_event_len(); @@ -1106,7 +1107,7 @@ DBUG_ASSERT(log_file.type == SEQ_READ_APPEND); - pthread_mutex_lock(&LOCK_log); + safe_mutex_assert_owner(&LOCK_log); do { if (my_b_append(&log_file,(byte*) buf,len)) @@ -1125,7 +1126,6 @@ } err: - pthread_mutex_unlock(&LOCK_log); if (!error) signal_update(); DBUG_RETURN(error); --- 1.185/sql/log_event.cc 2005-09-01 17:55:46 +02:00 +++ 1.186/sql/log_event.cc 2005-10-12 13:29:51 +02:00 @@ -2013,18 +2013,44 @@ #endif /* MYSQL_CLIENT */ + /* - Rotate_log_event::Rotate_log_event() + Rotate_log_event::Rotate_log_event() (2 constructors) */ + +#ifndef MYSQL_CLIENT +Rotate_log_event::Rotate_log_event(THD* thd_arg, + const char* new_log_ident_arg, + uint ident_len_arg, ulonglong pos_arg, + uint flags_arg) + :Log_event(), new_log_ident(new_log_ident_arg), + pos(pos_arg),ident_len(ident_len_arg ? ident_len_arg : + (uint) strlen(new_log_ident_arg)), flags(flags_arg) +{ +#ifndef DBUG_OFF + char buff[22]; + DBUG_ENTER("Rotate_log_event::Rotate_log_event(THD*,...)"); + DBUG_PRINT("enter",("new_log_ident %s pos %s flags %lu", new_log_ident_arg, + llstr(pos_arg, buff), flags)); +#endif + if (flags & DUP_NAME) + new_log_ident= my_strdup_with_length(new_log_ident_arg, + ident_len, + MYF(MY_WME)); + DBUG_VOID_RETURN; +} +#endif + + Rotate_log_event::Rotate_log_event(const char* buf, int event_len, bool old_format) - :Log_event(buf, old_format),new_log_ident(NULL),alloced(0) + :Log_event(buf, old_format), new_log_ident(0), flags(DUP_NAME) { // The caller will ensure that event_len is what we have at EVENT_LEN_OFFSET int header_size = (old_format) ? OLD_HEADER_LEN : LOG_EVENT_HEADER_LEN; uint ident_offset; - DBUG_ENTER("Rotate_log_event"); + DBUG_ENTER("Rotate_log_event::Rotate_log_event(char*,...)"); if (event_len < header_size) DBUG_VOID_RETURN; @@ -2043,12 +2069,9 @@ ident_offset = ROTATE_HEADER_LEN; } set_if_smaller(ident_len,FN_REFLEN-1); - if (!(new_log_ident= my_strdup_with_length((byte*) buf + - ident_offset, - (uint) ident_len, - MYF(MY_WME)))) - DBUG_VOID_RETURN; - alloced = 1; + new_log_ident= my_strdup_with_length((byte*) buf + ident_offset, + (uint) ident_len, + MYF(MY_WME)); DBUG_VOID_RETURN; } @@ -2060,6 +2083,7 @@ int Rotate_log_event::write_data(IO_CACHE* file) { char buf[ROTATE_HEADER_LEN]; + DBUG_ASSERT(!(flags & ZERO_LEN)); // such an event cannot be written int8store(buf + R_POS_OFFSET, pos); return (my_b_safe_write(file, (byte*)buf, ROTATE_HEADER_LEN) || my_b_safe_write(file, (byte*)new_log_ident, (uint) ident_len)); --- 1.98/sql/log_event.h 2005-08-19 20:49:29 +02:00 +++ 1.99/sql/log_event.h 2005-10-12 13:29:51 +02:00 @@ -434,7 +434,7 @@ } virtual int get_data_size() { return 0;} virtual int get_data_body_offset() { return 0; } - int get_event_len() + virtual int get_event_len() { return (cached_event_len ? cached_event_len : (cached_event_len = LOG_EVENT_HEADER_LEN + get_data_size())); @@ -849,18 +849,18 @@ class Rotate_log_event: public Log_event { public: + enum { + ZERO_LEN= 1, // if event should report 0 as its length + DUP_NAME= 2 // if constructor should dup the string argument + }; const char* new_log_ident; ulonglong pos; uint ident_len; - bool alloced; + uint flags; #ifndef MYSQL_CLIENT Rotate_log_event(THD* thd_arg, const char* new_log_ident_arg, - uint ident_len_arg = 0, - ulonglong pos_arg = LOG_EVENT_OFFSET) - :Log_event(), new_log_ident(new_log_ident_arg), - pos(pos_arg),ident_len(ident_len_arg ? ident_len_arg : - (uint) strlen(new_log_ident_arg)), alloced(0) - {} + uint ident_len_arg, + ulonglong pos_arg, uint flags); #ifdef HAVE_REPLICATION void pack_info(Protocol* protocol); int exec_event(struct st_relay_log_info* rli); @@ -872,10 +872,18 @@ Rotate_log_event(const char* buf, int event_len, bool old_format); ~Rotate_log_event() { - if (alloced) - my_free((gptr) new_log_ident, MYF(0)); + if (flags & DUP_NAME) + my_free((gptr) new_log_ident, MYF(MY_ALLOW_ZERO_PTR)); } Log_event_type get_type_code() { return ROTATE_EVENT;} + virtual int get_event_len() + { + if (flags & ZERO_LEN) + return 0; + if (cached_event_len == 0) + cached_event_len= LOG_EVENT_HEADER_LEN + get_data_size(); + return cached_event_len; + } int get_data_size() { return ident_len + ROTATE_HEADER_LEN;} bool is_valid() { return new_log_ident != 0; } int write_data(IO_CACHE* file); --- 1.276/sql/slave.cc 2005-09-14 14:31:33 +02:00 +++ 1.277/sql/slave.cc 2005-10-12 13:29:51 +02:00 @@ -1739,6 +1739,55 @@ } +/* + Builds a Rotate from the ignored events' info and writes it to relay log. + + SYNOPSIS + write_ignored_events_info_to_relay_log() + thd pointer to I/O thread's thd + mi + + DESCRIPTION + Slave I/O thread, going to die, must leave a durable trace of the + ignored events' end position for the use of the slave SQL thread, by + calling this function. Only that thread can call it (see assertion). + */ +static void write_ignored_events_info_to_relay_log(THD *thd, MASTER_INFO *mi) +{ + RELAY_LOG_INFO *rli= &mi->rli; + pthread_mutex_t *log_lock= rli->relay_log.get_log_lock(); + DBUG_ASSERT(thd == mi->io_thd); + pthread_mutex_lock(log_lock); + if (rli->ign_master_log_name_end[0]) + { + DBUG_PRINT("info",("writing a Rotate event to track down ignored events")); + Rotate_log_event *ev= new Rotate_log_event(thd, rli->ign_master_log_name_end, + 0, rli->ign_master_log_pos_end, + Rotate_log_event::DUP_NAME); + rli->ign_master_log_name_end[0]= 0; + /* can unlock before writing as slave SQL thd will soon see our Rotate */ + pthread_mutex_unlock(log_lock); + if (likely((bool)ev)) + { + ev->server_id= 0; // don't be ignored by slave SQL thread + if (unlikely(rli->relay_log.append(ev))) + sql_print_error("Slave I/O thread failed to write a Rotate event" + " to the relay log, " + "SHOW SLAVE STATUS may be inaccurate"); + rli->relay_log.harvest_bytes_written(&rli->log_space_total); + flush_master_info(mi, 1); + delete ev; + } + else + sql_print_error("Slave I/O thread failed to create a Rotate event" + " (out of memory?), " + "SHOW SLAVE STATUS may be inaccurate"); + } + else + pthread_mutex_unlock(log_lock); +} + + void init_master_info_with_options(MASTER_INFO* mi) { mi->master_log_name[0] = 0; @@ -2334,7 +2383,7 @@ { group_relay_log_name[0]= event_relay_log_name[0]= group_master_log_name[0]= 0; - last_slave_error[0]=0; until_log_name[0]= 0; + last_slave_error[0]= until_log_name[0]= ign_master_log_name_end[0]= 0; bzero((char*) &info_file, sizeof(info_file)); bzero((char*) &cache_buf, sizeof(cache_buf)); @@ -2886,11 +2935,20 @@ */ pthread_mutex_lock(&rli->data_lock); + /* + This tests if the position of the end of the last previous executed event + hits the UNTIL barrier. + We would prefer to test if the position of the start (or possibly) end of + the to-be-read event hits the UNTIL barrier, this is different if there + was an event ignored by the I/O thread just before (BUG#13861 to be + fixed). + */ if (rli->until_condition!=RELAY_LOG_INFO::UNTIL_NONE && rli->is_until_satisfied()) { + char buf[22]; sql_print_error("Slave SQL thread stopped because it reached its" - " UNTIL position %ld", (long) rli->until_pos()); + " UNTIL position %s", llstr(rli->until_pos(), buf)); /* Setting abort_slave flag because we do not want additional message about error in query execution to be printed. @@ -2925,7 +2983,6 @@ if ((ev->server_id == (uint32) ::server_id && !replicate_same_server_id) || (rli->slave_skip_counter && type_code != ROTATE_EVENT)) { - /* TODO: I/O thread should not even log events with the same server id */ rli->inc_group_relay_log_pos(ev->get_event_len(), type_code != STOP_EVENT ? ev->log_pos : LL(0), 1/* skip lock*/); @@ -3033,7 +3090,8 @@ { THD *thd; // needs to be first for thread_stack MYSQL *mysql; - MASTER_INFO *mi = (MASTER_INFO*)arg; + MASTER_INFO *mi = (MASTER_INFO*)arg; + RELAY_LOG_INFO *rli= &mi->rli; char llbuff[22]; uint retry_count; @@ -3276,16 +3334,16 @@ char llbuf1[22], llbuf2[22]; DBUG_PRINT("info", ("log_space_limit=%s log_space_total=%s \ ignore_log_space_limit=%d", - llstr(mi->rli.log_space_limit,llbuf1), - llstr(mi->rli.log_space_total,llbuf2), - (int) mi->rli.ignore_log_space_limit)); + llstr(rli->log_space_limit,llbuf1), + llstr(rli->log_space_total,llbuf2), + (int) rli->ignore_log_space_limit)); } #endif - if (mi->rli.log_space_limit && mi->rli.log_space_limit < - mi->rli.log_space_total && - !mi->rli.ignore_log_space_limit) - if (wait_for_relay_log_space(&mi->rli)) + if (rli->log_space_limit && rli->log_space_limit < + rli->log_space_total && + !rli->ignore_log_space_limit) + if (wait_for_relay_log_space(rli)) { sql_print_error("Slave I/O thread aborted while waiting for relay \ log space"); @@ -3316,6 +3374,7 @@ mysql_close(mysql); mi->mysql=0; } + write_ignored_events_info_to_relay_log(thd, mi); thd->proc_info = "Waiting for slave mutex on exit"; pthread_mutex_lock(&mi->run_lock); mi->slave_running = 0; @@ -3668,6 +3727,7 @@ if (unlikely(!rev->is_valid())) DBUG_RETURN(1); + /* Safe copy as 'rev' has been "sanitized" in Rotate_log_event's ctor */ memcpy(mi->master_log_name, rev->new_log_ident, rev->ident_len+1); mi->master_log_pos= rev->pos; DBUG_PRINT("info", ("master_log_pos: '%s' %d", @@ -3813,6 +3873,7 @@ int error= 0; ulong inc_pos; RELAY_LOG_INFO *rli= &mi->rli; + pthread_mutex_t *log_lock= rli->relay_log.get_log_lock(); DBUG_ENTER("queue_event"); if (mi->old_format) @@ -3820,11 +3881,6 @@ pthread_mutex_lock(&mi->data_lock); - /* - TODO: figure out if other events in addition to Rotate - require special processing. - Guilhem 2003-06 : I don't think so. - */ switch (buf[EVENT_TYPE_OFFSET]) { case STOP_EVENT: /* @@ -3873,16 +3929,27 @@ direct master (an unsupported, useless setup!). */ + pthread_mutex_lock(log_lock); + if ((uint4korr(buf + SERVER_ID_OFFSET) == ::server_id) && !replicate_same_server_id) { /* Do not write it to the relay log. - We still want to increment, so that we won't re-read this event from the - master if the slave IO thread is now stopped/restarted (more efficient if - the events we are ignoring are big LOAD DATA INFILE). + a) We still want to increment mi->master_log_pos, so that we won't + re-read this event from the master if the slave IO thread is now + stopped/restarted (more efficient if the events we are ignoring are big + LOAD DATA INFILE). + b) We want to record that we are skipping events, for the information of + the slave SQL thread, otherwise that thread may let + rli->group_relay_log_pos stay too small if the last binlog's event is + ignored. */ mi->master_log_pos+= inc_pos; + memcpy(rli->ign_master_log_name_end, mi->master_log_name, FN_REFLEN); + DBUG_ASSERT(rli->ign_master_log_name_end[0]); + rli->ign_master_log_pos_end= mi->master_log_pos; + rli->relay_log.signal_update(); // the slave SQL thread needs to re-check DBUG_PRINT("info", ("master_log_pos: %d, event originating from the same server, ignored", (ulong) mi->master_log_pos)); } else @@ -3894,7 +3961,9 @@ DBUG_PRINT("info", ("master_log_pos: %d", (ulong) mi->master_log_pos)); rli->relay_log.harvest_bytes_written(&rli->log_space_total); } + rli->ign_master_log_name_end[0]= 0; // last event is not ignored } + pthread_mutex_unlock(log_lock); err: pthread_mutex_unlock(&mi->data_lock); @@ -4262,6 +4331,27 @@ rli->last_master_timestamp= 0; DBUG_ASSERT(rli->relay_log.get_open_count() == rli->cur_log_old_open_count); + + if (rli->ign_master_log_name_end[0]) + { + /* We generate and return a Rotate, to make our positions advance */ + DBUG_PRINT("info",("seeing an ignored end segment")); + ev= new Rotate_log_event(thd, rli->ign_master_log_name_end, + 0, rli->ign_master_log_pos_end, + Rotate_log_event::DUP_NAME | + Rotate_log_event::ZERO_LEN); + rli->ign_master_log_name_end[0]= 0; + if (unlikely(!ev)) + { + errmsg= "Slave SQL thread failed to create a Rotate event " + "(out of memory?), SHOW SLAVE STATUS may be inaccurate"; + goto err; + } + pthread_mutex_unlock(log_lock); + ev->server_id= 0; // don't be ignored by slave SQL thread + DBUG_RETURN(ev); + } + /* We can, and should release data_lock while we are waiting for update. If we do not, show slave status will block --- New file --- +++ mysql-test/r/rpl_dual_pos_advance.result 05/10/12 13:29:51 stop slave; drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; reset master; reset slave; drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9; start slave; reset master; change master to master_host="127.0.0.1",master_port=SLAVE_PORT,master_user="root"; start slave; create table t1 (n int); create table t4 (n int); create table t5 (n int); create table t6 (n int); show tables; Tables_in_test t1 t4 t5 t6 stop slave; reset slave; drop table t1,t4,t5,t6; --- New file --- +++ mysql-test/t/rpl_dual_pos_advance-master.opt 05/10/10 23:09:11 --- New file --- +++ mysql-test/t/rpl_dual_pos_advance.test 05/10/12 13:29:51 # This test checks that in a dual-head setup # A->B->A, where A has --log-slave-updates (why would it? # assume that there is a C as slave of A), # then the Exec_master_log_pos of SHOW SLAVE STATUS does # not stay too low on B(BUG#13023 due to events ignored because # of their server id). # It also will test BUG#13861. source include/master-slave.inc; # set up "dual head" connection slave; reset master; connection master; --replace_result $SLAVE_MYPORT SLAVE_PORT eval change master to master_host="127.0.0.1",master_port=$SLAVE_MYPORT,master_user="root"; start slave; # now we test it connection slave; create table t1 (n int); save_master_pos; connection master; sync_with_master; # Now test BUG#13861. This will be enabled when Guilhem fixes this # bug. # stop slave # create table t2 (n int); # create one ignored event # save_master_pos; # connection slave; # sync_with_master; # connection slave; # show tables; # save_master_pos; # create table t3 (n int); # connection master; # bug is that START SLAVE UNTIL may stop too late, we test that by # asking it to stop before creation of t3. # start slave until master_log_file="slave-bin.000001",master_log_pos=195; # wait until it's started (the position below is the start of "CREATE # TABLE t2") (otherwise wait_for_slave_to_stop may return at once) # select master_pos_wait("slave-bin.000001",137); # wait_for_slave_to_stop; # then BUG#13861 causes t3 to show up below (because stopped too # late). # show tables; # start slave; # BUG#13023 is that Exec_master_log_pos may stay too low "forever": connection master; create table t4 (n int); # create 3 ignored events create table t5 (n int); create table t6 (n int); save_master_pos; connection slave; sync_with_master; connection slave; save_master_pos; connection master; # then BUG#13023 caused hang below ("master" looks behind, while it's # not in terms of updates done). sync_with_master; show tables; # cleanup stop slave; reset slave; drop table t1,t4,t5,t6; # add t2 and t3 later save_master_pos; connection slave; sync_with_master; # End of 4.1 tests --- 1.87/sql/slave.h 2005-07-30 05:00:25 +02:00 +++ 1.88/sql/slave.h 2005-10-12 13:29:51 +02:00 @@ -304,6 +304,17 @@ */ ulong trans_retries, retried_trans; + /* + If the end of the hot relay log is made of master's events ignored by the + slave I/O thread, these two keep track of the coords (in the master's + binlog) of the last of these events seen by the slave I/O thread. If not, + ign_master_log_name_end[0] == 0. + As they are like a Rotate event read/written from/to the relay log, they + are both protected by rli->relay_log.LOCK_log. + */ + char ign_master_log_name_end[FN_REFLEN]; + ulonglong ign_master_log_pos_end; + st_relay_log_info(); ~st_relay_log_info();