#At file:///home/andrei/MySQL/BZR/FIXES/6.0-rpl-bug37714-rpl_heartbeat/ based on revid:zhenxing.he@stripped
2858 Andrei Elkin 2009-06-03
Bug #37714 rpl.rpl_heartbeat fails sporadically in pushbuild due to timeout
The cause of the bug is that pthread_cond_timedwait() has a time window
between the expiry of the timer that wakes up the thread and
the thread re-acquiring the mutex. Signals could be sent to the dump thread
during that window, so the dump thread was not aware that the binlog had been updated
and continued to stay in the loop.
Fixed by augmenting the MYSQL_BIN_LOG class with a counter that is checked before and after
the wake-up to detect that the binlog has been updated.
modified:
sql/log.cc
sql/log.h
sql/sql_repl.cc
per-file messages:
sql/log.cc
The MYSQL_BIN_LOG class is augmented with a `signal_cnt' counter that is incremented
on each event write to the binlog file.
sql/log.h
MYSQL_BIN_LOG class is augmented with `signal_cnt' counter.
sql/sql_repl.cc
In the innermost while-loop the dump thread checks whether mysql_bin_log.signal_cnt
has changed while it was asleep. A change indicates that a new event has been recorded
and needs to be sent out, and that breaks the loop.
=== modified file 'sql/log.cc'
--- a/sql/log.cc 2009-05-14 08:56:34 +0000
+++ b/sql/log.cc 2009-06-03 15:42:08 +0000
@@ -4219,7 +4219,7 @@ MYSQL_BIN_LOG::MYSQL_BIN_LOG(uint *sync_
:bytes_written(0), prepared_xids(0), file_id(1), open_count(1),
need_start_event(TRUE), m_table_map_version(0),
sync_period_ptr(sync_period),
- is_relay_log(0),
+ is_relay_log(0), signal_cnt(0),
description_event_for_exec(0), description_event_for_queue(0)
{
/*
@@ -6670,6 +6670,7 @@ bool flush_error_log()
void MYSQL_BIN_LOG::signal_update()
{
DBUG_ENTER("MYSQL_BIN_LOG::signal_update");
+ signal_cnt++;
pthread_cond_broadcast(&update_cond);
DBUG_VOID_RETURN;
}
=== modified file 'sql/log.h'
--- a/sql/log.h 2009-04-24 12:55:11 +0000
+++ b/sql/log.h 2009-06-03 15:42:08 +0000
@@ -414,7 +414,7 @@ public:
/* This is relay log */
bool is_relay_log;
-
+ ulong signal_cnt; // update of the counter is checked by heartbeat
/*
These describe the log's format. This is used only for relay logs.
_for_exec is used by the SQL thread, _for_queue by the I/O thread. It's
=== modified file 'sql/sql_repl.cc'
--- a/sql/sql_repl.cc 2009-05-18 08:41:20 +0000
+++ b/sql/sql_repl.cc 2009-06-03 15:42:08 +0000
@@ -816,18 +816,19 @@ impossible position";
case LOG_READ_EOF:
{
int ret;
+ ulong signal_cnt;
DBUG_PRINT("wait",("waiting for data in binary log"));
if (thd->server_id==0) // for mysqlbinlog (mysqlbinlog.server_id==0)
{
pthread_mutex_unlock(log_lock);
goto end;
}
-
+ signal_cnt= mysql_bin_log.signal_cnt;
+ do
+ {
#ifndef DBUG_OFF
- ulong hb_info_counter= 0;
+ ulong hb_info_counter= 0;
#endif
- do
- {
if (coord)
{
DBUG_ASSERT(heartbeat_ts && heartbeat_period != 0LL);
@@ -859,12 +860,13 @@ impossible position";
}
else
{
- DBUG_ASSERT(ret == 0);
+ DBUG_ASSERT(ret == 0 && signal_cnt != mysql_bin_log.signal_cnt ||
+ thd->killed);
DBUG_PRINT("wait",("binary log received update"));
}
- } while (ret != 0 && coord != NULL && !thd->killed);
+ } while (signal_cnt == mysql_bin_log.signal_cnt && !thd->killed);
pthread_mutex_unlock(log_lock);
- }
+ }
break;
default:
| Thread |
|---|
| • bzr commit into mysql-6.0-rpl branch (aelkin:2858) Bug#37714 | Andrei Elkin | 3 Jun |