#At file:///home/lsoares/Workspace/mysql-server/bugfix/43076/6.0-rpl/ based on revid:aelkin@stripped
2833 Luis Soares 2009-03-24
BUG#43076: rpl.rpl_idempotency fails sporadically on pushbuild
There were some valgrind warnings in strlen. This happened
because SHOW SLAVE STATUS connection thread would race with
SQL thread for the error message buffer. Sometimes the connection
thread was getting a half way completed message (ie, string
without NULL terminator, causing valgrind to complain with:
"Conditional jump or move depends on uninitialised value(s)".
This patch addresses this issue by synchronizing the error object
between IO/SQL thread and user thread on SHOW SLAVE STATUS.
@ mysql-test/suite/rpl/r/rpl_reporting.result
Plain result file.
@ mysql-test/suite/rpl/t/rpl_reporting.test
Test case that caused problem to arrise in valgrind PB runs. After this patch
the warnings should be no more.
@ sql/log_event.cc
Small comment on the about access to error message.
@ sql/protocol.cc
Assertion to check the presence of '\0' on expected null terminated string.
@ sql/rpl_reporting.cc
Added synchronization while writing to last_error.message . Furthermore,
added a conditional dbug execution point which enables reproducing
the previous faulty behavior and test the fix.
@ sql/rpl_reporting.h
Added the mutex as discussed within the team.
@ sql/slave.cc
Added synchronization on SHOW SLAVE STATUS.
added:
mysql-test/suite/rpl/r/rpl_reporting.result
mysql-test/suite/rpl/t/rpl_reporting.test
modified:
sql/log_event.cc
sql/protocol.cc
sql/rpl_reporting.cc
sql/rpl_reporting.h
sql/slave.cc
=== added file 'mysql-test/suite/rpl/r/rpl_reporting.result'
--- a/mysql-test/suite/rpl/r/rpl_reporting.result 1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/rpl/r/rpl_reporting.result 2009-03-24 09:42:09 +0000
@@ -0,0 +1,24 @@
+stop slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+reset master;
+reset slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+start slave;
+CREATE TABLE t (a int);
+SET global debug='+d,rpl_reporting_incomplete';
+SELECT GET_LOCK("rpl_reporting_incomplete_lock", 1);
+GET_LOCK("rpl_reporting_incomplete_lock", 1)
+1
+SET SQL_LOG_BIN= 0;
+INSERT INTO t VALUES (1);
+SET SQL_LOG_BIN= 1;
+UPDATE t SET a = 2 WHERE a = 1;
+SELECT RELEASE_LOCK("rpl_reporting_incomplete_lock");
+RELEASE_LOCK("rpl_reporting_incomplete_lock")
+1
+STOP SLAVE;
+RESET SLAVE;
+RESET MASTER;
+SET global debug='-d,rpl_reporting_incomplete';
+START SLAVE;
+DROP TABLE t;
=== added file 'mysql-test/suite/rpl/t/rpl_reporting.test'
--- a/mysql-test/suite/rpl/t/rpl_reporting.test 1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/rpl/t/rpl_reporting.test 2009-03-24 09:42:09 +0000
@@ -0,0 +1,75 @@
+# BUG#43076: rpl.rpl_idempotency fails sporadically on pushbuild
+#
+# Problem statement
+# =================
+#
+# There were some valgrind warnings in strlen. This happened
+# because SHOW SLAVE STATUS connection thread would race with
+# SQL thread for the error message buffer. Sometimes the connection
+# thread was getting a half way completed message (ie, string
+# without NULL terminator, causing valgrind to complain with:
+# "Conditional jump or move depends on uninitialised value(s)".
+#
+# Test procedure
+# ==============
+#
+# This test case is to check that there is no concurrent update
+# and reading of error message buffer. It works as follows:
+#
+# i) We activate the debug conditional code which causes the
+# SQL not to write the NULL character into the error message
+# buffer (using SET debug= ...);
+# ii) We activate the sync point which will cause the SQL thread
+# to stop before setting the NULL terminator character in the
+# string and terminate the error reporting successfully.
+# (using SELECT GET_LOCK)
+# iii) Intentionally, we cause a RBR Update event to fail on the
+# slave.
+# iv) On the slave we wait for the SQL thread to stop (which
+# internally issues SHOW SLAVE STATUS). This is to follow
+# closely the behavior on the rpl_idempotency test.
+#
+# OBS: At this point, valgrind run would report an unconditional jump,
+# before the patch for BUG#43076.
+#
+# vi) We release the lock and let the server proceed as usual.
+#
+# v) Finally, we reset master and slave and proceed with clean up.
+#
+
+
+source include/master-slave.inc;
+source include/have_binlog_format_row.inc;
+
+connection master;
+CREATE TABLE t (a int);
+
+connection slave;
+SET global debug='+d,rpl_reporting_incomplete';
+SELECT GET_LOCK("rpl_reporting_incomplete_lock", 1);
+
+connection master;
+SET SQL_LOG_BIN= 0;
+INSERT INTO t VALUES (1);
+SET SQL_LOG_BIN= 1;
+
+UPDATE t SET a = 2 WHERE a = 1;
+
+connection slave;
+source include/wait_for_slave_sql_to_stop.inc;
+SELECT RELEASE_LOCK("rpl_reporting_incomplete_lock");
+STOP SLAVE;
+RESET SLAVE;
+
+connection master;
+RESET MASTER;
+
+connection slave;
+SET global debug='-d,rpl_reporting_incomplete';
+START SLAVE;
+source include/wait_for_slave_to_start.inc;
+
+connection master;
+DROP TABLE t;
+
+sync_slave_with_master;
=== modified file 'sql/log_event.cc'
--- a/sql/log_event.cc 2009-03-09 12:17:41 +0000
+++ b/sql/log_event.cc 2009-03-24 09:42:09 +0000
@@ -6463,6 +6463,12 @@ int Execute_load_log_event::do_apply_eve
don't want to overwrite it with the filename.
What we want instead is add the filename to the current error message.
*/
+
+ /*
+ No need to lock because SQL thread is the only writer of
+ last_error().message. Lock for writing is grabbed inside
+ rli->report.
+ */
char *tmp= my_strdup(rli->last_error().message, MYF(MY_WME));
if (tmp)
{
=== modified file 'sql/protocol.cc'
--- a/sql/protocol.cc 2009-03-09 12:49:47 +0000
+++ b/sql/protocol.cc 2009-03-24 09:42:09 +0000
@@ -852,6 +852,8 @@ bool Protocol::send_result_set_row(List<
bool Protocol::store(const char *from, CHARSET_INFO *cs)
{
+ /* when in debug mode, assert that from is null or is null terminated */
+ DBUG_ASSERT(!from || (strchr(from, '\0') != NULL));
if (!from)
return store_null();
uint length= strlen(from);
=== modified file 'sql/rpl_reporting.cc'
--- a/sql/rpl_reporting.cc 2007-06-11 20:15:39 +0000
+++ b/sql/rpl_reporting.cc 2009-03-24 09:42:09 +0000
@@ -1,6 +1,7 @@
#include "mysql_priv.h"
#include "rpl_reporting.h"
+#include "debug_sync.h"
void
Slave_reporting_capability::report(loglevel level, int err_code,
@@ -20,6 +21,7 @@ Slave_reporting_capability::report(logle
It's an error, it must be reported in Last_error and Last_errno in SHOW
SLAVE STATUS.
*/
+ pthread_mutex_lock(&err_lock);
pbuff= m_last_error.message;
pbuffsize= sizeof(m_last_error.message);
m_last_error.number = err_code;
@@ -37,6 +39,16 @@ Slave_reporting_capability::report(logle
}
my_vsnprintf(pbuff, pbuffsize, msg, args);
+ DBUG_EXECUTE_IF("rpl_reporting_incomplete",
+ {
+ size_t size= strlen(pbuff);
+ pbuff[size]= 'X';
+ DBUG_SYNC_POINT("rpl_reporting_incomplete_lock", 5);
+ pbuff[size]= '\0';
+ });
+
+ if (pbuff == m_last_error.message)
+ pthread_mutex_unlock(&err_lock);
va_end(args);
=== modified file 'sql/rpl_reporting.h'
--- a/sql/rpl_reporting.h 2007-06-11 20:15:39 +0000
+++ b/sql/rpl_reporting.h 2009-03-24 09:42:09 +0000
@@ -16,6 +16,9 @@
class Slave_reporting_capability
{
public:
+ /** lock used to synchronize m_last_error on 'SHOW SLAVE STATUS' **/
+ mutable pthread_mutex_t err_lock;
+
/**
Constructor.
@@ -24,6 +27,7 @@ public:
Slave_reporting_capability(char const *thread_name)
: m_thread_name(thread_name)
{
+ pthread_mutex_init(&err_lock, MY_MUTEX_INIT_FAST);
}
/**
@@ -44,7 +48,9 @@ public:
STATUS</code>.
*/
void clear_error() {
+ pthread_mutex_lock(&err_lock);
m_last_error.clear();
+ pthread_mutex_unlock(&err_lock);
}
/**
=== modified file 'sql/slave.cc'
--- a/sql/slave.cc 2009-03-16 11:56:39 +0000
+++ b/sql/slave.cc 2009-03-24 09:42:09 +0000
@@ -1493,6 +1493,8 @@ bool show_master_info(THD* thd, Master_i
pthread_mutex_lock(&mi->data_lock);
pthread_mutex_lock(&mi->rli.data_lock);
+ pthread_mutex_lock(&mi->rli.err_lock);
+ pthread_mutex_lock(&mi->err_lock);
protocol->store(mi->host, &my_charset_bin);
protocol->store(mi->user, &my_charset_bin);
protocol->store((uint32) mi->port);
@@ -1621,6 +1623,8 @@ bool show_master_info(THD* thd, Master_i
pthread_mutex_unlock(&mi->rli.data_lock);
pthread_mutex_unlock(&mi->data_lock);
+ pthread_mutex_unlock(&mi->rli.err_lock);
+ pthread_mutex_unlock(&mi->err_lock);
if (my_net_write(&thd->net, (uchar*) thd->packet.ptr(), packet->length()))
DBUG_RETURN(TRUE);
Attachment: [text/bzr-bundle] bzr/luis.soares@sun.com-20090324094209-b2zfp0merhkvx7a5.bundle