List:Commits« Previous MessageNext Message »
From:Luis Soares Date:March 24 2009 9:42am
Subject:bzr commit into mysql-6.0-rpl branch (luis.soares:2833) Bug#43076
View as plain text  
#At file:///home/lsoares/Workspace/mysql-server/bugfix/43076/6.0-rpl/ based on revid:aelkin@stripped

 2833 Luis Soares	2009-03-24
      BUG#43076: rpl.rpl_idempotency fails sporadically on pushbuild
      
      There were some valgrind warnings in strlen. This happened 
      because SHOW SLAVE STATUS connection thread would race with 
      SQL thread for the error message buffer. Sometimes the connection
      thread was getting a half way completed message (ie, string 
      without NULL terminator, causing valgrind to complain with: 
      "Conditional jump or move depends on uninitialised value(s)".
      
      This patch addresses this issue by synchronizing the error object
      between IO/SQL thread and user thread on SHOW SLAVE STATUS.
     @ mysql-test/suite/rpl/r/rpl_reporting.result
        Plain result file.
     @ mysql-test/suite/rpl/t/rpl_reporting.test
        Test case that caused problem to arrise in valgrind PB runs. After this patch
        the warnings should be no more.
     @ sql/log_event.cc
        Small comment on the about access to error message.
     @ sql/protocol.cc
        Assertion to check the presence of '\0' on expected null terminated string.
     @ sql/rpl_reporting.cc
        Added synchronization while writing to last_error.message . Furthermore, 
        added a conditional dbug execution point which enables reproducing 
        the previous faulty behavior and test the fix.
     @ sql/rpl_reporting.h
        Added the mutex as discussed within the team.
     @ sql/slave.cc
        Added synchronization on SHOW SLAVE STATUS.

    added:
      mysql-test/suite/rpl/r/rpl_reporting.result
      mysql-test/suite/rpl/t/rpl_reporting.test
    modified:
      sql/log_event.cc
      sql/protocol.cc
      sql/rpl_reporting.cc
      sql/rpl_reporting.h
      sql/slave.cc
=== added file 'mysql-test/suite/rpl/r/rpl_reporting.result'
--- a/mysql-test/suite/rpl/r/rpl_reporting.result	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/rpl/r/rpl_reporting.result	2009-03-24 09:42:09 +0000
@@ -0,0 +1,24 @@
+stop slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+reset master;
+reset slave;
+drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
+start slave;
+CREATE TABLE t (a int);
+SET global debug='+d,rpl_reporting_incomplete';
+SELECT GET_LOCK("rpl_reporting_incomplete_lock", 1);
+GET_LOCK("rpl_reporting_incomplete_lock", 1)
+1
+SET SQL_LOG_BIN= 0;
+INSERT INTO t VALUES (1);
+SET SQL_LOG_BIN= 1;
+UPDATE t SET a = 2 WHERE a = 1;
+SELECT RELEASE_LOCK("rpl_reporting_incomplete_lock");
+RELEASE_LOCK("rpl_reporting_incomplete_lock")
+1
+STOP SLAVE;
+RESET SLAVE;
+RESET MASTER;
+SET global debug='-d,rpl_reporting_incomplete';
+START SLAVE;
+DROP TABLE t;

=== added file 'mysql-test/suite/rpl/t/rpl_reporting.test'
--- a/mysql-test/suite/rpl/t/rpl_reporting.test	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/rpl/t/rpl_reporting.test	2009-03-24 09:42:09 +0000
@@ -0,0 +1,75 @@
+# BUG#43076: rpl.rpl_idempotency fails sporadically on pushbuild
+# 
+# Problem statement
+# =================
+#
+#   There were some valgrind warnings in strlen. This happened 
+#   because SHOW SLAVE STATUS connection thread would race with 
+#   SQL thread for the error message buffer. Sometimes the connection
+#   thread was getting a half way completed message (ie, string 
+#   without NULL terminator, causing valgrind to complain with: 
+#   "Conditional jump or move depends on uninitialised value(s)".
+#
+# Test procedure
+# ==============
+#
+#   This test case is to check that there is no concurrent update
+#   and reading of error message buffer. It works as follows:
+#
+#     i) We activate the debug conditional code which causes the
+#        SQL not to write the NULL character into the error message
+#        buffer (using SET debug= ...);
+#    ii) We activate the sync point which will cause the SQL thread 
+#        to stop before setting the NULL terminator character in the 
+#        string and terminate the error reporting successfully.
+#        (using SELECT GET_LOCK)
+#   iii) Intentionally, we cause a RBR Update event to fail  on the 
+#        slave.
+#    iv) On the slave we wait for the SQL thread to stop (which 
+#        internally issues SHOW SLAVE STATUS). This is to follow
+#        closely the behavior on the rpl_idempotency test.
+#
+#   OBS: At this point, valgrind run would report an unconditional jump,
+#        before the patch for BUG#43076.
+#
+#    vi) We release the lock and let the server proceed as usual.
+#
+#     v) Finally, we reset master and slave and proceed with clean up.
+#
+
+
+source include/master-slave.inc;
+source include/have_binlog_format_row.inc;
+
+connection master;
+CREATE TABLE t (a int);
+
+connection slave;
+SET global debug='+d,rpl_reporting_incomplete';
+SELECT GET_LOCK("rpl_reporting_incomplete_lock", 1);
+
+connection master;
+SET SQL_LOG_BIN= 0;
+INSERT INTO t VALUES (1);
+SET SQL_LOG_BIN= 1;
+
+UPDATE t SET a = 2 WHERE a = 1;
+
+connection slave;
+source include/wait_for_slave_sql_to_stop.inc;
+SELECT RELEASE_LOCK("rpl_reporting_incomplete_lock");
+STOP SLAVE;
+RESET SLAVE;
+
+connection master;
+RESET MASTER;
+
+connection slave;
+SET global debug='-d,rpl_reporting_incomplete';
+START SLAVE;
+source include/wait_for_slave_to_start.inc;
+
+connection master;
+DROP TABLE t;
+
+sync_slave_with_master;

=== modified file 'sql/log_event.cc'
--- a/sql/log_event.cc	2009-03-09 12:17:41 +0000
+++ b/sql/log_event.cc	2009-03-24 09:42:09 +0000
@@ -6463,6 +6463,12 @@ int Execute_load_log_event::do_apply_eve
       don't want to overwrite it with the filename.
       What we want instead is add the filename to the current error message.
     */
+
+    /* 
+       No need to lock because SQL thread is the only writer of
+       last_error().message. Lock for writing is grabbed inside
+       rli->report.
+    */
     char *tmp= my_strdup(rli->last_error().message, MYF(MY_WME));
     if (tmp)
     {

=== modified file 'sql/protocol.cc'
--- a/sql/protocol.cc	2009-03-09 12:49:47 +0000
+++ b/sql/protocol.cc	2009-03-24 09:42:09 +0000
@@ -852,6 +852,8 @@ bool Protocol::send_result_set_row(List<
 
 bool Protocol::store(const char *from, CHARSET_INFO *cs)
 {
+  /* when in debug mode, assert that from is null or is null terminated */
+  DBUG_ASSERT(!from || (strchr(from, '\0') != NULL));
   if (!from)
     return store_null();
   uint length= strlen(from);

=== modified file 'sql/rpl_reporting.cc'
--- a/sql/rpl_reporting.cc	2007-06-11 20:15:39 +0000
+++ b/sql/rpl_reporting.cc	2009-03-24 09:42:09 +0000
@@ -1,6 +1,7 @@
 
 #include "mysql_priv.h"
 #include "rpl_reporting.h"
+#include "debug_sync.h"
 
 void
 Slave_reporting_capability::report(loglevel level, int err_code,
@@ -20,6 +21,7 @@ Slave_reporting_capability::report(logle
       It's an error, it must be reported in Last_error and Last_errno in SHOW
       SLAVE STATUS.
     */
+    pthread_mutex_lock(&err_lock);
     pbuff= m_last_error.message;
     pbuffsize= sizeof(m_last_error.message);
     m_last_error.number = err_code;
@@ -37,6 +39,16 @@ Slave_reporting_capability::report(logle
   }
 
   my_vsnprintf(pbuff, pbuffsize, msg, args);
+  DBUG_EXECUTE_IF("rpl_reporting_incomplete",
+                  {
+                    size_t size= strlen(pbuff);
+                    pbuff[size]= 'X';
+                    DBUG_SYNC_POINT("rpl_reporting_incomplete_lock", 5);
+                    pbuff[size]= '\0';
+                  });
+
+  if (pbuff == m_last_error.message)
+    pthread_mutex_unlock(&err_lock);
 
   va_end(args);
 

=== modified file 'sql/rpl_reporting.h'
--- a/sql/rpl_reporting.h	2007-06-11 20:15:39 +0000
+++ b/sql/rpl_reporting.h	2009-03-24 09:42:09 +0000
@@ -16,6 +16,9 @@
 class Slave_reporting_capability
 {
 public:
+  /** lock used to synchronize m_last_error on 'SHOW SLAVE STATUS' **/
+  mutable pthread_mutex_t err_lock;
+
   /**
      Constructor.
 
@@ -24,6 +27,7 @@ public:
   Slave_reporting_capability(char const *thread_name)
     : m_thread_name(thread_name)
   {
+      pthread_mutex_init(&err_lock, MY_MUTEX_INIT_FAST);
   }
 
   /**
@@ -44,7 +48,9 @@ public:
      STATUS</code>.
    */
   void clear_error() {
+    pthread_mutex_lock(&err_lock);
     m_last_error.clear();
+    pthread_mutex_unlock(&err_lock);
   }
 
   /**

=== modified file 'sql/slave.cc'
--- a/sql/slave.cc	2009-03-16 11:56:39 +0000
+++ b/sql/slave.cc	2009-03-24 09:42:09 +0000
@@ -1493,6 +1493,8 @@ bool show_master_info(THD* thd, Master_i
 
     pthread_mutex_lock(&mi->data_lock);
     pthread_mutex_lock(&mi->rli.data_lock);
+    pthread_mutex_lock(&mi->rli.err_lock);
+    pthread_mutex_lock(&mi->err_lock);
     protocol->store(mi->host, &my_charset_bin);
     protocol->store(mi->user, &my_charset_bin);
     protocol->store((uint32) mi->port);
@@ -1621,6 +1623,8 @@ bool show_master_info(THD* thd, Master_i
 
     pthread_mutex_unlock(&mi->rli.data_lock);
     pthread_mutex_unlock(&mi->data_lock);
+    pthread_mutex_unlock(&mi->rli.err_lock);
+    pthread_mutex_unlock(&mi->err_lock);
 
     if (my_net_write(&thd->net, (uchar*) thd->packet.ptr(), packet->length()))
       DBUG_RETURN(TRUE);


Attachment: [text/bzr-bundle] bzr/luis.soares@sun.com-20090324094209-b2zfp0merhkvx7a5.bundle
Thread
bzr commit into mysql-6.0-rpl branch (luis.soares:2833) Bug#43076Luis Soares24 Mar
  • Re: bzr commit into mysql-6.0-rpl branch (luis.soares:2833) Bug#43076Alfranio Correia26 Mar
    • Re: bzr commit into mysql-6.0-rpl branch (luis.soares:2833) Bug#43076Luís Soares27 Mar
  • Re: bzr commit into mysql-6.0-rpl branch (luis.soares:2833) Bug#43076Mats Kindahl26 Mar
    • Re: bzr commit into mysql-6.0-rpl branch (luis.soares:2833) Bug#43076Luís Soares27 Mar
      • Re: bzr commit into mysql-6.0-rpl branch (luis.soares:2833) Bug#43076Mats Kindahl27 Mar