MySQL Lists are EOL. Please join:

List:Commits« Previous MessageNext Message »
From:Li-Bing.Song Date:September 26 2010 1:05pm
Subject:bzr commit into mysql-5.1-bugteam branch (Li-Bing.Song:3519)
View as plain text  
#At file:///home/anders/work/bzrwork1/wt3/mysql-5.1-bugteam/ based on revid:davi.arnaut@stripped

 3519 Li-Bing.Song@stripped	2010-09-26
      When slave executes a transaction bigger than slave's max_binlog_cache_size,
      slave will crash. It is caused by the assert that server should only roll back
      the statement but not the whole transaction if the error ER_TRANS_CACHE_FULL 
      happens. But slave sql thread always rollbacks the whole transaction when
      an error happens.
                  
      Ather this patch, we always clear any error set in sql thread(it is different
      from the error in 'SHOW SLAVE STATUS') and it is cleared before rolling back
      the transaction.
     @ mysql-test/suite/rpl/r/rpl_binlog_max_cache_size.result
        SET binlog_cache_size and max_binlog_cache_size for all test cases.
        Add test case for bug#55375.
     @ mysql-test/suite/rpl/t/rpl_binlog_max_cache_size-master.opt
        binlog_cache_size and max_binlog_cache_size can be set in the client connection.
        so remove this option file.
     @ mysql-test/suite/rpl/t/rpl_binlog_max_cache_size.test
        SET binlog_cache_size and max_binlog_cache_size for all test cases.
        Add test case for bug#55375.
     @ sql/log_event.cc
        Some functions don't return the error code, so it is a wrong error code.
        The error should always be set into thd->main_da. So we use 
        slave_rows_error_report to report the right error.
     @ sql/slave.cc
        exec_relay_log_event() need call cleanup_context() to clear context. 
        clearup_context() will call end_trans().
                
        Clear thd's error before cleanup_context. It avoid to trigger the assert
        which cause this bug.

    removed:
      mysql-test/suite/rpl/t/rpl_binlog_max_cache_size-master.opt
    modified:
      mysql-test/suite/rpl/r/rpl_binlog_max_cache_size.result
      mysql-test/suite/rpl/t/rpl_binlog_max_cache_size.test
      sql/log_event.cc
      sql/slave.cc
=== modified file 'mysql-test/suite/rpl/r/rpl_binlog_max_cache_size.result'
--- a/mysql-test/suite/rpl/r/rpl_binlog_max_cache_size.result	2009-06-18 13:52:46 +0000
+++ b/mysql-test/suite/rpl/r/rpl_binlog_max_cache_size.result	2010-09-26 13:05:05 +0000
@@ -4,6 +4,8 @@ reset master;
 reset slave;
 drop table if exists t1,t2,t3,t4,t5,t6,t7,t8,t9;
 start slave;
+SET GLOBAL max_binlog_cache_size = 4096;
+SET GLOBAL binlog_cache_size = 4096;
 CREATE TABLE t1(a INT PRIMARY KEY, data VARCHAR(30000)) ENGINE=Innodb;
 CREATE TABLE t2(a INT PRIMARY KEY, data VARCHAR(30000)) ENGINE=MyIsam;
 CREATE TABLE t3(a INT PRIMARY KEY, data VARCHAR(30000)) ENGINE=Innodb;
@@ -15,11 +17,14 @@ Got one of the listed errors
 *** Single statement on non-transactional table ***
 *** After WL#2687 the difference between STATEMENT/MIXED and ROW will not exist. ***
 Got one of the listed errors
+--source include/wait_for_slave_sql_error_and_skip.inc
+include/start_slave.inc
 *** Single statement on both transactional and non-transactional tables. ***
 *** After WL#2687 we will be able to change the order of the tables. ***
 Got one of the listed errors
-SET GLOBAL SQL_SLAVE_SKIP_COUNTER = 1;
-START SLAVE SQL_THREAD;
+--source include/wait_for_slave_sql_error_and_skip.inc
+SET GLOBAL SQL_SLAVE_SKIP_COUNTER=1;
+include/start_slave.inc
 TRUNCATE TABLE t1;
 TRUNCATE TABLE t2;
 TRUNCATE TABLE t3;
@@ -116,6 +121,37 @@ COMMIT;
 BEGIN;
 Got one of the listed errors
 COMMIT;
+--source include/wait_for_slave_sql_error_and_skip.inc
+SET GLOBAL SQL_SLAVE_SKIP_COUNTER=1;
+include/start_slave.inc
+########################################################################
+#      8 - Bug#55375(Regression Bug) Transaction bigger than
+#          max_binlog_cache_size crashes slave
+########################################################################
+SET GLOBAL max_binlog_cache_size = 4096;
+SET GLOBAL binlog_cache_size = 4096;
+include/stop_slave.inc
+include/start_slave.inc
+CALL mtr.add_suppression("Multi-statement transaction required more than 'max_binlog_cache_size' bytes of storage.*");
+CALL mtr.add_suppression("Writing one row to the row-based binary log failed.*");
+TRUNCATE t1;
+SET GLOBAL max_binlog_cache_size= ORIGINAL_VALUE;
+SET GLOBAL binlog_cache_size= ORIGINAL_VALUE;
+BEGIN;
+Repeat statement 'INSERT INTO t1 VALUES($n, repeat("a", 32))' 128 times
+COMMIT;
+SELECT count(*) FROM t1;
+count(*)
+0
+show binlog events in 'slave-bin.000001' from <binlog_start>;
+Log_name	Pos	Event_type	Server_id	End_log_pos	Info
+SET GLOBAL max_binlog_cache_size= ORIGINAL_VALUE;
+SET GLOBAL binlog_cache_size= ORIGINAL_VALUE;
+include/stop_slave.inc
+include/start_slave.inc
+SELECT count(*) FROM t1;
+count(*)
+128
 ########################################################################################
 #                                        CLEAN
 ########################################################################################
@@ -126,10 +162,3 @@ DROP TABLE IF EXISTS t4;
 DROP TABLE IF EXISTS t5;
 DROP TABLE IF EXISTS t6;
 DROP PROCEDURE p1;
-DROP TABLE t1;
-DROP TABLE t2;
-DROP TABLE t3;
-DROP TABLE IF EXISTS t4;
-DROP TABLE IF EXISTS t5;
-DROP TABLE IF EXISTS t6;
-DROP PROCEDURE p1;

=== removed file 'mysql-test/suite/rpl/t/rpl_binlog_max_cache_size-master.opt'
--- a/mysql-test/suite/rpl/t/rpl_binlog_max_cache_size-master.opt	2009-06-18 13:52:46 +0000
+++ b/mysql-test/suite/rpl/t/rpl_binlog_max_cache_size-master.opt	1970-01-01 00:00:00 +0000
@@ -1 +0,0 @@
---binlog_cache_size=4096 --max_binlog_cache_size=7680

=== modified file 'mysql-test/suite/rpl/t/rpl_binlog_max_cache_size.test'
--- a/mysql-test/suite/rpl/t/rpl_binlog_max_cache_size.test	2009-06-18 13:52:46 +0000
+++ b/mysql-test/suite/rpl/t/rpl_binlog_max_cache_size.test	2010-09-26 13:05:05 +0000
@@ -30,6 +30,16 @@
 --source include/not_embedded.inc
 --source include/not_windows.inc
 
+let $old_max_binlog_cache_size= query_get_value(SHOW VARIABLES LIKE "max_binlog_cache_size", Value, 1);
+let $old_binlog_cache_size= query_get_value(SHOW VARIABLES LIKE "binlog_cache_size", Value, 1);
+
+SET GLOBAL max_binlog_cache_size = 4096;
+# Becuase of bug#55377, we have to set binlog_cache_size until the bug is
+# fixed.
+SET GLOBAL binlog_cache_size = 4096;
+disconnect master;
+connect (master,127.0.0.1,root,,test,$MASTER_MYPORT,);
+
 CREATE TABLE t1(a INT PRIMARY KEY, data VARCHAR(30000)) ENGINE=Innodb;
 CREATE TABLE t2(a INT PRIMARY KEY, data VARCHAR(30000)) ENGINE=MyIsam;
 CREATE TABLE t3(a INT PRIMARY KEY, data VARCHAR(30000)) ENGINE=Innodb;
@@ -57,25 +67,23 @@ if (`SELECT @@binlog_format = 'STATEMENT
 {
   eval INSERT INTO t2 (a, data) VALUES (2,
        CONCAT($data, $data, $data, $data, $data, $data));
+  # Below code fakes the result of 'ROW' mode.
   --echo Got one of the listed errors
+  --echo --source include/wait_for_slave_sql_error_and_skip.inc
+  --echo include/start_slave.inc
 }
 if (`SELECT @@binlog_format = 'ROW'`)
 {
   --error ER_TRANS_CACHE_FULL, ER_ERROR_ON_WRITE
   eval INSERT INTO t2 (a, data) VALUES (2,
        CONCAT($data, $data, $data, $data, $data, $data));
-
-  connection slave;
-  --source include/wait_for_slave_sql_to_stop.inc
-  SET GLOBAL SQL_SLAVE_SKIP_COUNTER = 1;
-  START SLAVE SQL_THREAD;
-  --source include/wait_for_slave_sql_to_start.inc
+  # Incident event
+  --let $slave_sql_errno=1590
+  --source include/wait_for_slave_sql_error_and_skip.inc
 }
 --enable_warnings
 --enable_query_log
 
-connection master;
-
 --disable_query_log
 eval INSERT INTO t1 (a, data) VALUES (3, $data);
 eval INSERT INTO t1 (a, data) VALUES (4, $data);
@@ -93,11 +101,9 @@ eval UPDATE t2, t1 SET t2.data = CONCAT(
                        t1.data = CONCAT($data, $data, $data, $data);
 --enable_query_log
 
-connection slave;
---source include/wait_for_slave_sql_to_stop.inc
-SET GLOBAL SQL_SLAVE_SKIP_COUNTER = 1;
-START SLAVE SQL_THREAD;
---source include/wait_for_slave_sql_to_start.inc
+# Incident event
+--let $slave_sql_errno=1590
+--source include/wait_for_slave_sql_error_and_skip.inc
 
 #--echo ########################################################################################
 #--echo #                             2 - BEGIN - IMPLICIT COMMIT by DDL
@@ -368,8 +374,70 @@ BEGIN;
 --enable_query_log
 COMMIT;
 
+# Incident event
+--let $slave_sql_errno=1590
+--source include/wait_for_slave_sql_error_and_skip.inc
+
+sync_slave_with_master;
+
+--echo ########################################################################
+--echo #      8 - Bug#55375(Regression Bug) Transaction bigger than
+--echo #          max_binlog_cache_size crashes slave
+--echo ########################################################################
+
+SET GLOBAL max_binlog_cache_size = 4096;
+SET GLOBAL binlog_cache_size = 4096;
+
+source include/stop_slave.inc;
+source include/start_slave.inc;
+CALL mtr.add_suppression("Multi-statement transaction required more than 'max_binlog_cache_size' bytes of storage.*");
+CALL mtr.add_suppression("Writing one row to the row-based binary log failed.*");
+
+connection master;
+TRUNCATE t1;
+
+sync_slave_with_master;
+--let binlog_start= query_get_value(SHOW MASTER STATUS, Position, 1)
+--let binlog_file= query_get_value(SHOW MASTER STATUS, File, 1)
+
+connection master;
+--replace_result $old_max_binlog_cache_size ORIGINAL_VALUE
+--eval SET GLOBAL max_binlog_cache_size= $old_max_binlog_cache_size
+--replace_result $old_binlog_cache_size ORIGINAL_VALUE
+--eval SET GLOBAL binlog_cache_size= $old_binlog_cache_size
+disconnect master;
+connect (master,127.0.0.1,root,,test,$MASTER_MYPORT,);
+
+--let $n=128
+BEGIN;
+--disable_query_log
+--echo Repeat statement 'INSERT INTO t1 VALUES(\$n, repeat("a", 32))' $n times
+while ($n)
+{
+  --eval INSERT INTO t1 VALUES ($n, repeat("a", 32))
+  --dec $n
+}
+--enable_query_log
+COMMIT;
+
 connection slave;
---source include/wait_for_slave_sql_to_stop.inc
+--let $slave_sql_errno= 1197
+source include/wait_for_slave_sql_error.inc;
+
+SELECT count(*) FROM t1;
+source include/show_binlog_events.inc;
+
+--replace_result $old_max_binlog_cache_size ORIGINAL_VALUE
+--eval SET GLOBAL max_binlog_cache_size= $old_max_binlog_cache_size
+--replace_result $old_binlog_cache_size ORIGINAL_VALUE
+--eval SET GLOBAL binlog_cache_size= $old_binlog_cache_size
+
+source include/stop_slave.inc;
+source include/start_slave.inc;
+
+connection master;
+sync_slave_with_master;
+SELECT count(*) FROM t1;
 
 --echo ########################################################################################
 --echo #                                        CLEAN
@@ -384,12 +452,5 @@ DROP TABLE IF EXISTS t4;
 DROP TABLE IF EXISTS t5;
 DROP TABLE IF EXISTS t6;
 DROP PROCEDURE p1;
-connection slave;
-DROP TABLE t1;
-DROP TABLE t2;
-DROP TABLE t3;
-DROP TABLE IF EXISTS t4;
-DROP TABLE IF EXISTS t5;
-DROP TABLE IF EXISTS t6;
-DROP PROCEDURE p1;
 --enable_warnings
+source include/master-slave-end.inc;

=== modified file 'sql/log_event.cc'
--- a/sql/log_event.cc	2010-07-09 12:00:17 +0000
+++ b/sql/log_event.cc	2010-09-26 13:05:05 +0000
@@ -144,16 +144,21 @@ static void inline slave_rows_error_repo
     len= my_snprintf(slider, buff_end - slider,
                      " %s, Error_code: %d;", err->msg, err->code);
   }
-  
-  rli->report(level, thd->is_error()? thd->main_da.sql_errno() : 0,
-              "Could not execute %s event on table %s.%s;"
-              "%s handler error %s; "
-              "the event's master log %s, end_log_pos %lu",
-              type, table->s->db.str,
-              table->s->table_name.str,
-              buff,
-              handler_error == NULL? "<unknown>" : handler_error,
-              log_name, pos);
+
+  if (ha_error != 0)
+    rli->report(level, thd->is_error() ? thd->main_da.sql_errno() : 0,
+                "Could not execute %s event on table %s.%s;"
+                "%s handler error %s; "
+                "the event's master log %s, end_log_pos %lu",
+                type, table->s->db.str, table->s->table_name.str,
+                buff, handler_error == NULL ? "<unknown>" : handler_error,
+                log_name, pos);
+  else
+    rli->report(level, thd->is_error() ? thd->main_da.sql_errno() : 0,
+                "Could not execute %s event on table %s.%s;"
+                "%s the event's master log %s, end_log_pos %lu",
+                type, table->s->db.str, table->s->table_name.str,
+                buff, log_name, pos);
 }
 #endif
 
@@ -7649,7 +7654,6 @@ int Rows_log_event::do_apply_event(Relay
                              get_type_str(),
                              RPL_LOG_NAME, (ulong) log_pos);
     thd->reset_current_stmt_binlog_row_based();
-    const_cast<Relay_log_info*>(rli)->cleanup_context(thd, error);
     thd->is_slave_error= 1;
     DBUG_RETURN(error);
   }
@@ -7680,14 +7684,12 @@ int Rows_log_event::do_apply_event(Relay
     const_cast<Relay_log_info*>(rli)->last_event_start_time= my_time(0);
   }
 
-  if (get_flags(STMT_END_F))
-    if ((error= rows_event_stmt_cleanup(rli, thd)))
-      rli->report(ERROR_LEVEL, error,
-                  "Error in %s event: commit of row events failed, "
-                  "table `%s`.`%s`",
-                  get_type_str(), m_table->s->db.str,
-                  m_table->s->table_name.str);
-
+  if (get_flags(STMT_END_F) && (error= rows_event_stmt_cleanup(rli, thd)))
+    slave_rows_error_report(ERROR_LEVEL,
+                            thd->is_error() ? 0 : error,
+                            rli, thd, table,
+                            get_type_str(),
+                            RPL_LOG_NAME, (ulong) log_pos);
   DBUG_RETURN(error);
 }
 

=== modified file 'sql/slave.cc'
--- a/sql/slave.cc	2010-07-20 18:07:36 +0000
+++ b/sql/slave.cc	2010-09-26 13:05:05 +0000
@@ -2343,7 +2343,7 @@ static int exec_relay_log_event(THD* thd
           else
           {
             exec_res= 0;
-            end_trans(thd, ROLLBACK);
+            const_cast<Relay_log_info*>(rli)->cleanup_context(thd, 1);
             /* chance for concurrent connection to get more locks */
             safe_sleep(thd, min(rli->trans_retries, MAX_SLAVE_RETRY_PAUSE),
                        (CHECK_KILLED_FUNC)sql_slave_killed, (void*)rli);
@@ -3158,6 +3158,7 @@ the slave SQL thread with \"SLAVE START\
     request is detected only by the present function, not by events), so we
     must "proactively" clear playgrounds:
   */
+  thd->clear_error();
   rli->cleanup_context(thd, 1);
   /*
     Some extra safety, which should not been needed (normally, event deletion


Attachment: [text/bzr-bundle] bzr/li-bing.song@sun.com-20100926130505-6oeitusqoqhhiio8.bundle
Thread
bzr commit into mysql-5.1-bugteam branch (Li-Bing.Song:3519) Li-Bing.Song26 Sep