List:Commits« Previous MessageNext Message »
From:Li-Bing.Song Date:January 27 2010 7:49am
Subject:bzr commit into mysql-5.1-rep-semisync branch (Li-Bing.Song:3127) Bug#50157
View as plain text  
#At file:///home/anders/work/bzrroot/mysql-5.1-rep-semisync/ based on revid:zhenxing.he@stripped

 3127 Li-Bing.Song@stripped	2010-01-27
      BUG#50157 Assertion !active_tranxs_->is_tranx_end_pos(..) in ReplSemiSyncMaster::commitTrx
      
      The root cause of the crash is that a TranxNode is freed before it is used.
      A TranxNode is allocated and insertted into the active list each time 
      when some log events are written into binlog file and is flushed. 
      The memory for TranxNode is allocted with thd_alloc and will be freed 
      after at the end of the statement. The after_commit/after_rollback callback
      was supposed to be call before the end of each statement and remove the node from
      the active list. However this assumption is not correct in all cases(e.g. CREATE
      TEMPORARY ... SELECT), and can cause the memory allocated for TranxNode be freed
      before it was removed from the active list. So The TranxNode pointer in the active
      list would become a wild pointer and cause the crash.
      
      After this patch, The memory for TranxNode will be allocated by my_malloc.
     @ sql/rpl_handler.cc
        params are not initialized.

    modified:
      mysql-test/suite/rpl/r/rpl_semi_sync.result
      mysql-test/suite/rpl/t/rpl_semi_sync.test
      plugin/semisync/semisync_master.cc
      plugin/semisync/semisync_master.h
      sql/rpl_handler.cc
=== modified file 'mysql-test/suite/rpl/r/rpl_semi_sync.result'
--- a/mysql-test/suite/rpl/r/rpl_semi_sync.result	2009-10-23 04:56:30 +0000
+++ b/mysql-test/suite/rpl/r/rpl_semi_sync.result	2010-01-27 07:48:44 +0000
@@ -120,8 +120,27 @@ min(a)
 select max(a) from t1;
 max(a)
 300
+
+# BUG#50157
+# semi-sync replication crashes when replicating a transaction which
+# include 'CREATE TEMPORARY TABLE `MyISAM_t` SELECT * FROM `Innodb_t` ;
+[ on master ]
+SET SESSION AUTOCOMMIT= 0;
+CREATE TABLE t2(c1 INT) ENGINE=innodb;
+BEGIN;
+
+# Even though it is in a transaction, this statement is binlogged into binlog
+# file immediately.
+CREATE TEMPORARY TABLE t3 SELECT c1 FROM t2 where 1=1;
+
+# These statements will not be binlogged until the transaction is committed
+INSERT INTO t2 VALUES(11);
+INSERT INTO t2 VALUES(22);
+COMMIT;
+DROP TABLE t2, t3;
+SET SESSION AUTOCOMMIT= 1;
 #
-# Test semi-sync master will switch OFF after one transacton
+# Test semi-sync master will switch OFF after one transaction
 # timeout waiting for slave reply.
 #
 include/stop_slave.inc
@@ -135,7 +154,7 @@ Variable_name	Value
 Rpl_semi_sync_master_no_tx	0
 show status like 'Rpl_semi_sync_master_yes_tx';
 Variable_name	Value
-Rpl_semi_sync_master_yes_tx	301
+Rpl_semi_sync_master_yes_tx	304
 show status like 'Rpl_semi_sync_master_clients';
 Variable_name	Value
 Rpl_semi_sync_master_clients	1
@@ -150,7 +169,7 @@ Variable_name	Value
 Rpl_semi_sync_master_no_tx	1
 show status like 'Rpl_semi_sync_master_yes_tx';
 Variable_name	Value
-Rpl_semi_sync_master_yes_tx	301
+Rpl_semi_sync_master_yes_tx	304
 insert into t1 values (100);
 [ master status should be OFF ]
 show status like 'Rpl_semi_sync_master_status';
@@ -161,7 +180,7 @@ Variable_name	Value
 Rpl_semi_sync_master_no_tx	302
 show status like 'Rpl_semi_sync_master_yes_tx';
 Variable_name	Value
-Rpl_semi_sync_master_yes_tx	301
+Rpl_semi_sync_master_yes_tx	304
 #
 # Test semi-sync status on master will be ON again when slave catches up
 #
@@ -194,7 +213,7 @@ Variable_name	Value
 Rpl_semi_sync_master_no_tx	302
 show status like 'Rpl_semi_sync_master_yes_tx';
 Variable_name	Value
-Rpl_semi_sync_master_yes_tx	301
+Rpl_semi_sync_master_yes_tx	304
 show status like 'Rpl_semi_sync_master_clients';
 Variable_name	Value
 Rpl_semi_sync_master_clients	1
@@ -213,7 +232,7 @@ Variable_name	Value
 Rpl_semi_sync_master_no_tx	302
 SHOW STATUS LIKE 'Rpl_semi_sync_master_yes_tx';
 Variable_name	Value
-Rpl_semi_sync_master_yes_tx	302
+Rpl_semi_sync_master_yes_tx	305
 FLUSH NO_WRITE_TO_BINLOG STATUS;
 [ Semi-sync master status variables after FLUSH STATUS ]
 SHOW STATUS LIKE 'Rpl_semi_sync_master_no_tx';

=== modified file 'mysql-test/suite/rpl/t/rpl_semi_sync.test'
--- a/mysql-test/suite/rpl/t/rpl_semi_sync.test	2009-10-23 04:56:30 +0000
+++ b/mysql-test/suite/rpl/t/rpl_semi_sync.test	2010-01-27 07:48:44 +0000
@@ -193,8 +193,36 @@ select count(distinct a) from t1;
 select min(a) from t1;
 select max(a) from t1;
 
+--echo
+--echo # BUG#50157
+--echo # semi-sync replication crashes when replicating a transaction which
+--echo # include 'CREATE TEMPORARY TABLE `MyISAM_t` SELECT * FROM `Innodb_t` ;
+
+connection master;
+echo [ on master ];
+SET SESSION AUTOCOMMIT= 0;
+CREATE TABLE t2(c1 INT) ENGINE=innodb;
+sync_slave_with_master;
+
+connection master;
+BEGIN;
+--echo
+--echo # Even though it is in a transaction, this statement is binlogged into binlog
+--echo # file immediately.
+CREATE TEMPORARY TABLE t3 SELECT c1 FROM t2 where 1=1;
+--echo
+--echo # These statements will not be binlogged until the transaction is committed
+INSERT INTO t2 VALUES(11);
+INSERT INTO t2 VALUES(22);
+COMMIT;
+
+DROP TABLE t2, t3;
+SET SESSION AUTOCOMMIT= 1;
+sync_slave_with_master;
+
+
 --echo #
---echo # Test semi-sync master will switch OFF after one transacton
+--echo # Test semi-sync master will switch OFF after one transaction
 --echo # timeout waiting for slave reply.
 --echo #
 connection slave;

=== modified file 'plugin/semisync/semisync_master.cc'
--- a/plugin/semisync/semisync_master.cc	2009-12-04 05:43:38 +0000
+++ b/plugin/semisync/semisync_master.cc	2010-01-27 07:48:44 +0000
@@ -65,7 +65,7 @@ static int gettimeofday(struct timeval *
 
 ActiveTranx::ActiveTranx(pthread_mutex_t *lock,
 			 unsigned long trace_level)
-  : Trace(trace_level),
+  : Trace(trace_level), allocator_(max_connections),
     num_entries_(max_connections << 1), /* Transaction hash table size
                                          * is set to double the size
                                          * of max_connections */
@@ -115,25 +115,6 @@ unsigned int ActiveTranx::get_hash_value
   return (hash1 + hash2) % num_entries_;
 }
 
-ActiveTranx::TranxNode* ActiveTranx::alloc_tranx_node()
-{
-  MYSQL_THD thd= (MYSQL_THD)current_thd;
-  /* The memory allocated for TranxNode will be automatically freed at
-     the end of the command of current THD. And because
-     ha_autocommit_or_rollback() will always be called before that, so
-     we are sure that the node will be removed from the active list
-     before it get freed. */
-  TranxNode *trx_node = (TranxNode *)thd_alloc(thd, sizeof(TranxNode));
-  if (trx_node)
-  {
-    trx_node->log_name_[0] = '\0';
-    trx_node->log_pos_= 0;
-    trx_node->next_= 0;
-    trx_node->hash_next_= 0;
-  }
-  return trx_node;
-}
-
 int ActiveTranx::compare(const char *log_file_name1, my_off_t log_file_pos1,
 			 const char *log_file_name2, my_off_t log_file_pos2)
 {
@@ -159,7 +140,7 @@ int ActiveTranx::insert_tranx_node(const
 
   function_enter(kWho);
 
-  ins_node = alloc_tranx_node();
+  ins_node = allocator_.allocate_node();
   if (!ins_node)
   {
     sql_print_error("%s: transaction node allocation failed for: (%s, %lu)",
@@ -271,6 +252,7 @@ int ActiveTranx::clear_active_tranx_node
 
     /* Clear the hash table. */
     memset(trx_htb_, 0, num_entries_ * sizeof(TranxNode *));
+    allocator_.free_all_nodes();
 
     /* Clear the active transaction list. */
     if (trx_front_ != NULL)
@@ -311,6 +293,7 @@ int ActiveTranx::clear_active_tranx_node
     }
 
     trx_front_ = new_front;
+    allocator_.free_nodes_before(trx_front_);
 
     if (trace_level_ & kTraceDetail)
       sql_print_information("%s: cleared %d nodes back until pos (%s, %lu)",

=== modified file 'plugin/semisync/semisync_master.h'
--- a/plugin/semisync/semisync_master.h	2009-12-04 01:46:33 +0000
+++ b/plugin/semisync/semisync_master.h	2010-01-27 07:48:44 +0000
@@ -20,6 +20,196 @@
 
 #include "semisync.h"
 
+struct TranxNode {
+  char             log_name_[FN_REFLEN];
+  my_off_t          log_pos_;
+  struct TranxNode *next_;            /* the next node in the sorted list */
+  struct TranxNode *hash_next_;    /* the next node during hash collision */
+};
+
+/*
+  TranxNodeAllocator provides memory allocating and freeing methods for
+  TranxNode.
+
+  BLOCK_TRANX_NODES
+    The number of TranxNodes which are in a block.
+  Block
+    A sequence memory which contain BLOCK_TRANX_NODES TranxNodes.
+    The Block link table is composed of number of Blocks.
+  last_node
+    it always points to the last node in current_block which is in use.
+  current_block
+    Part of its nodes are free and others are used. The Blocks before it are
+    all in use and the blocks after it are all free.
+  allocate_node
+    The pointer of the first free node in current_block is returned to the
+    caller. current_block will move to the next free block when all nodes of
+    it are in use. A new block is allocated and is put into the rear of the
+    Block link table if no block is free. At first, there is no any Block in
+    the Block link table. A Block is allocated and is put into the Block link
+    table;
+  free_nodes_before(TranxNode *node)
+    All blocks before the block which owneres the node gaving by the caller are
+    put into the rear of the Block link table. The block containing
+    the node gaving by the caller is not move to the rear of the Block link
+    table, for some nodes of its are still in use. We only say a block is free
+    if all its node are free. This will waste at most one block, but it is more
+    efficient.
+ */
+#define BLOCK_TRANX_NODES 16
+class TranxNodeAllocator
+{
+public:
+  /*
+    init_nodes
+      How many TranxNodes are usually need.
+    max_reserved_block_num
+      How many blocks are need to cantain the TranxNodes of init_nodes.
+      Some free Blocks will be freed if there are too many Blocks in the
+      Block link table.
+   */
+  TranxNodeAllocator(uint init_nodes) :
+    max_reserved_block_num(init_nodes/BLOCK_TRANX_NODES +
+                  (init_nodes % BLOCK_TRANX_NODES > 1 ? 2 : 1)),
+    last_node(-1), first_block(NULL), last_block(NULL),
+    current_block(NULL), block_num(0) {}
+
+  ~TranxNodeAllocator()
+  {
+    Block *block= first_block;
+    while (block != NULL)
+    {
+      Block *next= block->next;
+      free_block(block);
+      block= next;
+    }
+  }
+
+  TranxNode *allocate_node()
+  {
+    TranxNode *trx_node;
+    Block *block= current_block;
+
+    if (last_node == BLOCK_TRANX_NODES-1)
+    {
+      current_block= current_block->next;
+      last_node= -1;
+    }
+
+    if (current_block == NULL && allocate_block())
+    {
+      current_block= block;
+      if (current_block)
+        last_node= BLOCK_TRANX_NODES-1;
+      return NULL;
+    }
+
+    trx_node= &(current_block->nodes[++last_node]);
+    trx_node->log_name_[0] = '\0';
+    trx_node->log_pos_= 0;
+    trx_node->next_= 0;
+    trx_node->hash_next_= 0;
+    return trx_node;
+  }
+
+  int free_all_nodes()
+  {
+    current_block= first_block;
+    last_node= -1;
+    free_blocks();
+    return 0;
+  }
+
+  int free_nodes_before(TranxNode* node)
+  {
+    Block *block;
+    Block *prev_block;
+
+    block= first_block;
+    while (block != current_block->next)
+    {
+      if (&(block->nodes[0]) <= node && &(block->nodes[BLOCK_TRANX_NODES]) >= node)
+      {
+        if (first_block != block)
+        {
+          last_block->next= first_block;
+          first_block= block;
+          last_block= prev_block;
+          last_block->next= NULL;
+          free_blocks();
+        }
+        return 0;
+      }
+      prev_block= block;
+      block= block->next;
+    }
+
+    /* Node does not find should never happen */
+    DBUG_ASSERT(0);
+    return 1;
+  }
+
+private:
+  uint max_reserved_block_num;
+  int last_node;
+
+  struct Block {
+    Block *next;
+    TranxNode nodes[BLOCK_TRANX_NODES];
+  };
+  Block *first_block;
+  Block *last_block;
+  Block *current_block;
+  uint block_num;
+
+  int allocate_block()
+  {
+    Block *block= (Block *)my_malloc(sizeof(Block), MYF(0));
+    if (block)
+    {
+      block->next= NULL;
+
+      if (first_block == NULL)
+        first_block= block;
+      else
+        last_block->next= block;
+
+      /* New block always is put at the rear */
+      last_block= block;
+      /* New block always is the current_block */
+      current_block= block;
+      ++block_num;
+      return 0;
+    }
+    return 1;
+  }
+
+  void free_block(Block *block)
+  {
+    my_free(block, MYF(0));
+    --block_num;
+  }
+
+  void free_blocks()
+  {
+    if (current_block == NULL || current_block->next == NULL)
+      return;
+
+    /* One free block is always kept behind the current block */
+    Block *block= current_block->next->next;
+    while (block_num > max_reserved_block_num && block != NULL)
+    {
+      Block *next= block->next;
+      free_block(block);
+      block= next;
+    }
+    current_block->next= block;
+    if (block == NULL)
+      last_block= current_block;
+  }
+};
+
+
 /**
    This class manages memory for active transaction list.
 
@@ -31,13 +221,8 @@
 class ActiveTranx
   :public Trace {
 private:
-  struct TranxNode {
-    char             log_name_[FN_REFLEN];
-    my_off_t          log_pos_;
-    struct TranxNode *next_;            /* the next node in the sorted list */
-    struct TranxNode *hash_next_;    /* the next node during hash collision */
-  };
 
+  TranxNodeAllocator allocator_;
   /* These two record the active transaction list in sort order. */
   TranxNode       *trx_front_, *trx_rear_;
 
@@ -48,24 +233,22 @@ private:
 
   inline void assert_lock_owner();
 
-  inline TranxNode* alloc_tranx_node();
-
   inline unsigned int calc_hash(const unsigned char *key,unsigned int length);
   unsigned int get_hash_value(const char *log_file_name, my_off_t log_file_pos);
 
   int compare(const char *log_file_name1, my_off_t log_file_pos1,
-	      const TranxNode *node2) {
+              const TranxNode *node2) {
     return compare(log_file_name1, log_file_pos1,
-		   node2->log_name_, node2->log_pos_);
+                   node2->log_name_, node2->log_pos_);
   }
   int compare(const TranxNode *node1,
-	      const char *log_file_name2, my_off_t log_file_pos2) {
+              const char *log_file_name2, my_off_t log_file_pos2) {
     return compare(node1->log_name_, node1->log_pos_,
-		   log_file_name2, log_file_pos2);
+                   log_file_name2, log_file_pos2);
   }
   int compare(const TranxNode *node1, const TranxNode *node2) {
     return compare(node1->log_name_, node1->log_pos_,
-		   node2->log_name_, node2->log_pos_);
+                   node2->log_name_, node2->log_pos_);
   }
 
 public:
@@ -88,7 +271,7 @@ public:
    *  0: success;  non-zero: error
    */
   int clear_active_tranx_nodes(const char *log_file_name,
-			       my_off_t    log_file_pos);
+                               my_off_t    log_file_pos);
 
   /* Given a position, check to see whether the position is an active
    * transaction's ending position by probing the hash table.
@@ -99,7 +282,7 @@ public:
    * (file_name, file_position).
    */
   static int compare(const char *log_file_name1, my_off_t log_file_pos1,
-		     const char *log_file_name2, my_off_t log_file_pos2);
+                     const char *log_file_name2, my_off_t log_file_pos2);
 
 };
 

=== modified file 'sql/rpl_handler.cc'
--- a/sql/rpl_handler.cc	2009-12-04 01:46:33 +0000
+++ b/sql/rpl_handler.cc	2010-01-27 07:48:44 +0000
@@ -190,8 +190,8 @@ int Trans_delegate::after_commit(THD *th
 {
   Trans_param param;
   bool is_real_trans= (all || thd->transaction.all.ha_list == 0);
-  if (is_real_trans)
-    param.flags |= TRANS_IS_REAL_TRANS;
+
+  param.flags = is_real_trans ? TRANS_IS_REAL_TRANS : 0;
 
   Trans_binlog_info *log_info=
     my_pthread_getspecific_ptr(Trans_binlog_info*, RPL_TRANS_BINLOG_INFO);
@@ -218,8 +218,8 @@ int Trans_delegate::after_rollback(THD *
 {
   Trans_param param;
   bool is_real_trans= (all || thd->transaction.all.ha_list == 0);
-  if (is_real_trans)
-    param.flags |= TRANS_IS_REAL_TRANS;
+
+  param.flags = is_real_trans ? TRANS_IS_REAL_TRANS : 0;
 
   Trans_binlog_info *log_info=
     my_pthread_getspecific_ptr(Trans_binlog_info*, RPL_TRANS_BINLOG_INFO);
@@ -228,7 +228,7 @@ int Trans_delegate::after_rollback(THD *
   param.log_pos= log_info ? log_info->log_pos : 0;
 
   int ret= 0;
-  FOREACH_OBSERVER(ret, after_commit, thd, (&param));
+  FOREACH_OBSERVER(ret, after_rollback, thd, (&param));
 
   /*
     This is the end of a real transaction or autocommit statement, we


Attachment: [text/bzr-bundle] bzr/li-bing.song@sun.com-20100127074844-tosnyrogmy6lux1b.bundle
Thread
bzr commit into mysql-5.1-rep-semisync branch (Li-Bing.Song:3127) Bug#50157Li-Bing.Song27 Jan
  • Re: bzr commit into mysql-5.1-rep-semisync branch (Li-Bing.Song:3127)Bug#50157He Zhenxing28 Jan
    • Re: bzr commit into mysql-5.1-rep-semisync branch (Li-Bing.Song:3127)Bug#50157Libing Song29 Jan
      • Re: bzr commit into mysql-5.1-rep-semisync branch (Li-Bing.Song:3127)Bug#50157He Zhenxing29 Jan
  • Re: bzr commit into mysql-5.1-rep-semisync branch (Li-Bing.Song:3127)Bug#50157Luís Soares28 Jan
    • Re: bzr commit into mysql-5.1-rep-semisync branch (Li-Bing.Song:3127)Bug#50157Libing Song29 Jan
      • Re: bzr commit into mysql-5.1-rep-semisync branch (Li-Bing.Song:3127)Bug#50157Luís Soares29 Jan