List:Commits« Previous MessageNext Message »
From:He Zhenxing Date:January 28 2010 4:07am
Subject:Re: bzr commit into mysql-5.1-rep-semisync branch (Li-Bing.Song:3127)
Bug#50157
View as plain text  
Hi Libing,

Nice work!

STATUS
------
Approved after fix RC1.

REQUIRED CHANGES
----------------
RC1. In free_blocks():

   current_block->next= block;
   if (block == NULL)
     last_block= current_block;

I think the above should be:

   current_block->next->next= block;
   if (block == NULL)
     last_block= current_block->next;

REQUESTS
--------
N/A

SUGGESTIONS
-----------

S1. Please use the doxygen comment style.

Please look for some inline coments.

Li-Bing.Song@stripped wrote:
> #At file:///home/anders/work/bzrroot/mysql-5.1-rep-semisync/ based on
> revid:zhenxing.he@stripped
> 
>  3127 Li-Bing.Song@stripped	2010-01-27
>       BUG#50157 Assertion !active_tranxs_->is_tranx_end_pos(..) in
> ReplSemiSyncMaster::commitTrx
>       
>       The root cause of the crash is that a TranxNode is freed before it is used.
>       A TranxNode is allocated and insertted into the active list each time 
>       when some log events are written into binlog file and is flushed. 
>       The memory for TranxNode is allocted with thd_alloc and will be freed 
>       after at the end of the statement. The after_commit/after_rollback callback
>       was supposed to be call before the end of each statement and remove the node
> from
>       the active list. However this assumption is not correct in all cases(e.g.
> CREATE
>       TEMPORARY ... SELECT), and can cause the memory allocated for TranxNode be
> freed
>       before it was removed from the active list. So The TranxNode pointer in the
> active
>       list would become a wild pointer and cause the crash.
>       
>       After this patch, The memory for TranxNode will be allocated by my_malloc.
>      @ sql/rpl_handler.cc
>         params are not initialized.
> 
>     modified:
>       mysql-test/suite/rpl/r/rpl_semi_sync.result
>       mysql-test/suite/rpl/t/rpl_semi_sync.test
>       plugin/semisync/semisync_master.cc
>       plugin/semisync/semisync_master.h
>       sql/rpl_handler.cc
> === modified file 'mysql-test/suite/rpl/r/rpl_semi_sync.result'
> --- a/mysql-test/suite/rpl/r/rpl_semi_sync.result	2009-10-23 04:56:30 +0000
> +++ b/mysql-test/suite/rpl/r/rpl_semi_sync.result	2010-01-27 07:48:44 +0000
> @@ -120,8 +120,27 @@ min(a)
>  select max(a) from t1;
>  max(a)
>  300
> +
> +# BUG#50157
> +# semi-sync replication crashes when replicating a transaction which
> +# include 'CREATE TEMPORARY TABLE `MyISAM_t` SELECT * FROM `Innodb_t` ;
> +[ on master ]
> +SET SESSION AUTOCOMMIT= 0;
> +CREATE TABLE t2(c1 INT) ENGINE=innodb;
> +BEGIN;
> +
> +# Even though it is in a transaction, this statement is binlogged into binlog
> +# file immediately.
> +CREATE TEMPORARY TABLE t3 SELECT c1 FROM t2 where 1=1;
> +
> +# These statements will not be binlogged until the transaction is committed
> +INSERT INTO t2 VALUES(11);
> +INSERT INTO t2 VALUES(22);
> +COMMIT;
> +DROP TABLE t2, t3;
> +SET SESSION AUTOCOMMIT= 1;
>  #
> -# Test semi-sync master will switch OFF after one transacton
> +# Test semi-sync master will switch OFF after one transaction
>  # timeout waiting for slave reply.
>  #
>  include/stop_slave.inc
> @@ -135,7 +154,7 @@ Variable_name	Value
>  Rpl_semi_sync_master_no_tx	0
>  show status like 'Rpl_semi_sync_master_yes_tx';
>  Variable_name	Value
> -Rpl_semi_sync_master_yes_tx	301
> +Rpl_semi_sync_master_yes_tx	304
>  show status like 'Rpl_semi_sync_master_clients';
>  Variable_name	Value
>  Rpl_semi_sync_master_clients	1
> @@ -150,7 +169,7 @@ Variable_name	Value
>  Rpl_semi_sync_master_no_tx	1
>  show status like 'Rpl_semi_sync_master_yes_tx';
>  Variable_name	Value
> -Rpl_semi_sync_master_yes_tx	301
> +Rpl_semi_sync_master_yes_tx	304
>  insert into t1 values (100);
>  [ master status should be OFF ]
>  show status like 'Rpl_semi_sync_master_status';
> @@ -161,7 +180,7 @@ Variable_name	Value
>  Rpl_semi_sync_master_no_tx	302
>  show status like 'Rpl_semi_sync_master_yes_tx';
>  Variable_name	Value
> -Rpl_semi_sync_master_yes_tx	301
> +Rpl_semi_sync_master_yes_tx	304
>  #
>  # Test semi-sync status on master will be ON again when slave catches up
>  #
> @@ -194,7 +213,7 @@ Variable_name	Value
>  Rpl_semi_sync_master_no_tx	302
>  show status like 'Rpl_semi_sync_master_yes_tx';
>  Variable_name	Value
> -Rpl_semi_sync_master_yes_tx	301
> +Rpl_semi_sync_master_yes_tx	304
>  show status like 'Rpl_semi_sync_master_clients';
>  Variable_name	Value
>  Rpl_semi_sync_master_clients	1
> @@ -213,7 +232,7 @@ Variable_name	Value
>  Rpl_semi_sync_master_no_tx	302
>  SHOW STATUS LIKE 'Rpl_semi_sync_master_yes_tx';
>  Variable_name	Value
> -Rpl_semi_sync_master_yes_tx	302
> +Rpl_semi_sync_master_yes_tx	305
>  FLUSH NO_WRITE_TO_BINLOG STATUS;
>  [ Semi-sync master status variables after FLUSH STATUS ]
>  SHOW STATUS LIKE 'Rpl_semi_sync_master_no_tx';
> 
> === modified file 'mysql-test/suite/rpl/t/rpl_semi_sync.test'
> --- a/mysql-test/suite/rpl/t/rpl_semi_sync.test	2009-10-23 04:56:30 +0000
> +++ b/mysql-test/suite/rpl/t/rpl_semi_sync.test	2010-01-27 07:48:44 +0000
> @@ -193,8 +193,36 @@ select count(distinct a) from t1;
>  select min(a) from t1;
>  select max(a) from t1;
>  
> +--echo
> +--echo # BUG#50157
> +--echo # semi-sync replication crashes when replicating a transaction which
> +--echo # include 'CREATE TEMPORARY TABLE `MyISAM_t` SELECT * FROM `Innodb_t` ;
> +
> +connection master;
> +echo [ on master ];
> +SET SESSION AUTOCOMMIT= 0;
> +CREATE TABLE t2(c1 INT) ENGINE=innodb;
> +sync_slave_with_master;
> +
> +connection master;
> +BEGIN;
> +--echo
> +--echo # Even though it is in a transaction, this statement is binlogged into
> binlog
> +--echo # file immediately.
> +CREATE TEMPORARY TABLE t3 SELECT c1 FROM t2 where 1=1;
> +--echo
> +--echo # These statements will not be binlogged until the transaction is committed
> +INSERT INTO t2 VALUES(11);
> +INSERT INTO t2 VALUES(22);
> +COMMIT;
> +
> +DROP TABLE t2, t3;
> +SET SESSION AUTOCOMMIT= 1;
> +sync_slave_with_master;
> +
> +
>  --echo #
> ---echo # Test semi-sync master will switch OFF after one transacton
> +--echo # Test semi-sync master will switch OFF after one transaction
>  --echo # timeout waiting for slave reply.
>  --echo #
>  connection slave;
> 
> === modified file 'plugin/semisync/semisync_master.cc'
> --- a/plugin/semisync/semisync_master.cc	2009-12-04 05:43:38 +0000
> +++ b/plugin/semisync/semisync_master.cc	2010-01-27 07:48:44 +0000
> @@ -65,7 +65,7 @@ static int gettimeofday(struct timeval *
>  
>  ActiveTranx::ActiveTranx(pthread_mutex_t *lock,
>  			 unsigned long trace_level)
> -  : Trace(trace_level),
> +  : Trace(trace_level), allocator_(max_connections),
>      num_entries_(max_connections << 1), /* Transaction hash table size
>                                           * is set to double the size
>                                           * of max_connections */
> @@ -115,25 +115,6 @@ unsigned int ActiveTranx::get_hash_value
>    return (hash1 + hash2) % num_entries_;
>  }
>  
> -ActiveTranx::TranxNode* ActiveTranx::alloc_tranx_node()
> -{
> -  MYSQL_THD thd= (MYSQL_THD)current_thd;
> -  /* The memory allocated for TranxNode will be automatically freed at
> -     the end of the command of current THD. And because
> -     ha_autocommit_or_rollback() will always be called before that, so
> -     we are sure that the node will be removed from the active list
> -     before it get freed. */
> -  TranxNode *trx_node = (TranxNode *)thd_alloc(thd, sizeof(TranxNode));
> -  if (trx_node)
> -  {
> -    trx_node->log_name_[0] = '\0';
> -    trx_node->log_pos_= 0;
> -    trx_node->next_= 0;
> -    trx_node->hash_next_= 0;
> -  }
> -  return trx_node;
> -}
> -
>  int ActiveTranx::compare(const char *log_file_name1, my_off_t log_file_pos1,
>  			 const char *log_file_name2, my_off_t log_file_pos2)
>  {
> @@ -159,7 +140,7 @@ int ActiveTranx::insert_tranx_node(const
>  
>    function_enter(kWho);
>  
> -  ins_node = alloc_tranx_node();
> +  ins_node = allocator_.allocate_node();
>    if (!ins_node)
>    {
>      sql_print_error("%s: transaction node allocation failed for: (%s, %lu)",
> @@ -271,6 +252,7 @@ int ActiveTranx::clear_active_tranx_node
>  
>      /* Clear the hash table. */
>      memset(trx_htb_, 0, num_entries_ * sizeof(TranxNode *));
> +    allocator_.free_all_nodes();
>  
>      /* Clear the active transaction list. */
>      if (trx_front_ != NULL)
> @@ -311,6 +293,7 @@ int ActiveTranx::clear_active_tranx_node
>      }
>  
>      trx_front_ = new_front;
> +    allocator_.free_nodes_before(trx_front_);
>  
>      if (trace_level_ & kTraceDetail)
>        sql_print_information("%s: cleared %d nodes back until pos (%s, %lu)",
> 
> === modified file 'plugin/semisync/semisync_master.h'
> --- a/plugin/semisync/semisync_master.h	2009-12-04 01:46:33 +0000
> +++ b/plugin/semisync/semisync_master.h	2010-01-27 07:48:44 +0000
> @@ -20,6 +20,196 @@
>  
>  #include "semisync.h"
>  
> +struct TranxNode {
> +  char             log_name_[FN_REFLEN];
> +  my_off_t          log_pos_;
> +  struct TranxNode *next_;            /* the next node in the sorted list */
> +  struct TranxNode *hash_next_;    /* the next node during hash collision */
> +};
> +
> +/*
> +  TranxNodeAllocator provides memory allocating and freeing methods for
> +  TranxNode.
> +
> +  BLOCK_TRANX_NODES
> +    The number of TranxNodes which are in a block.
> +  Block
> +    A sequence memory which contain BLOCK_TRANX_NODES TranxNodes.
> +    The Block link table is composed of number of Blocks.
> +  last_node
> +    it always points to the last node in current_block which is in use.
> +  current_block
> +    Part of its nodes are free and others are used. The Blocks before it are
> +    all in use and the blocks after it are all free.
> +  allocate_node
> +    The pointer of the first free node in current_block is returned to the
> +    caller. current_block will move to the next free block when all nodes of
> +    it are in use. A new block is allocated and is put into the rear of the
> +    Block link table if no block is free. At first, there is no any Block in
> +    the Block link table. A Block is allocated and is put into the Block link
> +    table;
> +  free_nodes_before(TranxNode *node)
> +    All blocks before the block which owneres the node gaving by the caller are
> +    put into the rear of the Block link table. The block containing
> +    the node gaving by the caller is not move to the rear of the Block link
> +    table, for some nodes of its are still in use. We only say a block is free
> +    if all its node are free. This will waste at most one block, but it is more
> +    efficient.
> + */
> +#define BLOCK_TRANX_NODES 16
> +class TranxNodeAllocator
> +{
> +public:
> +  /*
> +    init_nodes
> +      How many TranxNodes are usually need.

The number of reserved TranxNode, when freeing memory, we will reserved
at least this number of nodes not freed.

So maybe a better name for this is 'reserved_nodes'

> +    max_reserved_block_num
> +      How many blocks are need to cantain the TranxNodes of init_nodes.

The number of reserved blocks that will not be freed.

> +      Some free Blocks will be freed if there are too many Blocks in the
> +      Block link table.

When freeing blocks, we will reserve this number of blocks not freed.

> +   */
> +  TranxNodeAllocator(uint init_nodes) :
> +    max_reserved_block_num(init_nodes/BLOCK_TRANX_NODES +
> +                  (init_nodes % BLOCK_TRANX_NODES > 1 ? 2 : 1)),
> +    last_node(-1), first_block(NULL), last_block(NULL),
> +    current_block(NULL), block_num(0) {}
> +
> +  ~TranxNodeAllocator()
> +  {
> +    Block *block= first_block;
> +    while (block != NULL)
> +    {
> +      Block *next= block->next;
> +      free_block(block);
> +      block= next;
> +    }
> +  }
> +
> +  TranxNode *allocate_node()
> +  {
> +    TranxNode *trx_node;
> +    Block *block= current_block;
> +
> +    if (last_node == BLOCK_TRANX_NODES-1)
> +    {
> +      current_block= current_block->next;
> +      last_node= -1;
> +    }
> +
> +    if (current_block == NULL && allocate_block())
> +    {
> +      current_block= block;
> +      if (current_block)
> +        last_node= BLOCK_TRANX_NODES-1;
> +      return NULL;
> +    }
> +

I'd suggest:

if (last_node == BLOCK_TRANX_NODES-1)
{
   if (current_block->next != NULL)
      current_block = current_block->next;
   else if (allocate_block())
      return NULL;
   last_node = -1;
}

> +    trx_node= &(current_block->nodes[++last_node]);
> +    trx_node->log_name_[0] = '\0';
> +    trx_node->log_pos_= 0;
> +    trx_node->next_= 0;
> +    trx_node->hash_next_= 0;
> +    return trx_node;
> +  }
> +
> +  int free_all_nodes()
> +  {
> +    current_block= first_block;
> +    last_node= -1;
> +    free_blocks();
> +    return 0;
> +  }
> +
> +  int free_nodes_before(TranxNode* node)
> +  {
> +    Block *block;
> +    Block *prev_block;
> +
> +    block= first_block;
> +    while (block != current_block->next)
> +    {
> +      if (&(block->nodes[0]) <= node &&
> &(block->nodes[BLOCK_TRANX_NODES]) >= node)
> +      {
> +        if (first_block != block)
> +        {
> +          last_block->next= first_block;
> +          first_block= block;
> +          last_block= prev_block;
> +          last_block->next= NULL;
> +          free_blocks();
> +        }
> +        return 0;
> +      }
> +      prev_block= block;
> +      block= block->next;
> +    }
> +
> +    /* Node does not find should never happen */
> +    DBUG_ASSERT(0);
> +    return 1;
> +  }
> +
> +private:
> +  uint max_reserved_block_num;
> +  int last_node;
> +
> +  struct Block {
> +    Block *next;
> +    TranxNode nodes[BLOCK_TRANX_NODES];
> +  };
> +  Block *first_block;
> +  Block *last_block;
> +  Block *current_block;
> +  uint block_num;
> +
> +  int allocate_block()
> +  {
> +    Block *block= (Block *)my_malloc(sizeof(Block), MYF(0));
> +    if (block)
> +    {
> +      block->next= NULL;
> +
> +      if (first_block == NULL)
> +        first_block= block;
> +      else
> +        last_block->next= block;
> +
> +      /* New block always is put at the rear */
> +      last_block= block;
> +      /* New block always is the current_block */
> +      current_block= block;
> +      ++block_num;
> +      return 0;
> +    }
> +    return 1;
> +  }
> +
> +  void free_block(Block *block)
> +  {
> +    my_free(block, MYF(0));
> +    --block_num;
> +  }
> +
> +  void free_blocks()
> +  {
> +    if (current_block == NULL || current_block->next == NULL)
> +      return;
> +
> +    /* One free block is always kept behind the current block */
> +    Block *block= current_block->next->next;
> +    while (block_num > max_reserved_block_num && block != NULL)
> +    {
> +      Block *next= block->next;
> +      free_block(block);
> +      block= next;
> +    }
> +    current_block->next= block;
> +    if (block == NULL)
> +      last_block= current_block;

See RC1.

> +  }
> +};
> +
> +
>  /**
>     This class manages memory for active transaction list.
>  
> @@ -31,13 +221,8 @@
>  class ActiveTranx
>    :public Trace {
>  private:
> -  struct TranxNode {
> -    char             log_name_[FN_REFLEN];
> -    my_off_t          log_pos_;
> -    struct TranxNode *next_;            /* the next node in the sorted list */
> -    struct TranxNode *hash_next_;    /* the next node during hash collision */
> -  };
>  
> +  TranxNodeAllocator allocator_;
>    /* These two record the active transaction list in sort order. */
>    TranxNode       *trx_front_, *trx_rear_;
>  
> @@ -48,24 +233,22 @@ private:
>  
>    inline void assert_lock_owner();
>  
> -  inline TranxNode* alloc_tranx_node();
> -
>    inline unsigned int calc_hash(const unsigned char *key,unsigned int length);
>    unsigned int get_hash_value(const char *log_file_name, my_off_t log_file_pos);
>  
>    int compare(const char *log_file_name1, my_off_t log_file_pos1,
> -	      const TranxNode *node2) {
> +              const TranxNode *node2) {
>      return compare(log_file_name1, log_file_pos1,
> -		   node2->log_name_, node2->log_pos_);
> +                   node2->log_name_, node2->log_pos_);
>    }
>    int compare(const TranxNode *node1,
> -	      const char *log_file_name2, my_off_t log_file_pos2) {
> +              const char *log_file_name2, my_off_t log_file_pos2) {
>      return compare(node1->log_name_, node1->log_pos_,
> -		   log_file_name2, log_file_pos2);
> +                   log_file_name2, log_file_pos2);
>    }
>    int compare(const TranxNode *node1, const TranxNode *node2) {
>      return compare(node1->log_name_, node1->log_pos_,
> -		   node2->log_name_, node2->log_pos_);
> +                   node2->log_name_, node2->log_pos_);
>    }
>  
>  public:
> @@ -88,7 +271,7 @@ public:
>     *  0: success;  non-zero: error
>     */
>    int clear_active_tranx_nodes(const char *log_file_name,
> -			       my_off_t    log_file_pos);
> +                               my_off_t    log_file_pos);
>  
>    /* Given a position, check to see whether the position is an active
>     * transaction's ending position by probing the hash table.
> @@ -99,7 +282,7 @@ public:
>     * (file_name, file_position).
>     */
>    static int compare(const char *log_file_name1, my_off_t log_file_pos1,
> -		     const char *log_file_name2, my_off_t log_file_pos2);
> +                     const char *log_file_name2, my_off_t log_file_pos2);
>  
>  };
>  
> 
> === modified file 'sql/rpl_handler.cc'
> --- a/sql/rpl_handler.cc	2009-12-04 01:46:33 +0000
> +++ b/sql/rpl_handler.cc	2010-01-27 07:48:44 +0000
> @@ -190,8 +190,8 @@ int Trans_delegate::after_commit(THD *th
>  {
>    Trans_param param;
>    bool is_real_trans= (all || thd->transaction.all.ha_list == 0);
> -  if (is_real_trans)
> -    param.flags |= TRANS_IS_REAL_TRANS;
> +
> +  param.flags = is_real_trans ? TRANS_IS_REAL_TRANS : 0;
>  
>    Trans_binlog_info *log_info=
>      my_pthread_getspecific_ptr(Trans_binlog_info*, RPL_TRANS_BINLOG_INFO);
> @@ -218,8 +218,8 @@ int Trans_delegate::after_rollback(THD *
>  {
>    Trans_param param;
>    bool is_real_trans= (all || thd->transaction.all.ha_list == 0);
> -  if (is_real_trans)
> -    param.flags |= TRANS_IS_REAL_TRANS;
> +
> +  param.flags = is_real_trans ? TRANS_IS_REAL_TRANS : 0;
>  
>    Trans_binlog_info *log_info=
>      my_pthread_getspecific_ptr(Trans_binlog_info*, RPL_TRANS_BINLOG_INFO);
> @@ -228,7 +228,7 @@ int Trans_delegate::after_rollback(THD *
>    param.log_pos= log_info ? log_info->log_pos : 0;
>  
>    int ret= 0;
> -  FOREACH_OBSERVER(ret, after_commit, thd, (&param));
> +  FOREACH_OBSERVER(ret, after_rollback, thd, (&param));
>  
>    /*
>      This is the end of a real transaction or autocommit statement, we
> 


Thread
bzr commit into mysql-5.1-rep-semisync branch (Li-Bing.Song:3127) Bug#50157Li-Bing.Song27 Jan
  • Re: bzr commit into mysql-5.1-rep-semisync branch (Li-Bing.Song:3127)Bug#50157He Zhenxing28 Jan
    • Re: bzr commit into mysql-5.1-rep-semisync branch (Li-Bing.Song:3127)Bug#50157Libing Song29 Jan
      • Re: bzr commit into mysql-5.1-rep-semisync branch (Li-Bing.Song:3127)Bug#50157He Zhenxing29 Jan
  • Re: bzr commit into mysql-5.1-rep-semisync branch (Li-Bing.Song:3127)Bug#50157Luís Soares28 Jan
    • Re: bzr commit into mysql-5.1-rep-semisync branch (Li-Bing.Song:3127)Bug#50157Libing Song29 Jan
      • Re: bzr commit into mysql-5.1-rep-semisync branch (Li-Bing.Song:3127)Bug#50157Luís Soares29 Jan