Below is the list of changes that have just been committed into a local
5.1 repository of mats. When mats does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html
ChangeSet@stripped, 2006-10-23 22:06:55+02:00, mats@romeo.(none) +5 -0
BUG#22864 (Rollback following CREATE... SELECT discards 'CREATE TABLE' from log):
Adding a rollback point which will be set for the CREATE-SELECT
statement only. Explicit rollbacks will truncate the transaction
cache to the rollback point and write the remaining data in the
transaction cache to the binary log.
mysql-test/t/rpl_row_create_table.test@stripped, 2006-10-23 22:06:49+02:00, mats@romeo.(none)
+28 -0
Adding test that the CREATE TABLE in a CREATE-SELECT that is rolled
back explicitly is logged.
sql/log.cc@stripped, 2006-10-23 22:06:49+02:00, mats@romeo.(none) +189 -27
Adding helper class to handle lock/unlock of mutexes using RAII.
Adding rollback point to allow partial rollback of transaction cache.
Factoring out code into write_cache() function to write the transaction
cache to the binary log.
Adding function THD::binlog_flush_transaction_cache() to flush the
transaction cache to the binary log file.
Adding functions THD::binlog_{set,clear}_rollback_point() to set and
clear the rollback point in the transaction cache.
Factoring out code into binlog_set_stmt_begin() to set the beginning
of statement savepoint.
Clearing rollback point and before-statement point when the transaction
cache is truncated to a position before them, since they would then be out of range.
sql/log.h@stripped, 2006-10-23 22:06:49+02:00, mats@romeo.(none) +2 -0
Adding method MYSQL_BIN_LOG::write_cache().
sql/sql_class.h@stripped, 2006-10-23 22:06:49+02:00, mats@romeo.(none) +4 -0
Adding function THD::binlog_flush_transaction_cache().
Adding function THD::binlog_set_stmt_begin().
Adding functions THD::binlog_{set,clear}_rollback_point().
sql/sql_insert.cc@stripped, 2006-10-23 22:06:49+02:00, mats@romeo.(none) +7 -0
Setting rollback point on successful creation of table.
Clearing rollback point on CREATE-SELECT failure before calling
select_insert::send_error().
# This is a BitKeeper patch. What follows are the unified diffs for the
# set of deltas contained in the patch. The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User: mats
# Host: romeo.(none)
# Root: /home/bk/b22864-mysql-5.1-new-rpl
--- 1.238/sql/log.cc 2006-10-23 22:07:05 +02:00
+++ 1.239/sql/log.cc 2006-10-23 22:07:05 +02:00
@@ -82,13 +82,49 @@
}
/*
+ Helper class to hold a mutex for the duration of the
+ block.
+
+ Eliminates the need for explicit unlocking of mutexes on, e.g.,
+ error returns. On passing a null pointer, the sentry will not do
+ anything.
+ */
+class Mutex_sentry
+{
+public:
+ Mutex_sentry(pthread_mutex_t *mutex)
+ : m_mutex(mutex)
+ {
+ if (m_mutex)
+ pthread_mutex_lock(mutex);
+ }
+
+ ~Mutex_sentry()
+ {
+ if (m_mutex)
+ pthread_mutex_unlock(m_mutex);
+#ifndef DBUG_OFF
+ m_mutex= 0;
+#endif
+ }
+
+private:
+ pthread_mutex_t *m_mutex;
+
+ // It's not allowed to copy this object in any way
+ Mutex_sentry(Mutex_sentry const&);
+ void operator=(Mutex_sentry const&);
+};
+
+/*
Helper class to store binary log transaction data.
*/
class binlog_trx_data {
public:
binlog_trx_data()
#ifdef HAVE_ROW_BASED_REPLICATION
- : m_pending(0), before_stmt_pos(MY_OFF_T_UNDEF)
+ : m_pending(0), before_stmt_pos(MY_OFF_T_UNDEF),
+ rollback_point(MY_OFF_T_UNDEF)
#endif
{
trans_log.end_of_file= max_binlog_cache_size;
@@ -121,11 +157,19 @@
*/
void truncate(my_off_t pos)
{
+ DBUG_PRINT("info", ("truncating to position %lu", pos));
+ DBUG_PRINT("info", ("rollback_point=%lu, before_stmt_pos=%lu", pos));
#ifdef HAVE_ROW_BASED_REPLICATION
delete pending();
set_pending(0);
#endif
reinit_io_cache(&trans_log, WRITE_CACHE, pos, 0, 0);
+#ifdef HAVE_ROW_BASED_REPLICATION
+ if (pos < rollback_point)
+ rollback_point= MY_OFF_T_UNDEF;
+ if (pos < before_stmt_pos)
+ before_stmt_pos= MY_OFF_T_UNDEF;
+#endif
}
/*
@@ -137,6 +181,7 @@
truncate(0);
#ifdef HAVE_ROW_BASED_REPLICATION
before_stmt_pos= MY_OFF_T_UNDEF;
+ rollback_point= MY_OFF_T_UNDEF;
#endif
trans_log.end_of_file= max_binlog_cache_size;
}
@@ -168,6 +213,12 @@
Binlog position before the start of the current statement.
*/
my_off_t before_stmt_pos;
+
+ /*
+ Rollback point used when an initial portion of the transaction
+ cache should be flushed at rollback.
+ */
+ my_off_t rollback_point;
#endif
};
@@ -1417,11 +1468,33 @@
If rolling back a statement in a transaction, we truncate the
transaction cache to remove the statement.
+ A special case is when a CREATE-SELECT is executed under
+ row-based replication. In that case the create statement is
+ always first in the transaction cache (since there is an
+ implicit commit point just before the statement). In that case,
+ we truncate the transaction cache to the rollback point, and
+ then flush the transaction cache to disk (which will flush the
+ create statement, but not the rolled back rows).
*/
if (all || !(thd->options & (OPTION_BEGIN | OPTION_NOT_AUTOCOMMIT)))
+ {
+ /*
+ We are rolling back the entire transaction, but have a
+ rollback point set.
+ */
+ if (trx_data->rollback_point != MY_OFF_T_UNDEF)
+ {
+ trx_data->truncate(trx_data->rollback_point);
+ int error= mysql_bin_log.write_cache(&trx_data->trans_log, true, true);
+ if (error)
+ DBUG_RETURN(error);
+ }
trx_data->reset();
+ }
else
+ {
trx_data->truncate(trx_data->before_stmt_pos); // ...statement
+ }
/*
We need to step the table map version on a rollback to ensure
@@ -2010,7 +2083,7 @@
goto err;
/* command_type, thread_id */
- length= my_snprintf(buff, 32, "%5ld ", thread_id);
+ length= my_snprintf(buff, 32, "%5ld ", static_cast<long>(thread_id));
if (my_b_write(&log_file, (byte*) buff, length))
goto err;
@@ -3338,18 +3411,7 @@
if (trx_data == NULL ||
trx_data->before_stmt_pos == MY_OFF_T_UNDEF)
{
- /*
- The call to binlog_trans_log_savepos() might create the trx_data
- structure, if it didn't exist before, so we save the position
- into an auto variable and then write it into the transaction
- data for the binary log (i.e., trx_data).
- */
- my_off_t pos= 0;
- binlog_trans_log_savepos(this, &pos);
- trx_data= (binlog_trx_data*) ha_data[binlog_hton->slot];
-
- trx_data->before_stmt_pos= pos;
-
+ this->binlog_set_stmt_begin();
if (options & (OPTION_NOT_AUTOCOMMIT | OPTION_BEGIN))
trans_register_ha(this, TRUE, binlog_hton);
trans_register_ha(this, FALSE, binlog_hton);
@@ -3357,6 +3419,80 @@
DBUG_VOID_RETURN;
}
+void THD::binlog_set_rollback_point() {
+ DBUG_ENTER("binlog_set_rollback_point");
+ binlog_trx_data *trx_data=
+ (binlog_trx_data*) ha_data[binlog_hton->slot];
+
+ /*
+ The call to binlog_trans_log_savepos() might create the trx_data
+ structure, if it didn't exist before, so we save the position
+ into an auto variable and then write it into the transaction
+ data for the binary log (i.e., trx_data).
+ */
+ my_off_t pos= 0;
+ binlog_trans_log_savepos(this, &pos);
+ trx_data= (binlog_trx_data*) ha_data[binlog_hton->slot];
+ trx_data->rollback_point= pos;
+ DBUG_PRINT("info", ("trx_data->rollback_point=%u", pos));
+ DBUG_VOID_RETURN;
+}
+
+void THD::binlog_clear_rollback_point() {
+ DBUG_ENTER("binlog_clear_rollback_point");
+ binlog_trx_data *trx_data=
+ (binlog_trx_data*) ha_data[binlog_hton->slot];
+
+ if (trx_data)
+ trx_data->rollback_point= MY_OFF_T_UNDEF;
+ DBUG_VOID_RETURN;
+}
+
+void THD::binlog_set_stmt_begin() {
+ binlog_trx_data *trx_data=
+ (binlog_trx_data*) ha_data[binlog_hton->slot];
+
+ /*
+ The call to binlog_trans_log_savepos() might create the trx_data
+ structure, if it didn't exist before, so we save the position
+ into an auto variable and then write it into the transaction
+ data for the binary log (i.e., trx_data).
+ */
+ my_off_t pos= 0;
+ binlog_trans_log_savepos(this, &pos);
+ trx_data= (binlog_trx_data*) ha_data[binlog_hton->slot];
+ trx_data->before_stmt_pos= pos;
+}
+
+int THD::binlog_flush_transaction_cache()
+{
+ DBUG_ENTER("binlog_flush_transaction_cache");
+ binlog_trx_data *trx_data= (binlog_trx_data*) ha_data[binlog_hton->slot];
+ DBUG_PRINT("enter", ("trx_data=0x%lu", trx_data));
+ if (trx_data)
+ DBUG_PRINT("enter", ("trx_data->before_stmt_pos=%u",
+ trx_data->before_stmt_pos));
+
+ /*
+ Write the transaction cache to the binary log. We don't flush and
+ sync the log file since we don't know if more will be written to
+ it. If the caller wants the log file sync:ed, the caller has to do
+ it.
+
+ The transaction data is only reset upon a successful write of the
+ cache to the binary log.
+ */
+
+ if (trx_data && likely(mysql_bin_log.is_open())) {
+ if (int error= mysql_bin_log.write_cache(&trx_data->trans_log, true, true))
+ DBUG_RETURN(error);
+ trx_data->reset();
+ }
+
+ DBUG_RETURN(0);
+}
+
+
/*
Write a table map to the binary log.
*/
@@ -3768,12 +3904,48 @@
/*
+ Write the contents of a cache to the binary log.
+
+ SYNOPSIS
+ write_cache()
+ cache Cache to write to the binary log
+ sync_log True if the log should be flushed and sync:ed
+
+ DESCRIPTION
+
+ Write the contents of the cache to the binary log. The cache will
+ be reset as a READ_CACHE to be able to read the contents from it.
+ */
+
+int MYSQL_BIN_LOG::write_cache(IO_CACHE *cache, bool lock_log, bool sync_log)
+{
+ Mutex_sentry sentry(lock_log ? &LOCK_log : NULL);
+
+ if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
+ return ER_ERROR_ON_WRITE;
+ uint bytes= my_b_bytes_in_cache(cache);
+ do
+ {
+ if (my_b_write(&log_file, cache->read_pos, bytes))
+ return ER_ERROR_ON_WRITE;
+ cache->read_pos= cache->read_end;
+ } while ((bytes= my_b_fill(cache)));
+
+ if (sync_log)
+ flush_and_sync();
+
+ return 0; // All OK
+}
+
+/*
Write a cached log entry to the binary log
SYNOPSIS
write()
thd
cache The cache to copy to the binlog
+ commit_event The commit event to print after writing the
+ contents of the cache.
NOTE
- We only come here if there is something in the cache.
@@ -3833,20 +4005,10 @@
if (qinfo.write(&log_file))
goto err;
}
- /* Read from the file used to cache the queries .*/
- if (reinit_io_cache(cache, READ_CACHE, 0, 0, 0))
- goto err;
- length=my_b_bytes_in_cache(cache);
- DBUG_EXECUTE_IF("half_binlogged_transaction", length-=100;);
- do
- {
- /* Write data to the binary log file */
- if (my_b_write(&log_file, cache->read_pos, length))
- goto err;
- cache->read_pos=cache->read_end; // Mark buffer used up
- DBUG_EXECUTE_IF("half_binlogged_transaction", goto DBUG_skip_commit;);
- } while ((length=my_b_fill(cache)));
+ if ((write_error= write_cache(cache, false, false)))
+ goto err;
+
if (commit_event && commit_event->write(&log_file))
goto err;
#ifndef DBUG_OFF
--- 1.321/sql/sql_class.h 2006-10-23 22:07:05 +02:00
+++ 1.322/sql/sql_class.h 2006-10-23 22:07:05 +02:00
@@ -931,6 +931,10 @@
Public interface to write RBR events to the binlog
*/
void binlog_start_trans_and_stmt();
+ int binlog_flush_transaction_cache();
+ void binlog_set_stmt_begin();
+ void binlog_set_rollback_point();
+ void binlog_clear_rollback_point();
int binlog_write_table_map(TABLE *table, bool is_transactional);
int binlog_write_row(TABLE* table, bool is_transactional,
MY_BITMAP const* cols, my_size_t colcnt,
--- 1.231/sql/sql_insert.cc 2006-10-23 22:07:05 +02:00
+++ 1.232/sql/sql_insert.cc 2006-10-23 22:07:05 +02:00
@@ -2986,6 +2986,10 @@
&thd->extra_lock, hook_ptr)))
DBUG_RETURN(-1); // abort() deletes table
+#ifdef HAVE_ROW_BASED_REPLICATION
+ thd->binlog_set_rollback_point();
+#endif
+
if (table->s->fields < values.elements)
{
my_error(ER_WRONG_VALUE_COUNT_ON_ROW, MYF(0), 1);
@@ -3081,6 +3085,9 @@
by removing the table, even for non-transactional tables.
*/
tmp_disable_binlog(thd);
+#ifdef HAVE_ROW_BASED_REPLICATION
+ thd->binlog_clear_rollback_point();
+#endif
select_insert::send_error(errcode, err);
reenable_binlog(thd);
}
--- 1.7/mysql-test/t/rpl_row_create_table.test 2006-10-23 22:07:05 +02:00
+++ 1.8/mysql-test/t/rpl_row_create_table.test 2006-10-23 22:07:05 +02:00
@@ -2,6 +2,7 @@
--source include/have_binlog_format_row.inc
--source include/master-slave.inc
+--source include/have_innodb.inc
# Bug#18326: Do not lock table for writing during prepare of statement
# The use of the ps protocol causes extra table maps in the binlog, so
@@ -118,3 +119,30 @@
START SLAVE;
--enable_query_log
--enable_ps_protocol
+
+# BUG#22864 (Rollback following CREATE ... SELECT discards 'CREATE
+# table' from log):
+--echo ================ BUG#22864 ================
+connection slave;
+STOP SLAVE;
+RESET SLAVE;
+connection master;
+RESET MASTER;
+connection slave;
+START SLAVE;
+connection master;
+SET AUTOCOMMIT=0;
+CREATE TABLE t1 (a INT);
+INSERT INTO t1 VALUES (1),(2),(3);
+
+CREATE TABLE t2 ENGINE=INNODB SELECT * FROM t1;
+ROLLBACK;
+
+SHOW TABLES;
+SELECT * FROM t1;
+SELECT * FROM t2;
+SHOW BINLOG EVENTS;
+sync_slave_with_master;
+SHOW TABLES;
+SELECT * FROM t1;
+SELECT * FROM t2;
--- 1.16/sql/log.h 2006-10-23 22:07:05 +02:00
+++ 1.17/sql/log.h 2006-10-23 22:07:05 +02:00
@@ -338,6 +338,8 @@
bool write(Log_event* event_info); // binary log write
bool write(THD *thd, IO_CACHE *cache, Log_event *commit_event);
+ int write_cache(IO_CACHE *cache, bool lock_log, bool flush_and_sync);
+
void start_union_events(THD *thd);
void stop_union_events(THD *thd);
bool is_query_in_union(THD *thd, query_id_t query_id_param);
| Thread |
|---|
| • bk commit into 5.1 tree (mats:1.2309) BUG#22864 | Mats Kindahl | 23 Oct |