#At bzr+ssh://bk-internal.mysql.com/bzrroot/server/mysql-maria/ based on revid:guilhem@stripped
2708 Guilhem Bichot 2008-12-05
Fix for BUG#41037 "Maria: recovery failure (pushbuild2)" (checkpoint bug).
added:
mysql-test/suite/maria/r/maria-recovery3.result
mysql-test/suite/maria/t/maria-recovery3-master.opt
mysql-test/suite/maria/t/maria-recovery3.test
modified:
storage/maria/ma_blockrec.c
storage/maria/ma_blockrec.h
storage/maria/ma_commit.c
storage/maria/ma_loghandler.c
storage/maria/ma_recovery.c
storage/maria/trnman.c
per-file messages:
mysql-test/suite/maria/r/maria-recovery3.result
result.
mysql-test/suite/maria/t/maria-recovery3-master.opt
usual recovery test options
mysql-test/suite/maria/t/maria-recovery3.test
Test for BUG#41037. Without the bugfix, the test would hang because Recovery would fail with "undo_key_insert got error 126": Recovery would believe INSERT of 2 is uncommitted, try to execute its UNDO_KEY_INSERT, fail to find the key to delete because DELETE deleted it. That failure of _ma_ck_real_delete() would mark table as corrupted (see
in d_search()) which is error 126 (HA_ERR_CRASHED).
storage/maria/ma_blockrec.c
set trn->rec_lsn to LSN of commit record, when writing it.
storage/maria/ma_blockrec.h
new function
storage/maria/ma_commit.c
when committing or rolling back, rec_lsn should be LSN_IMPOSSIBLE: assertion is moved here
from trnman_end_trn(), because now ma_commit() can set rec_lsn and so when we reach trnman_end_trn()
it's not LSN_IMPOSSIBLE anymore.
Adding debug possibility to pause between COMMIT record write and trnman_commit_trn(), to be
able to fire checkpoint in between.
storage/maria/ma_loghandler.c
in-write hook for COMMIT records
storage/maria/ma_recovery.c
More debugging info in maria_recovery.trace, to see how the starting point of REDO phase is determined
storage/maria/trnman.c
assertion cannot be here anymore see ma_commit.c
=== added file 'mysql-test/suite/maria/r/maria-recovery3.result'
--- a/mysql-test/suite/maria/r/maria-recovery3.result 1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/maria/r/maria-recovery3.result 2008-12-05 21:11:46 +0000
@@ -0,0 +1,29 @@
+set global maria_log_file_size=4294967295;
+drop database if exists mysqltest;
+create database mysqltest;
+use mysqltest;
+* shut down mysqld, removed logs, restarted it
+* TEST of Checkpoint between writing the commit log record and committing in trnman
+create table t1(a int primary key) engine=maria;
+insert into t1 values(1);
+flush table t1;
+* copied t1 for comparison
+set session debug="+d,maria_sleep_in_commit";
+insert into t1 values(2);
+set global maria_checkpoint_interval=1000;
+delete from t1 where a=2;
+SET SESSION debug="+d,maria_flush_whole_log,maria_crash";
+* crashing mysqld intentionally
+set global maria_checkpoint_interval=1;
+ERROR HY000: Lost connection to MySQL server during query
+* recovery happens
+check table t1 extended;
+Table Op Msg_type Msg_text
+mysqltest.t1 check status OK
+* testing that checksum after recovery is as expected
+Checksum-check
+ok
+use mysqltest;
+drop table t1;
+drop database mysqltest_for_comparison;
+drop database mysqltest;
=== added file 'mysql-test/suite/maria/t/maria-recovery3-master.opt'
--- a/mysql-test/suite/maria/t/maria-recovery3-master.opt 1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/maria/t/maria-recovery3-master.opt 2008-12-05 21:11:46 +0000
@@ -0,0 +1 @@
+--skip-stack-trace --skip-core-file --loose-maria-log-dir-path=$MYSQLTEST_VARDIR/tmp
=== added file 'mysql-test/suite/maria/t/maria-recovery3.test'
--- a/mysql-test/suite/maria/t/maria-recovery3.test 1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/maria/t/maria-recovery3.test 2008-12-05 21:11:46 +0000
@@ -0,0 +1,71 @@
+--source include/not_embedded.inc
+# Don't test this under valgrind, memory leaks will occur as we crash
+--source include/not_valgrind.inc
+# Binary must be compiled with debug for crash to occur
+--source include/have_debug.inc
+--source include/have_maria.inc
+
+set global maria_log_file_size=4294967295;
+let $MARIA_LOG=../tmp;
+
+--disable_warnings
+drop database if exists mysqltest;
+--enable_warnings
+create database mysqltest;
+let $mms_tname=t;
+
+# Include scripts can perform SQL. For it to not influence the main test
+# they use a separate connection. This way if they use a DDL it would
+# not autocommit in the main test.
+connect (admin, 127.0.0.1, root,,mysqltest,,);
+--enable_reconnect
+
+connection default;
+use mysqltest;
+--enable_reconnect
+
+let $mms_tables=1;
+let $mvr_restore_old_snapshot=0;
+let $mms_compare_physically=0;
+let $mvr_debug_option="+d,maria_flush_whole_log,maria_crash";
+let $mvr_crash_statement= set global maria_checkpoint_interval=1;
+
+-- source include/maria_empty_logs.inc
+
+# Test for BUG#41037 (recovery failure)
+--echo * TEST of Checkpoint between writing the commit log record and committing in trnman
+# we want recovery to use the tables as they were at time of crash
+let $mvr_restore_old_snapshot=0;
+# UNDO phase prevents physical comparison, normally,
+# so we'll only use checksums to compare.
+let $mms_compare_physically=0;
+let $mvr_crash_statement= set global maria_checkpoint_interval=1;
+create table t1(a int primary key) engine=maria;
+insert into t1 values(1);
+-- source include/maria_make_snapshot_for_comparison.inc
+set session debug="+d,maria_sleep_in_commit";
+send insert into t1 values(2);
+sleep 1;
+# Now the INSERT of 2 has written a commit record
+# but not yet called trnman_commit(), so for checkpoint it's not
+# committed.
+connection admin;
+set global maria_checkpoint_interval=1000; # force a checkpoint
+connection default;
+reap; # end of INSERT
+delete from t1 where a=2;
+# Bug was that: Recovery starts REDO scanning from too far: from
+# Checkpoint record which says INSERT is not committed, so
+# Recovery executes the INSERT's UNDO and finds no key to delete
+# (as DELETE already deleted it), fails.
+-- source include/maria_verify_recovery.inc
+drop table t1;
+
+# Note that even if machine is loaded and thus INSERT is committed
+# before checkpoint happens, test should still pass (though it won't
+# reproduce the conditions of the bug).
+
+# clean up everything
+let $mms_purpose=comparison;
+eval drop database mysqltest_for_$mms_purpose;
+drop database mysqltest;
=== modified file 'storage/maria/ma_blockrec.c'
--- a/storage/maria/ma_blockrec.c 2008-10-14 15:18:14 +0000
+++ b/storage/maria/ma_blockrec.c 2008-12-05 21:11:46 +0000
@@ -5914,7 +5914,7 @@ my_bool write_hook_for_file_id(enum tran
TRN *trn
__attribute__ ((unused)),
MARIA_HA *tbl_info,
- LSN *lsn __attribute__ ((unused)),
+ LSN *lsn,
void *hook_arg
__attribute__ ((unused)))
{
@@ -5923,6 +5923,44 @@ my_bool write_hook_for_file_id(enum tran
return 0;
}
+
+/**
+ Updates transaction's rec_lsn when committing.
+
+ A transaction writes its commit record before being committed in trnman, so
+ if Checkpoint happens just between the COMMIT record log write and the
+ commit in trnman, it will record that transaction is not committed. Assume
+ the transaction (trn1) did an INSERT; after the checkpoint, a second
+ transaction (trn2) does a DELETE of what trn1 has inserted. Then crash,
+ Checkpoint record says that trn1 was not committed, and REDO phase starts
+ from Checkpoint record's LSN. So it will not find the COMMIT record of
+ trn1, will want to roll back trn1, which will fail because the row/key
+ which it wants to delete does not exist anymore.
+ To avoid this, Checkpoint needs to know that the REDO phase must start
+ before this COMMIT record, so transaction sets its rec_lsn to the COMMIT's
+ record LSN, and as Checkpoint reads the transaction's rec_lsn, Checkpoint
+ will know.
+
+ @note so after commit trn->rec_lsn is a "commit LSN", which could be of
+ use later.
+
+ @return Operation status, always 0 (success)
+*/
+
+my_bool write_hook_for_commit(enum translog_record_type type
+ __attribute__ ((unused)),
+ TRN *trn,
+ MARIA_HA *tbl_info
+ __attribute__ ((unused)),
+ LSN *lsn,
+ void *hook_arg
+ __attribute__ ((unused)))
+{
+ trn->rec_lsn= *lsn;
+ return 0;
+}
+
+
/***************************************************************************
Applying of REDO log records
***************************************************************************/
=== modified file 'storage/maria/ma_blockrec.h'
--- a/storage/maria/ma_blockrec.h 2008-10-14 09:38:07 +0000
+++ b/storage/maria/ma_blockrec.h 2008-12-05 21:11:46 +0000
@@ -271,6 +271,9 @@ my_bool write_hook_for_undo_bulk_insert(
my_bool write_hook_for_file_id(enum translog_record_type type,
TRN *trn, MARIA_HA *tbl_info, LSN *lsn,
void *hook_arg);
+my_bool write_hook_for_commit(enum translog_record_type type,
+ TRN *trn, MARIA_HA *tbl_info, LSN *lsn,
+ void *hook_arg);
void _ma_block_get_status(void* param, my_bool concurrent_insert);
void _ma_block_update_status(void *param);
void _ma_block_restore_status(void *param);
=== modified file 'storage/maria/ma_commit.c'
--- a/storage/maria/ma_commit.c 2008-10-09 20:03:54 +0000
+++ b/storage/maria/ma_commit.c 2008-12-05 21:11:46 +0000
@@ -33,6 +33,7 @@ int ma_commit(TRN *trn)
LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS];
DBUG_ENTER("ma_commit");
+ DBUG_ASSERT(trn->rec_lsn == LSN_IMPOSSIBLE);
if (trn->undo_lsn == 0) /* no work done, rollback (cheaper than commit) */
DBUG_RETURN(trnman_rollback_trn(trn));
/*
@@ -61,8 +62,14 @@ int ma_commit(TRN *trn)
trn, NULL, 0,
sizeof(log_array)/sizeof(log_array[0]),
log_array, NULL, NULL) |
- translog_flush(commit_lsn) |
- trnman_commit_trn(trn));
+ translog_flush(commit_lsn));
+
+ DBUG_EXECUTE_IF("maria_sleep_in_commit",
+ {
+ DBUG_PRINT("info", ("maria_sleep_in_commit"));
+ sleep(3);
+ });
+ res|= trnman_commit_trn(trn);
/*
=== modified file 'storage/maria/ma_loghandler.c'
--- a/storage/maria/ma_loghandler.c 2008-12-02 22:02:52 +0000
+++ b/storage/maria/ma_loghandler.c 2008-12-05 21:11:46 +0000
@@ -618,11 +618,11 @@ static LOG_DESC INIT_LOGREC_PREPARE_WITH
static LOG_DESC INIT_LOGREC_COMMIT=
{LOGRECTYPE_FIXEDLENGTH, 0, 0, NULL,
- NULL, NULL, 0, "commit", LOGREC_IS_GROUP_ITSELF, NULL,
+ write_hook_for_commit, NULL, 0, "commit", LOGREC_IS_GROUP_ITSELF, NULL,
NULL};
static LOG_DESC INIT_LOGREC_COMMIT_WITH_UNDO_PURGE=
-{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1,
+{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, write_hook_for_commit, NULL, 1,
"commit_with_undo_purge", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
static LOG_DESC INIT_LOGREC_CHECKPOINT=
=== modified file 'storage/maria/ma_recovery.c'
--- a/storage/maria/ma_recovery.c 2008-11-24 14:23:48 +0000
+++ b/storage/maria/ma_recovery.c 2008-12-05 21:11:46 +0000
@@ -555,8 +555,9 @@ static void new_transaction(uint16 sid,
char llbuf[22];
all_active_trans[sid].long_trid= long_id;
llstr(long_id, llbuf);
- tprint(tracef, "Transaction long_trid %s short_trid %u starts\n",
- llbuf, sid);
+ tprint(tracef, "Transaction long_trid %s short_trid %u starts,"
+ " undo_lsn (%lu,0x%lx) first_undo_lsn (%lu,0x%lx)\n",
+ llbuf, sid, LSN_IN_PARTS(undo_lsn), LSN_IN_PARTS(first_undo_lsn));
all_active_trans[sid].undo_lsn= undo_lsn;
all_active_trans[sid].first_undo_lsn= first_undo_lsn;
set_if_bigger(max_long_trid, long_id);
@@ -2968,6 +2969,8 @@ static LSN parse_checkpoint_record(LSN l
ptr= log_record_buffer.str;
start_address= lsn_korr(ptr);
ptr+= LSN_STORE_SIZE;
+ tprint(tracef, "Checkpoint record has start_horizon at (%lu,0x%lx)\n",
+ LSN_IN_PARTS(start_address));
/* transactions */
nb_active_transactions= uint2korr(ptr);
@@ -2983,6 +2986,9 @@ static LSN parse_checkpoint_record(LSN l
line. It may make start_address slightly decrease (only by the time it
takes to write one or a few rows, roughly).
*/
+ tprint(tracef, "Checkpoint record has min_rec_lsn of active transactions"
+ " at (%lu,0x%lx)\n",
+ LSN_IN_PARTS(minimum_rec_lsn_of_active_transactions));
set_if_smaller(start_address, minimum_rec_lsn_of_active_transactions);
for (i= 0; i < nb_active_transactions; i++)
@@ -3086,6 +3092,8 @@ static LSN parse_checkpoint_record(LSN l
*/
start_address= checkpoint_start=
translog_next_LSN(start_address, LSN_IMPOSSIBLE);
+ tprint(tracef, "Checkpoint record start_horizon now adjusted to"
+ " LSN (%lu,0x%lx)\n", LSN_IN_PARTS(start_address));
if (checkpoint_start == LSN_IMPOSSIBLE)
{
/*
@@ -3095,6 +3103,8 @@ static LSN parse_checkpoint_record(LSN l
return LSN_ERROR;
}
/* now, where the REDO phase should start reading log: */
+ tprint(tracef, "Checkpoint has min_rec_lsn of dirty pages at"
+ " LSN (%lu,0x%lx)\n", LSN_IN_PARTS(minimum_rec_lsn_of_dirty_pages));
set_if_smaller(start_address, minimum_rec_lsn_of_dirty_pages);
DBUG_PRINT("info",
("checkpoint_start: (%lu,0x%lx) start_address: (%lu,0x%lx)",
=== modified file 'storage/maria/trnman.c'
--- a/storage/maria/trnman.c 2008-11-20 14:11:00 +0000
+++ b/storage/maria/trnman.c 2008-12-05 21:11:46 +0000
@@ -389,7 +389,6 @@ my_bool trnman_end_trn(TRN *trn, my_bool
LF_PINS *pins= trn->pins;
DBUG_ENTER("trnman_end_trn");
- DBUG_ASSERT(trn->rec_lsn == 0);
/* if a rollback, all UNDO records should have been executed */
DBUG_ASSERT(commit || trn->undo_lsn == 0);
DBUG_PRINT("info", ("pthread_mutex_lock LOCK_trn_list"));
| Thread |
|---|
| • bzr commit into MySQL/Maria:mysql-maria branch (guilhem:2708) Bug#41037 | Guilhem Bichot | 5 Dec |