List:Maria Storage Engine« Previous MessageNext Message »
From:Guilhem Bichot Date:December 5 2008 9:12pm
Subject:bzr commit into MySQL/Maria:mysql-maria branch (guilhem:2708) Bug#41037
View as plain text  
#At bzr+ssh://bk-internal.mysql.com/bzrroot/server/mysql-maria/ based on revid:guilhem@stripped

 2708 Guilhem Bichot	2008-12-05
      Fix for BUG#41037 "Maria: recovery failure (pushbuild2)" (checkpoint bug).
added:
  mysql-test/suite/maria/r/maria-recovery3.result
  mysql-test/suite/maria/t/maria-recovery3-master.opt
  mysql-test/suite/maria/t/maria-recovery3.test
modified:
  storage/maria/ma_blockrec.c
  storage/maria/ma_blockrec.h
  storage/maria/ma_commit.c
  storage/maria/ma_loghandler.c
  storage/maria/ma_recovery.c
  storage/maria/trnman.c

per-file messages:
  mysql-test/suite/maria/r/maria-recovery3.result
    result.
  mysql-test/suite/maria/t/maria-recovery3-master.opt
    usual recovery test options
  mysql-test/suite/maria/t/maria-recovery3.test
    Test for BUG#41037. Without the bugfix, the test would hang because Recovery would fail with "undo_key_insert got error 126": Recovery would believe INSERT of 2 is uncommitted, try to execute its UNDO_KEY_INSERT, fail to find the key to delete because DELETE deleted it. That failure of _ma_ck_real_delete() would mark table as corrupted (see
    in d_search()) which is error 126 (HA_ERR_CRASHED).
  storage/maria/ma_blockrec.c
    set trn->rec_lsn to LSN of commit record, when writing it.
  storage/maria/ma_blockrec.h
    new function
  storage/maria/ma_commit.c
    when committing or rolling back, rec_lsn should be LSN_IMPOSSIBLE: assertion is moved here
    from trnman_end_trn(), because now ma_commit() can set rec_lsn and so when we reach trnman_end_trn()
    it's not LSN_IMPOSSIBLE anymore.
    Adding debug possibility to pause between COMMIT record write and trnman_commit_trn(), to be
    able to fire checkpoint in between.
  storage/maria/ma_loghandler.c
    in-write hook for COMMIT records
  storage/maria/ma_recovery.c
    More debugging info in maria_recovery.trace, to see how the starting point of REDO phase is determined
  storage/maria/trnman.c
    assertion cannot be here anymore see ma_commit.c
=== added file 'mysql-test/suite/maria/r/maria-recovery3.result'
--- a/mysql-test/suite/maria/r/maria-recovery3.result	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/maria/r/maria-recovery3.result	2008-12-05 21:11:46 +0000
@@ -0,0 +1,29 @@
+set global maria_log_file_size=4294967295;
+drop database if exists mysqltest;
+create database mysqltest;
+use mysqltest;
+* shut down mysqld, removed logs, restarted it
+* TEST of Checkpoint between writing the commit log record and committing in trnman
+create table t1(a int primary key) engine=maria;
+insert into t1 values(1);
+flush table t1;
+* copied t1 for comparison
+set session debug="+d,maria_sleep_in_commit";
+insert into t1 values(2);
+set global maria_checkpoint_interval=1000;
+delete from t1 where a=2;
+SET SESSION debug="+d,maria_flush_whole_log,maria_crash";
+* crashing mysqld intentionally
+set global maria_checkpoint_interval=1;
+ERROR HY000: Lost connection to MySQL server during query
+* recovery happens
+check table t1 extended;
+Table	Op	Msg_type	Msg_text
+mysqltest.t1	check	status	OK
+* testing that checksum after recovery is as expected
+Checksum-check
+ok
+use mysqltest;
+drop table t1;
+drop database mysqltest_for_comparison;
+drop database mysqltest;

=== added file 'mysql-test/suite/maria/t/maria-recovery3-master.opt'
--- a/mysql-test/suite/maria/t/maria-recovery3-master.opt	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/maria/t/maria-recovery3-master.opt	2008-12-05 21:11:46 +0000
@@ -0,0 +1 @@
+--skip-stack-trace --skip-core-file --loose-maria-log-dir-path=$MYSQLTEST_VARDIR/tmp

=== added file 'mysql-test/suite/maria/t/maria-recovery3.test'
--- a/mysql-test/suite/maria/t/maria-recovery3.test	1970-01-01 00:00:00 +0000
+++ b/mysql-test/suite/maria/t/maria-recovery3.test	2008-12-05 21:11:46 +0000
@@ -0,0 +1,71 @@
+--source include/not_embedded.inc
+# Don't test this under valgrind, memory leaks will occur as we crash
+--source include/not_valgrind.inc
+# Binary must be compiled with debug for crash to occur
+--source include/have_debug.inc
+--source include/have_maria.inc
+
+set global maria_log_file_size=4294967295;
+let $MARIA_LOG=../tmp;
+
+--disable_warnings
+drop database if exists mysqltest;
+--enable_warnings
+create database mysqltest;
+let $mms_tname=t;
+
+# Include scripts can perform SQL. For it to not influence the main test
+# they use a separate connection. This way if they use a DDL it would
+# not autocommit in the main test.
+connect (admin, 127.0.0.1, root,,mysqltest,,);
+--enable_reconnect
+
+connection default;
+use mysqltest;
+--enable_reconnect
+
+let $mms_tables=1;
+let $mvr_restore_old_snapshot=0;
+let $mms_compare_physically=0;
+let $mvr_debug_option="+d,maria_flush_whole_log,maria_crash";
+let $mvr_crash_statement= set global maria_checkpoint_interval=1;
+
+-- source include/maria_empty_logs.inc
+
+# Test for BUG#41037 (recovery failure)
+--echo * TEST of Checkpoint between writing the commit log record and committing in trnman
+# we want recovery to use the tables as they were at time of crash
+let $mvr_restore_old_snapshot=0;
+# UNDO phase prevents physical comparison, normally,
+# so we'll only use checksums to compare.
+let $mms_compare_physically=0;
+let $mvr_crash_statement= set global maria_checkpoint_interval=1;
+create table t1(a int primary key) engine=maria;
+insert into t1 values(1);
+-- source include/maria_make_snapshot_for_comparison.inc
+set session debug="+d,maria_sleep_in_commit";
+send insert into t1 values(2);
+sleep 1;
+# Now the INSERT of 2 has written a commit record
+# but not yet called trnman_commit(), so for checkpoint it's not
+# committed.
+connection admin;
+set global maria_checkpoint_interval=1000; # force a checkpoint
+connection default;
+reap; # end of INSERT
+delete from t1 where a=2;
+# Bug was that: Recovery starts REDO scanning from too far: from
+# Checkpoint record which says INSERT is not committed, so
+# Recovery executes the INSERT's UNDO and finds no key to delete
+# (as DELETE already deleted it), fails.
+-- source include/maria_verify_recovery.inc
+drop table t1;
+
+# Note that even if machine is loaded and thus INSERT is committed
+# before checkpoint happens, test should still pass (though it won't
+# reproduce the conditions of the bug).
+
+# clean up everything
+let $mms_purpose=comparison;
+eval drop database mysqltest_for_$mms_purpose;
+drop database mysqltest;

=== modified file 'storage/maria/ma_blockrec.c'
--- a/storage/maria/ma_blockrec.c	2008-10-14 15:18:14 +0000
+++ b/storage/maria/ma_blockrec.c	2008-12-05 21:11:46 +0000
@@ -5914,7 +5914,7 @@ my_bool write_hook_for_file_id(enum tran
                                TRN *trn
                                __attribute__ ((unused)),
                                MARIA_HA *tbl_info,
-                               LSN *lsn __attribute__ ((unused)),
+                               LSN *lsn,
                                void *hook_arg
                                __attribute__ ((unused)))
 {
@@ -5923,6 +5923,44 @@ my_bool write_hook_for_file_id(enum tran
   return 0;
 }
 
+
+/**
+   Updates transaction's rec_lsn when committing.
+
+   A transaction writes its commit record before being committed in trnman, so
+   if Checkpoint happens just between the COMMIT record log write and the
+   commit in trnman, it will record that transaction is not committed. Assume
+   the transaction (trn1) did an INSERT; after the checkpoint, a second
+   transaction (trn2) does a DELETE of what trn1 has inserted. Then crash,
+   Checkpoint record says that trn1 was not committed, and REDO phase starts
+   from Checkpoint record's LSN. So it will not find the COMMIT record of
+   trn1, will want to roll back trn1, which will fail because the row/key
+   which it wants to delete does not exist anymore.
+   To avoid this, Checkpoint needs to know that the REDO phase must start
+   before this COMMIT record, so transaction sets its rec_lsn to the COMMIT's
+   record LSN, and as Checkpoint reads the transaction's rec_lsn, Checkpoint
+   will know.
+
+   @note so after commit trn->rec_lsn is a "commit LSN", which could be of
+   use later.
+
+   @return Operation status, always 0 (success)
+*/
+
+my_bool write_hook_for_commit(enum translog_record_type type
+                              __attribute__ ((unused)),
+                              TRN *trn,
+                              MARIA_HA *tbl_info
+                              __attribute__ ((unused)),
+                              LSN *lsn,
+                              void *hook_arg
+                              __attribute__ ((unused)))
+{
+  trn->rec_lsn= *lsn;
+  return 0;
+}
+
+
 /***************************************************************************
   Applying of REDO log records
 ***************************************************************************/

=== modified file 'storage/maria/ma_blockrec.h'
--- a/storage/maria/ma_blockrec.h	2008-10-14 09:38:07 +0000
+++ b/storage/maria/ma_blockrec.h	2008-12-05 21:11:46 +0000
@@ -271,6 +271,9 @@ my_bool write_hook_for_undo_bulk_insert(
 my_bool write_hook_for_file_id(enum translog_record_type type,
                                TRN *trn, MARIA_HA *tbl_info, LSN *lsn,
                                void *hook_arg);
+my_bool write_hook_for_commit(enum translog_record_type type,
+                              TRN *trn, MARIA_HA *tbl_info, LSN *lsn,
+                              void *hook_arg);
 void _ma_block_get_status(void* param, my_bool concurrent_insert);
 void _ma_block_update_status(void *param);
 void _ma_block_restore_status(void *param);

=== modified file 'storage/maria/ma_commit.c'
--- a/storage/maria/ma_commit.c	2008-10-09 20:03:54 +0000
+++ b/storage/maria/ma_commit.c	2008-12-05 21:11:46 +0000
@@ -33,6 +33,7 @@ int ma_commit(TRN *trn)
   LEX_CUSTRING log_array[TRANSLOG_INTERNAL_PARTS];
   DBUG_ENTER("ma_commit");
 
+  DBUG_ASSERT(trn->rec_lsn == LSN_IMPOSSIBLE);
   if (trn->undo_lsn == 0) /* no work done, rollback (cheaper than commit) */
     DBUG_RETURN(trnman_rollback_trn(trn));
   /*
@@ -61,8 +62,14 @@ int ma_commit(TRN *trn)
                              trn, NULL, 0,
                              sizeof(log_array)/sizeof(log_array[0]),
                              log_array, NULL, NULL) |
-        translog_flush(commit_lsn) |
-        trnman_commit_trn(trn));
+        translog_flush(commit_lsn));
+
+  DBUG_EXECUTE_IF("maria_sleep_in_commit",
+                  {
+                    DBUG_PRINT("info", ("maria_sleep_in_commit"));
+                    sleep(3);
+                  });
+  res|= trnman_commit_trn(trn);
 
 
   /*

=== modified file 'storage/maria/ma_loghandler.c'
--- a/storage/maria/ma_loghandler.c	2008-12-02 22:02:52 +0000
+++ b/storage/maria/ma_loghandler.c	2008-12-05 21:11:46 +0000
@@ -618,11 +618,11 @@ static LOG_DESC INIT_LOGREC_PREPARE_WITH
 
 static LOG_DESC INIT_LOGREC_COMMIT=
 {LOGRECTYPE_FIXEDLENGTH, 0, 0, NULL,
- NULL, NULL, 0, "commit", LOGREC_IS_GROUP_ITSELF, NULL,
+ write_hook_for_commit, NULL, 0, "commit", LOGREC_IS_GROUP_ITSELF, NULL,
  NULL};
 
 static LOG_DESC INIT_LOGREC_COMMIT_WITH_UNDO_PURGE=
-{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, NULL, NULL, 1,
+{LOGRECTYPE_PSEUDOFIXEDLENGTH, 5, 5, NULL, write_hook_for_commit, NULL, 1,
  "commit_with_undo_purge", LOGREC_IS_GROUP_ITSELF, NULL, NULL};
 
 static LOG_DESC INIT_LOGREC_CHECKPOINT=

=== modified file 'storage/maria/ma_recovery.c'
--- a/storage/maria/ma_recovery.c	2008-11-24 14:23:48 +0000
+++ b/storage/maria/ma_recovery.c	2008-12-05 21:11:46 +0000
@@ -555,8 +555,9 @@ static void new_transaction(uint16 sid, 
   char llbuf[22];
   all_active_trans[sid].long_trid= long_id;
   llstr(long_id, llbuf);
-  tprint(tracef, "Transaction long_trid %s short_trid %u starts\n",
-         llbuf, sid);
+  tprint(tracef, "Transaction long_trid %s short_trid %u starts,"
+         " undo_lsn (%lu,0x%lx) first_undo_lsn (%lu,0x%lx)\n",
+         llbuf, sid, LSN_IN_PARTS(undo_lsn), LSN_IN_PARTS(first_undo_lsn));
   all_active_trans[sid].undo_lsn= undo_lsn;
   all_active_trans[sid].first_undo_lsn= first_undo_lsn;
   set_if_bigger(max_long_trid, long_id);
@@ -2968,6 +2969,8 @@ static LSN parse_checkpoint_record(LSN l
   ptr= log_record_buffer.str;
   start_address= lsn_korr(ptr);
   ptr+= LSN_STORE_SIZE;
+  tprint(tracef, "Checkpoint record has start_horizon at (%lu,0x%lx)\n",
+         LSN_IN_PARTS(start_address));
 
   /* transactions */
   nb_active_transactions= uint2korr(ptr);
@@ -2983,6 +2986,9 @@ static LSN parse_checkpoint_record(LSN l
     line. It may make start_address slightly decrease (only by the time it
     takes to write one or a few rows, roughly).
   */
+  tprint(tracef, "Checkpoint record has min_rec_lsn of active transactions"
+         " at (%lu,0x%lx)\n",
+         LSN_IN_PARTS(minimum_rec_lsn_of_active_transactions));
   set_if_smaller(start_address, minimum_rec_lsn_of_active_transactions);
 
   for (i= 0; i < nb_active_transactions; i++)
@@ -3086,6 +3092,8 @@ static LSN parse_checkpoint_record(LSN l
   */
   start_address= checkpoint_start=
     translog_next_LSN(start_address, LSN_IMPOSSIBLE);
+  tprint(tracef, "Checkpoint record start_horizon now adjusted to"
+         " LSN (%lu,0x%lx)\n", LSN_IN_PARTS(start_address));
   if (checkpoint_start == LSN_IMPOSSIBLE)
   {
     /*
@@ -3095,6 +3103,8 @@ static LSN parse_checkpoint_record(LSN l
     return LSN_ERROR;
   }
   /* now, where the REDO phase should start reading log: */
+  tprint(tracef, "Checkpoint has min_rec_lsn of dirty pages at"
+         " LSN (%lu,0x%lx)\n", LSN_IN_PARTS(minimum_rec_lsn_of_dirty_pages));
   set_if_smaller(start_address, minimum_rec_lsn_of_dirty_pages);
   DBUG_PRINT("info",
              ("checkpoint_start: (%lu,0x%lx) start_address: (%lu,0x%lx)",

=== modified file 'storage/maria/trnman.c'
--- a/storage/maria/trnman.c	2008-11-20 14:11:00 +0000
+++ b/storage/maria/trnman.c	2008-12-05 21:11:46 +0000
@@ -389,7 +389,6 @@ my_bool trnman_end_trn(TRN *trn, my_bool
   LF_PINS *pins= trn->pins;
   DBUG_ENTER("trnman_end_trn");
 
-  DBUG_ASSERT(trn->rec_lsn == 0);
   /* if a rollback, all UNDO records should have been executed */
   DBUG_ASSERT(commit || trn->undo_lsn == 0);
   DBUG_PRINT("info", ("pthread_mutex_lock LOCK_trn_list"));

Thread
bzr commit into MySQL/Maria:mysql-maria branch (guilhem:2708) Bug#41037Guilhem Bichot5 Dec