From: Guilhem Bichot Date: January 16 2009 9:00pm Subject: bzr commit into MySQL/Maria:mysql-maria branch (guilhem:2720) Bug#42112 List-Archive: http://lists.mysql.com/maria/404 X-Bug: 42112 Message-Id: <20090116210052.8EF328CCC@gbichot4.local> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit #At bzr+ssh://bk-internal.mysql.com/bzrroot/server/mysql-maria/ based on revid:guilhem@stripped 2720 Guilhem Bichot 2009-01-16 Fix for BUG#42112 "Maria: recovery failure (pushbuild2) Assertion `rownr == 0 && new_page' failed" modified: mysql-test/suite/maria/r/maria-recovery3.result mysql-test/suite/maria/t/maria-recovery3.test storage/maria/ma_create.c storage/maria/ma_delete_all.c per-file messages: mysql-test/suite/maria/r/maria-recovery3.result result update mysql-test/suite/maria/t/maria-recovery3.test Test for BUG#42112; before the bugfix, recovery would assert like this: ma_blockrec.c:6051: _ma_apply_redo_insert_row_head_or_tail: Assertion `rownr == 0 && new_page' failed. storage/maria/ma_create.c Fix for BUG#42112; plus some intentional crashes to test the fix. The bug was that if a crash happened during TRUNCATE TABLE, in maria_create(), after the index file's state has been written but before its LSNs have been updated (so, if a crash happened between _ma_state_info_write_sub() and _ma_update_state_lsns_sub()), then that would leave a table with create_rename_lsn==0. Recovery would then try old pre-TRUNCATE REDOs on this table, and fail as this table is already partly shortened. Fix is to write create_rename_lsn==LSN_MAX as soon as TRUNCATE touches the index file, so that Recovery ignores this table. This allows Maria to start; the table is still corrupted but the user can successfully repeat TRUNCATE TABLE (which required Maria to start). storage/maria/ma_delete_all.c A comment. 
=== modified file 'mysql-test/suite/maria/r/maria-recovery3.result' --- a/mysql-test/suite/maria/r/maria-recovery3.result 2009-01-15 15:14:47 +0000 +++ b/mysql-test/suite/maria/r/maria-recovery3.result 2009-01-16 21:00:39 +0000 @@ -63,5 +63,31 @@ ok * compared t1 to old version use mysqltest; drop table t1; +create table t1 engine=maria select 1; +* copied t1 for feeding_recovery +set global maria_checkpoint_interval=0; +insert into t1 values(2); +truncate table t1; +flush table t1; +* copied t1 for comparison +truncate table t1; +SET SESSION debug="+d,maria_flush_whole_log,maria_crash_create_table"; +* crashing mysqld intentionally +truncate table t1; +ERROR HY000: Lost connection to MySQL server during query +* recovery happens +check table t1 extended; +Table Op Msg_type Msg_text +mysqltest.t1 check warning Size of indexfile is: 372 Should be: 8192 +mysqltest.t1 check status OK +* testing that checksum after recovery is as expected +Checksum-check +ok +use mysqltest; +truncate table t1; +check table t1 extended; +Table Op Msg_type Msg_text +mysqltest.t1 check status OK +drop table t1; drop database mysqltest_for_comparison; drop database mysqltest; === modified file 'mysql-test/suite/maria/t/maria-recovery3.test' --- a/mysql-test/suite/maria/t/maria-recovery3.test 2009-01-15 15:14:47 +0000 +++ b/mysql-test/suite/maria/t/maria-recovery3.test 2009-01-16 21:00:39 +0000 @@ -88,6 +88,28 @@ check table t1 extended; -- source include/maria_verify_recovery.inc drop table t1; +# Test for BUG#42112 "Maria: recovery failure (pushbuild2) Assertion +# `rownr == 0 && new_page' failed" + +let $mvr_restore_old_snapshot=0; +let $mms_compare_physically=0; +create table t1 engine=maria select 1; +-- source include/maria_make_snapshot_for_feeding_recovery.inc +set global maria_checkpoint_interval=0; # no checkpoints +insert into t1 values(2); +truncate table t1; +-- source include/maria_make_snapshot_for_comparison.inc +let $mvr_crash_statement= truncate table t1; +let 
$mvr_debug_option="+d,maria_flush_whole_log,maria_crash_create_table"; +truncate table t1; +-- source include/maria_verify_recovery.inc +# Table is bad but at least Recovery didn't crash and a new truncate +# can succeed: +truncate table t1; +check table t1 extended; +drop table t1; + + # clean up everything let $mms_purpose=comparison; eval drop database mysqltest_for_$mms_purpose; === modified file 'storage/maria/ma_create.c' --- a/storage/maria/ma_create.c 2008-10-15 20:00:35 +0000 +++ b/storage/maria/ma_create.c 2009-01-16 21:00:39 +0000 @@ -750,6 +750,13 @@ int maria_create(const char *name, enum (via maria_recreate_table()) and it does not have a log. */ sync_dir= MY_SYNC_DIR; + /* + If crash between _ma_state_info_write_sub() and + _ma_update_state__lsns_sub(), table should be ignored by Recovery (or + old REDOs would fail), so we cannot let LSNs be 0: + */ + share.state.skip_redo_lsn= share.state.is_of_horizon= + share.state.create_rename_lsn= LSN_MAX; } if (datafile_type == DYNAMIC_RECORD) @@ -1059,11 +1066,21 @@ int maria_create(const char *name, enum log_array, NULL, NULL) || translog_flush(lsn))) goto err; + share.kfile.file= file; + DBUG_EXECUTE_IF("maria_flush_whole_log", + { + DBUG_PRINT("maria_flush_whole_log", ("now")); + translog_flush(translog_get_horizon()); + }); + DBUG_EXECUTE_IF("maria_crash_create_table", + { + DBUG_PRINT("maria_crash_create_table", ("now")); + DBUG_ABORT(); + }); /* store LSN into file, needed for Recovery to not be confused if a DROP+CREATE happened (applying REDOs to the wrong table). */ - share.kfile.file= file; if (_ma_update_state_lsns_sub(&share, lsn, trnman_get_min_safe_trid(), FALSE, TRUE)) goto err; === modified file 'storage/maria/ma_delete_all.c' --- a/storage/maria/ma_delete_all.c 2008-06-26 05:18:28 +0000 +++ b/storage/maria/ma_delete_all.c 2009-01-16 21:00:39 +0000 @@ -115,6 +115,12 @@ int maria_delete_all_rows(MARIA_HA *info but redo_insert are skipped (dirty pages list is empty). 
To avoid this, we need to set skip_redo_lsn now, and thus need to sync files. + Also fixes the problem of: + bulk insert; insert; delete_all; crash: + "bulk insert" is skipped (no REDOs), so if "insert" would not be skipped + (if we didn't update skip_redo_lsn below) then "insert" would be tried + and fail, saying that it sees that the first page has to be created + though the inserted row has rownr>0. */ my_bool error= _ma_state_info_write(share, 1|4) || _ma_update_state_lsns(share, lsn, trnman_get_min_trid(), FALSE, FALSE) ||