#At file:///home/os136802/mysql/develop/repo/falcon-bug45301-test/ based on revid:john.embretsen@stripped
2753 Olav Sandstaa 2009-07-10
Fix for Bug#45301 Recovery crash due to fetched page marked free in PIP
This crash occured during recovery due to at the time of the crash the main data page for a record had
written to the data base file while the PIP and the newly allocatedoverflow page for the record had not
been written. The corresponding SRLOverflowPages log record was likely not flushed to the serial log.
This situation happened likely due to MySQL being killed during a Falcon check point.
This is now handled by extending the IO::readPage() so that an exception is thrown in the cases there
is reads beyond end-of-file during recovery. This exception is caught in DataPage::deleteOverflowPages.
If it happens during recovery, DataPage::deleteOverflowPages will handle the exception by marking the
overflow page as free in the Page Inventory Page.
Note that if there are more than one overflow page for this record we might have permanent loss of a few
blocks in the database.
The patch also fixes an missing handling of exceptions in Cache::fetchPage() where an IO excecption would
lead to a BDB having a use count that never got released and which resulted in a crash during shutdown.
@ storage/falcon/Cache.cpp
In Cache::fetchPage: Add missing handling of exceptions that might occur during reading of pages. If an exception was thrown the old code would not do proper clean-up and the allocated BDB would still be allocated and have a use count. This would never be released, take a buffer slot forever and lead to a crash during normal shutdown due to the use count on the BDB. The changed code will handle the exception, release the allocated BDB and rethrow the exception.
@ storage/falcon/DataPage.cpp
Add handling of read errors that occures when trying the access an overflow page that should be deleted during recovery. If an IO error occurs we handle this by ignoring this overflow page and just mark it as free in the Page Inventory Page.
@ storage/falcon/Dbb.cpp
Add a method to be used for checking if we currently is doing recovery on the DBB.
@ storage/falcon/Dbb.h
Add a method to be used for checking if we currently is doing recovery on the DBB.
@ storage/falcon/IO.cpp
Change how a read error that occurs during recovery is handled:
1. Do not set the "fatal error" status of the IO class
2. Throw an exception also if it is read beyond end-of-file (instead of crashing)
modified:
storage/falcon/Cache.cpp
storage/falcon/DataPage.cpp
storage/falcon/Dbb.cpp
storage/falcon/Dbb.h
storage/falcon/IO.cpp
=== modified file 'storage/falcon/Cache.cpp'
--- a/storage/falcon/Cache.cpp 2009-04-09 19:15:59 +0000
+++ b/storage/falcon/Cache.cpp 2009-07-10 12:52:00 +0000
@@ -282,11 +282,25 @@ Bdb* Cache::fetchPage(Dbb *dbb, int32 pa
#endif
Priority priority(database->ioScheduler);
- priority.schedule(PRIORITY_MEDIUM);
- if (falcon_use_sectorcache)
- sectorCache->readPage(bdb);
- else
- dbb->readPage(bdb);
+ priority.schedule(PRIORITY_MEDIUM);
+
+ try
+ {
+ if (falcon_use_sectorcache)
+ sectorCache->readPage(bdb);
+ else
+ dbb->readPage(bdb);
+ }
+ catch (SQLException& exception)
+ {
+ // If the read operation throws an exception we need to
+ // release the alloacted buffer slot before re-throwing the
+ // exception
+
+ bdb->release(REL_HISTORY);
+ throw;
+ }
+
priority.finished();
if(bdb->buffer->pageNumber != pageNumber)
{
@@ -376,7 +390,6 @@ void Cache::flush(int64 arg)
syncWait.lock(NULL, Exclusive);
sync.lock(Shared);
- //Log::debug(%d: "Initiating flush\n", dbb->deltaTime);
flushArg = arg;
flushPages = 0;
physicalWrites = 0;
=== modified file 'storage/falcon/DataPage.cpp'
--- a/storage/falcon/DataPage.cpp 2009-06-22 12:23:12 +0000
+++ b/storage/falcon/DataPage.cpp 2009-07-10 12:52:00 +0000
@@ -30,6 +30,8 @@
#include "Log.h"
#include "LogLock.h"
#include "SerialLog.h"
+#include "SQLException.h"
+#include "PageInventoryPage.h"
#ifdef _DEBUG
#undef THIS_FILE
@@ -375,7 +377,42 @@ void DataPage::deleteOverflowPages(Dbb *
return;
log->setOverflowPageInvalid(overflowPageNumber, dbb->tableSpaceId);
}
- Bdb *bdb = dbb->fetchPage (overflowPageNumber, PAGE_data_overflow, Exclusive);
+
+ Bdb *bdb;
+
+ // Read the overflow page. Note that during recovery the overflow page
+ // might not be found in the table space file due to having
+ // crashed during a check point. We handle this by catching the
+ // IO exception and marking the page as free.
+
+ try
+ {
+ bdb = dbb->fetchPage (overflowPageNumber, PAGE_data_overflow, Exclusive);
+ }
+ catch (SQLException& exception)
+ {
+
+ if (log->recovering && exception.getSqlcode() == IO_ERROR)
+ {
+ // Mark this page as available in PIP
+
+ Log::debug("During recovery: reading overflow page %d failed with IO error: %s\n",
+ overflowPageNumber, exception.getText());
+ Log::debug("During recovery: marking overflow page %d as free in PIP\n",
+ overflowPageNumber);
+
+ PageInventoryPage::freePage(dbb, overflowPageNumber, transId);
+ break;
+ }
+ else
+ {
+ // If this exception occured during normal operation or if
+ // it is not an IO exception we re-throw it
+
+ throw;
+ }
+ }
+
BDB_HISTORY(bdb);
DataOverflowPage *page = (DataOverflowPage*) bdb->buffer;
overflowPageNumber = page->nextPage;
=== modified file 'storage/falcon/Dbb.cpp'
--- a/storage/falcon/Dbb.cpp 2009-05-12 22:58:23 +0000
+++ b/storage/falcon/Dbb.cpp 2009-07-10 12:52:00 +0000
@@ -978,6 +978,11 @@ bool Dbb::deleteShadow(DatabaseCopy *sha
return false;
}
+bool Dbb::isRecovering()
+{
+ return serialLog && serialLog->recovering;
+}
+
void Dbb::printPage(int pageNumber)
{
Bdb *bdb = fetchPage (pageNumber, PAGE_any, Shared);
=== modified file 'storage/falcon/Dbb.h'
--- a/storage/falcon/Dbb.h 2009-04-09 17:31:22 +0000
+++ b/storage/falcon/Dbb.h 2009-07-10 12:52:00 +0000
@@ -130,6 +130,7 @@ public:
void cloneFile(Database *database, const char *fileName, bool createShadow);
void addShadow(DatabaseCopy* shadow);
bool deleteShadow(DatabaseCopy *shadow);
+ bool isRecovering();
// Cache oriened functions
void validateCache(void);
=== modified file 'storage/falcon/IO.cpp'
--- a/storage/falcon/IO.cpp 2009-06-26 11:13:44 +0000
+++ b/storage/falcon/IO.cpp 2009-07-10 12:52:00 +0000
@@ -83,6 +83,7 @@ static int getLinuxVersion();
#include <errno.h>
#include "IOx.h"
#include "BDB.h"
+#include "Dbb.h"
#include "Hdr.h"
#include "SQLError.h"
#include "Sync.h"
@@ -291,8 +292,16 @@ void IO::readPage(Bdb * bdb)
if (length != pageSize)
{
- declareFatalError("IO::readPage", bdb->pageNumber, errno);
- if (length == -1)
+ // Unless this error happens during recovery we set the status to
+ // "fatal" to prevent further operations on the file
+
+ if (!bdb->dbb->isRecovering())
+ declareFatalError("IO::readPage", bdb->pageNumber, errno);
+
+ // If an IO error has occured (length == -1) or this is during
+ // recovery we throw an exception.
+
+ if (length == -1 || bdb->dbb->isRecovering())
{
throw SQLError(IO_ERROR, "read error on page %d of \"%s\": %s (%d)",
bdb->pageNumber, (const char*) fileName, strerror (errno), errno);
Attachment: [text/bzr-bundle] bzr/olav@sun.com-20090710125200-1p67jh1x4c61zng5.bundle