List:Commits« Previous MessageNext Message »
From:Olav Sandstaa Date:July 10 2009 12:52pm
Subject:bzr commit into mysql-6.0-falcon-team branch (olav:2753) Bug#45301
View as plain text  
#At file:///home/os136802/mysql/develop/repo/falcon-bug45301-test/ based on revid:john.embretsen@stripped

 2753 Olav Sandstaa	2009-07-10
      Fix for Bug#45301 Recovery crash due to fetched page marked free in PIP
      
      This crash occured during recovery due to at the time of the crash the main data page for a record had
      written to the data base file while the PIP and the newly allocatedoverflow page for the record had not 
      been written. The corresponding SRLOverflowPages log record was likely not flushed to the serial log.
      This situation happened likely due to MySQL being killed during a Falcon check point.
      
      This is now handled by extending the IO::readPage() so that an exception is thrown in the cases there
      is reads beyond end-of-file during recovery. This exception is caught in DataPage::deleteOverflowPages.
      If it happens during recovery, DataPage::deleteOverflowPages will handle the exception by marking the
      overflow page as free in the Page Inventory Page.
      
      Note that if there are more than one overflow page for this record we might have permanent loss of a few
      blocks in the database.
      
      The patch also fixes an missing handling of exceptions in Cache::fetchPage() where an IO excecption would
      lead to a BDB having a use count that never got released and which resulted in a crash during shutdown.
     @ storage/falcon/Cache.cpp
        In Cache::fetchPage: Add missing handling of exceptions that might occur during reading of pages. If an exception was thrown the old code would not do proper clean-up and the allocated BDB would still be allocated and have a use count. This would never be released, take a buffer slot forever and lead to a crash during normal shutdown due to the use count on the BDB. The changed code will handle the exception, release the allocated BDB and rethrow the exception.
     @ storage/falcon/DataPage.cpp
        Add handling of read errors that occures when trying the access an overflow page that should be deleted during recovery. If an IO error occurs we handle this by ignoring this overflow page and just mark it as free in the Page Inventory Page.
     @ storage/falcon/Dbb.cpp
        Add a method to be used for checking if we currently is doing recovery on the DBB.
     @ storage/falcon/Dbb.h
        Add a method to be used for checking if we currently is doing recovery on the DBB.
     @ storage/falcon/IO.cpp
        Change how a read error that occurs during recovery is handled:
        1. Do not set the "fatal error" status of the IO class
        2. Throw an exception also if it is read beyond end-of-file (instead of crashing)

    modified:
      storage/falcon/Cache.cpp
      storage/falcon/DataPage.cpp
      storage/falcon/Dbb.cpp
      storage/falcon/Dbb.h
      storage/falcon/IO.cpp
=== modified file 'storage/falcon/Cache.cpp'
--- a/storage/falcon/Cache.cpp	2009-04-09 19:15:59 +0000
+++ b/storage/falcon/Cache.cpp	2009-07-10 12:52:00 +0000
@@ -282,11 +282,25 @@ Bdb* Cache::fetchPage(Dbb *dbb, int32 pa
 #endif
 			
 			Priority priority(database->ioScheduler);
-			priority.schedule(PRIORITY_MEDIUM);	
-			if (falcon_use_sectorcache)
-				sectorCache->readPage(bdb);
-			else
-				dbb->readPage(bdb);
+			priority.schedule(PRIORITY_MEDIUM);
+
+			try 
+				{
+				if (falcon_use_sectorcache)
+					sectorCache->readPage(bdb);
+				else
+					dbb->readPage(bdb);
+				}
+			catch (SQLException& exception)
+				{
+				// If the read operation throws an exception we need to 
+				// release the alloacted buffer slot before re-throwing the
+				// exception
+
+				bdb->release(REL_HISTORY);
+				throw;
+				}
+
 			priority.finished();
 			if(bdb->buffer->pageNumber != pageNumber)
 				{
@@ -376,7 +390,6 @@ void Cache::flush(int64 arg)
 
 	syncWait.lock(NULL, Exclusive);
 	sync.lock(Shared);
-	//Log::debug(%d: "Initiating flush\n", dbb->deltaTime);
 	flushArg = arg;
 	flushPages = 0;
 	physicalWrites = 0;

=== modified file 'storage/falcon/DataPage.cpp'
--- a/storage/falcon/DataPage.cpp	2009-06-22 12:23:12 +0000
+++ b/storage/falcon/DataPage.cpp	2009-07-10 12:52:00 +0000
@@ -30,6 +30,8 @@
 #include "Log.h"
 #include "LogLock.h"
 #include "SerialLog.h"
+#include "SQLException.h"
+#include "PageInventoryPage.h"
 
 #ifdef _DEBUG
 #undef THIS_FILE
@@ -375,7 +377,42 @@ void DataPage::deleteOverflowPages(Dbb *
 				return;
 			log->setOverflowPageInvalid(overflowPageNumber, dbb->tableSpaceId);
 			}
-		Bdb *bdb = dbb->fetchPage (overflowPageNumber, PAGE_data_overflow, Exclusive);
+
+		Bdb *bdb;
+
+		// Read the overflow page. Note that during recovery the overflow page
+		// might not be found in the table space file due to having
+		// crashed during a check point. We handle this by catching the
+		// IO exception and marking the page as free.
+
+		try
+			{
+			bdb = dbb->fetchPage (overflowPageNumber, PAGE_data_overflow, Exclusive);
+			}
+		catch (SQLException& exception)
+			{
+			
+			if (log->recovering && exception.getSqlcode() == IO_ERROR)
+				{
+				// Mark this page as available in PIP
+				
+				Log::debug("During recovery: reading overflow page %d failed with IO error: %s\n",
+						   overflowPageNumber, exception.getText());
+				Log::debug("During recovery: marking overflow page %d as free in PIP\n",
+						   overflowPageNumber);
+
+				PageInventoryPage::freePage(dbb, overflowPageNumber, transId);
+				break;
+				}
+			else
+				{
+				// If this exception occured during normal operation or if 
+				// it is not an IO exception we re-throw it
+
+				throw;
+				}
+			}
+
 		BDB_HISTORY(bdb);
 		DataOverflowPage *page = (DataOverflowPage*) bdb->buffer;
 		overflowPageNumber = page->nextPage;

=== modified file 'storage/falcon/Dbb.cpp'
--- a/storage/falcon/Dbb.cpp	2009-05-12 22:58:23 +0000
+++ b/storage/falcon/Dbb.cpp	2009-07-10 12:52:00 +0000
@@ -978,6 +978,11 @@ bool Dbb::deleteShadow(DatabaseCopy *sha
 	return false;
 }
 
+bool Dbb::isRecovering()
+{
+	return serialLog && serialLog->recovering;
+}
+
 void Dbb::printPage(int pageNumber)
 {
 	Bdb *bdb = fetchPage (pageNumber, PAGE_any, Shared);

=== modified file 'storage/falcon/Dbb.h'
--- a/storage/falcon/Dbb.h	2009-04-09 17:31:22 +0000
+++ b/storage/falcon/Dbb.h	2009-07-10 12:52:00 +0000
@@ -130,6 +130,7 @@ public:
 	void	cloneFile(Database *database, const char *fileName, bool createShadow);
 	void	addShadow(DatabaseCopy* shadow);
 	bool	deleteShadow(DatabaseCopy *shadow);
+	bool	isRecovering();
 
 	// Cache oriened functions
 	void	validateCache(void);

=== modified file 'storage/falcon/IO.cpp'
--- a/storage/falcon/IO.cpp	2009-06-26 11:13:44 +0000
+++ b/storage/falcon/IO.cpp	2009-07-10 12:52:00 +0000
@@ -83,6 +83,7 @@ static  int getLinuxVersion();
 #include <errno.h>
 #include "IOx.h"
 #include "BDB.h"
+#include "Dbb.h"
 #include "Hdr.h"
 #include "SQLError.h"
 #include "Sync.h"
@@ -291,8 +292,16 @@ void IO::readPage(Bdb * bdb)
 
 	if (length != pageSize)
 		{
-		declareFatalError("IO::readPage", bdb->pageNumber, errno);
-		if (length == -1)
+		// Unless this error happens during recovery we set the status to
+		// "fatal" to prevent further operations on the file
+
+		if (!bdb->dbb->isRecovering())
+			declareFatalError("IO::readPage", bdb->pageNumber, errno);
+
+		// If an IO error has occured (length == -1) or this is during
+		// recovery we throw an exception.
+
+		if (length == -1 || bdb->dbb->isRecovering())
 			{
 			throw SQLError(IO_ERROR, "read error on page %d of \"%s\": %s (%d)",
 				bdb->pageNumber, (const char*) fileName, strerror (errno), errno);


Attachment: [text/bzr-bundle] bzr/olav@sun.com-20090710125200-1p67jh1x4c61zng5.bundle
Thread
bzr commit into mysql-6.0-falcon-team branch (olav:2753) Bug#45301Olav Sandstaa10 Jul
  • Re: bzr commit into mysql-6.0-falcon-team branch (olav:2753) Bug#45301Kevin Lewis10 Jul