List:Commits« Previous MessageNext Message »
From:Kevin Lewis Date:July 10 2009 1:37pm
Subject:Re: bzr commit into mysql-6.0-falcon-team branch (olav:2753) Bug#45301
View as plain text  
Olav,  This code looks good.  OK to push.

Olav Sandstaa wrote:
> #At file:///home/os136802/mysql/develop/repo/falcon-bug45301-test/ based on
> revid:john.embretsen@stripped
> 
>  2753 Olav Sandstaa	2009-07-10
>       Fix for Bug#45301 Recovery crash due to fetched page marked free in PIP
>       
>       This crash occured during recovery due to at the time of the crash the main
> data page for a record had
>       written to the data base file while the PIP and the newly allocatedoverflow
> page for the record had not 
>       been written. The corresponding SRLOverflowPages log record was likely not
> flushed to the serial log.
>       This situation happened likely due to MySQL being killed during a Falcon check
> point.
>       
>       This is now handled by extending the IO::readPage() so that an exception is
> thrown in the cases there
>       is reads beyond end-of-file during recovery. This exception is caught in
> DataPage::deleteOverflowPages.
>       If it happens during recovery, DataPage::deleteOverflowPages will handle the
> exception by marking the
>       overflow page as free in the Page Inventory Page.
>       
>       Note that if there are more than one overflow page for this record we might
> have permanent loss of a few
>       blocks in the database.
>       
>       The patch also fixes an missing handling of exceptions in Cache::fetchPage()
> where an IO excecption would
>       lead to a BDB having a use count that never got released and which resulted in
> a crash during shutdown.
>      @ storage/falcon/Cache.cpp
>         In Cache::fetchPage: Add missing handling of exceptions that might occur
> during reading of pages. If an exception was thrown the old code would not do proper
> clean-up and the allocated BDB would still be allocated and have a use count. This would
> never be released, take a buffer slot forever and lead to a crash during normal shutdown
> due to the use count on the BDB. The changed code will handle the exception, release the
> allocated BDB and rethrow the exception.
>      @ storage/falcon/DataPage.cpp
>         Add handling of read errors that occures when trying the access an overflow
> page that should be deleted during recovery. If an IO error occurs we handle this by
> ignoring this overflow page and just mark it as free in the Page Inventory Page.
>      @ storage/falcon/Dbb.cpp
>         Add a method to be used for checking if we currently is doing recovery on the
> DBB.
>      @ storage/falcon/Dbb.h
>         Add a method to be used for checking if we currently is doing recovery on the
> DBB.
>      @ storage/falcon/IO.cpp
>         Change how a read error that occurs during recovery is handled:
>         1. Do not set the "fatal error" status of the IO class
>         2. Throw an exception also if it is read beyond end-of-file (instead of
> crashing)
> 
>     modified:
>       storage/falcon/Cache.cpp
>       storage/falcon/DataPage.cpp
>       storage/falcon/Dbb.cpp
>       storage/falcon/Dbb.h
>       storage/falcon/IO.cpp
> === modified file 'storage/falcon/Cache.cpp'
> --- a/storage/falcon/Cache.cpp	2009-04-09 19:15:59 +0000
> +++ b/storage/falcon/Cache.cpp	2009-07-10 12:52:00 +0000
> @@ -282,11 +282,25 @@ Bdb* Cache::fetchPage(Dbb *dbb, int32 pa
>  #endif
>  			
>  			Priority priority(database->ioScheduler);
> -			priority.schedule(PRIORITY_MEDIUM);	
> -			if (falcon_use_sectorcache)
> -				sectorCache->readPage(bdb);
> -			else
> -				dbb->readPage(bdb);
> +			priority.schedule(PRIORITY_MEDIUM);
> +
> +			try 
> +				{
> +				if (falcon_use_sectorcache)
> +					sectorCache->readPage(bdb);
> +				else
> +					dbb->readPage(bdb);
> +				}
> +			catch (SQLException& exception)
> +				{
> +				// If the read operation throws an exception we need to 
> +				// release the alloacted buffer slot before re-throwing the
> +				// exception
> +
> +				bdb->release(REL_HISTORY);
> +				throw;
> +				}
> +
>  			priority.finished();
>  			if(bdb->buffer->pageNumber != pageNumber)
>  				{
> @@ -376,7 +390,6 @@ void Cache::flush(int64 arg)
>  
>  	syncWait.lock(NULL, Exclusive);
>  	sync.lock(Shared);
> -	//Log::debug(%d: "Initiating flush\n", dbb->deltaTime);
>  	flushArg = arg;
>  	flushPages = 0;
>  	physicalWrites = 0;
> 
> === modified file 'storage/falcon/DataPage.cpp'
> --- a/storage/falcon/DataPage.cpp	2009-06-22 12:23:12 +0000
> +++ b/storage/falcon/DataPage.cpp	2009-07-10 12:52:00 +0000
> @@ -30,6 +30,8 @@
>  #include "Log.h"
>  #include "LogLock.h"
>  #include "SerialLog.h"
> +#include "SQLException.h"
> +#include "PageInventoryPage.h"
>  
>  #ifdef _DEBUG
>  #undef THIS_FILE
> @@ -375,7 +377,42 @@ void DataPage::deleteOverflowPages(Dbb *
>  				return;
>  			log->setOverflowPageInvalid(overflowPageNumber, dbb->tableSpaceId);
>  			}
> -		Bdb *bdb = dbb->fetchPage (overflowPageNumber, PAGE_data_overflow, Exclusive);
> +
> +		Bdb *bdb;
> +
> +		// Read the overflow page. Note that during recovery the overflow page
> +		// might not be found in the table space file due to having
> +		// crashed during a check point. We handle this by catching the
> +		// IO exception and marking the page as free.
> +
> +		try
> +			{
> +			bdb = dbb->fetchPage (overflowPageNumber, PAGE_data_overflow, Exclusive);
> +			}
> +		catch (SQLException& exception)
> +			{
> +			
> +			if (log->recovering && exception.getSqlcode() == IO_ERROR)
> +				{
> +				// Mark this page as available in PIP
> +				
> +				Log::debug("During recovery: reading overflow page %d failed with IO error:
> %s\n",
> +						   overflowPageNumber, exception.getText());
> +				Log::debug("During recovery: marking overflow page %d as free in PIP\n",
> +						   overflowPageNumber);
> +
> +				PageInventoryPage::freePage(dbb, overflowPageNumber, transId);
> +				break;
> +				}
> +			else
> +				{
> +				// If this exception occured during normal operation or if 
> +				// it is not an IO exception we re-throw it
> +
> +				throw;
> +				}
> +			}
> +
>  		BDB_HISTORY(bdb);
>  		DataOverflowPage *page = (DataOverflowPage*) bdb->buffer;
>  		overflowPageNumber = page->nextPage;
> 
> === modified file 'storage/falcon/Dbb.cpp'
> --- a/storage/falcon/Dbb.cpp	2009-05-12 22:58:23 +0000
> +++ b/storage/falcon/Dbb.cpp	2009-07-10 12:52:00 +0000
> @@ -978,6 +978,11 @@ bool Dbb::deleteShadow(DatabaseCopy *sha
>  	return false;
>  }
>  
> +bool Dbb::isRecovering()
> +{
> +	return serialLog && serialLog->recovering;
> +}
> +
>  void Dbb::printPage(int pageNumber)
>  {
>  	Bdb *bdb = fetchPage (pageNumber, PAGE_any, Shared);
> 
> === modified file 'storage/falcon/Dbb.h'
> --- a/storage/falcon/Dbb.h	2009-04-09 17:31:22 +0000
> +++ b/storage/falcon/Dbb.h	2009-07-10 12:52:00 +0000
> @@ -130,6 +130,7 @@ public:
>  	void	cloneFile(Database *database, const char *fileName, bool createShadow);
>  	void	addShadow(DatabaseCopy* shadow);
>  	bool	deleteShadow(DatabaseCopy *shadow);
> +	bool	isRecovering();
>  
>  	// Cache oriened functions
>  	void	validateCache(void);
> 
> === modified file 'storage/falcon/IO.cpp'
> --- a/storage/falcon/IO.cpp	2009-06-26 11:13:44 +0000
> +++ b/storage/falcon/IO.cpp	2009-07-10 12:52:00 +0000
> @@ -83,6 +83,7 @@ static  int getLinuxVersion();
>  #include <errno.h>
>  #include "IOx.h"
>  #include "BDB.h"
> +#include "Dbb.h"
>  #include "Hdr.h"
>  #include "SQLError.h"
>  #include "Sync.h"
> @@ -291,8 +292,16 @@ void IO::readPage(Bdb * bdb)
>  
>  	if (length != pageSize)
>  		{
> -		declareFatalError("IO::readPage", bdb->pageNumber, errno);
> -		if (length == -1)
> +		// Unless this error happens during recovery we set the status to
> +		// "fatal" to prevent further operations on the file
> +
> +		if (!bdb->dbb->isRecovering())
> +			declareFatalError("IO::readPage", bdb->pageNumber, errno);
> +
> +		// If an IO error has occured (length == -1) or this is during
> +		// recovery we throw an exception.
> +
> +		if (length == -1 || bdb->dbb->isRecovering())
>  			{
>  			throw SQLError(IO_ERROR, "read error on page %d of \"%s\": %s (%d)",
>  				bdb->pageNumber, (const char*) fileName, strerror (errno), errno);
> 
> 
> 
> ------------------------------------------------------------------------
> 
> 
Thread
bzr commit into mysql-6.0-falcon-team branch (olav:2753) Bug#45301Olav Sandstaa10 Jul
  • Re: bzr commit into mysql-6.0-falcon-team branch (olav:2753) Bug#45301Kevin Lewis10 Jul