List:Commits« Previous MessageNext Message »
From:Olav Sandstaa Date:July 8 2008 4:22pm
Subject:Re: bzr commit into mysql-6.0-falcon branch (vvaintroub:2732)
View as plain text  
Vlad,

Thanks for the updated patch and for taking into account the comments I 
had.
I think the patch looks very good and have no further comments.

OK to push.

Regards,
Olav


Vladislav Vaintroub wrote:
> #At file:///C:/bzr/mysql-6.0-falcon/
>
>  2732 Vladislav Vaintroub	2008-07-07
>       WL4173: Falcon checksum pages
>       
>       Checksum is updated when one or saveral pages are written to disk.
>       
>       Checksum is *not* verified  pages are read from disk to 
>       sector cache,  but first time it is copied from sector cache to 
>       the page cache.
>       
>       However preparations are made to verify checksum directly after page 
>       is read from  from disk, in case sector cache will be made optional
>       or abandoned.
>       
>       The algorithm used to calculate checksum  is  home-backed simplified 
>       fletcher (64bit integers, no 1's complement arithmetic, no care for overflow).
>       Should be ok for type of errors we expect.
> modified:
>   mysql-test/suite/falcon/r/falcon_options.result
>   mysql-test/suite/falcon/r/falcon_options2.result
>   storage/falcon/IO.cpp
>   storage/falcon/IOx.h
>   storage/falcon/Page.h
>   storage/falcon/SectorBuffer.cpp
>   storage/falcon/StorageParameters.h
>
> per-file messages:
>   mysql-test/suite/falcon/r/falcon_options.result
>     New parameter falcon_checksums
>   mysql-test/suite/falcon/r/falcon_options2.result
>     NEw parameter falcon_checksums
>   storage/falcon/IO.cpp
>     New functions to compute and validate checksums.
>   storage/falcon/IOx.h
>     New functions computeChecksum and validateChecksum
>   storage/falcon/Page.h
>     checksum made uint (saves couple of casts)
>   storage/falcon/SectorBuffer.cpp
>     Validate checksum when reading from SectorBuffer.
>     Do it only once and disable the check afterwards
>   storage/falcon/StorageParameters.h
>     New parameter falcon_checksums
> === modified file 'mysql-test/suite/falcon/r/falcon_options.result'
> --- a/mysql-test/suite/falcon/r/falcon_options.result	2008-06-26 13:30:24 +0000
> +++ b/mysql-test/suite/falcon/r/falcon_options.result	2008-07-07 14:00:45 +0000
> @@ -1,6 +1,7 @@
>  SHOW VARIABLES LIKE 'falcon_%';
>  Variable_name	Value
>  falcon_checkpoint_schedule	7 * * * * *
> +falcon_checksums	ON
>  falcon_consistent_read	ON
>  falcon_debug_mask	0
>  falcon_debug_server	OFF
> @@ -90,6 +91,7 @@ SELECT * FROM INFORMATION_SCHEMA.global_
>  WHERE variable_name LIKE 'falcon%';
>  VARIABLE_NAME	VARIABLE_VALUE
>  FALCON_CHECKPOINT_SCHEDULE	7 * * * * *
> +FALCON_CHECKSUMS	ON
>  FALCON_CONSISTENT_READ	ON
>  FALCON_DEBUG_MASK	4096
>  FALCON_DEBUG_SERVER	OFF
>
> === modified file 'mysql-test/suite/falcon/r/falcon_options2.result'
> --- a/mysql-test/suite/falcon/r/falcon_options2.result	2008-06-26 13:30:24 +0000
> +++ b/mysql-test/suite/falcon/r/falcon_options2.result	2008-07-07 14:00:45 +0000
> @@ -2,6 +2,7 @@ SELECT * FROM INFORMATION_SCHEMA.global_
>  WHERE variable_name LiKE 'falcon_%';
>  VARIABLE_NAME	VARIABLE_VALUE
>  FALCON_CHECKPOINT_SCHEDULE	7 * * * * *
> +FALCON_CHECKSUMS	ON
>  FALCON_CONSISTENT_READ	ON
>  FALCON_DEBUG_MASK	0
>  FALCON_DEBUG_SERVER	OFF
>
> === modified file 'storage/falcon/IO.cpp'
> --- a/storage/falcon/IO.cpp	2008-04-29 08:39:19 +0000
> +++ b/storage/falcon/IO.cpp	2008-07-07 14:00:45 +0000
> @@ -100,10 +100,14 @@ static  int getLinuxVersion();
>  //#define SIMULATE_DISK_FULL					1000000;
>  
>  extern uint		falcon_direct_io;
> +extern char		falcon_checksums;
>  
>  static const int TRACE_SYNC_START	= -1;
>  static const int TRACE_SYNC_END		= -2;
>  
> +
> +static const uint16 NON_ZERO_CHECKSUM_MAGIC = 0xAFFE;
> +
>  #ifdef SIMULATE_DISK_FULL
>  static int simulateDiskFull = SIMULATE_DISK_FULL;
>  #endif
> @@ -253,6 +257,23 @@ void IO::readPage(Bdb * bdb)
>  		}
>  
>  	++reads;
> +
> +	if(falcon_checksums)
> +		validateChecksum(bdb->buffer, pageSize, offset);
> +}
> +
> +void IO::validateChecksum(Page *page, size_t pageSize, int64 fileOffset)
> +{
> +	if (page->checksum == NO_CHECKSUM_MAGIC)
> +		return;
> +
> +	uint16 chksum = computeChecksum(page, pageSize);
> +
> +	if (page->checksum != chksum)
> +		FATAL("Checksum error (expected %d, got %d) at page %d"
> +			"file '%s', offset " I64FORMAT ", pagesize %d",
> +			(int)chksum, (int)page->checksum, (int)(fileOffset/pageSize),
> +			fileName, fileOffset, pageSize);
>  }
>  
>  bool IO::trialRead(Bdb *bdb)
> @@ -278,23 +299,7 @@ void IO::writePage(Bdb * bdb, int type)
>  
>  	ASSERT(bdb->pageNumber != HEADER_PAGE || ((Page*)(bdb->buffer))->pageType
> == PAGE_header);
>  	tracePage(bdb);
> -	SEEK_OFFSET offset = (int64) bdb->pageNumber * pageSize;
> -	int length = pwrite (offset, pageSize, (UCHAR *)bdb->buffer);
> -
> -	if (length != pageSize)
> -		{
> -		if (errno == ENOSPC)
> -			throw SQLError(DEVICE_FULL, "device full error on %s, page %d\n", (const char*)
> fileName, bdb->pageNumber);
> -
> -		declareFatalError();
> -		FATAL ("write error on page %d (%d/%d/%d) of \"%s\": %s (%d)",
> -				bdb->pageNumber, length, pageSize, fileId,
> -				(const char*) fileName, strerror (errno), errno);
> -		}
> -
> -	++writes;
> -	++writesSinceSync;
> -	++writeTypes[type];
> +	writePages(bdb->pageNumber, pageSize, (UCHAR *)bdb->buffer, type);
>  }
>  
>  void IO::writePages(int32 pageNumber, int length, const UCHAR* data, int type)
> @@ -308,7 +313,17 @@ void IO::writePages(int32 pageNumber, in
>  	if (simulateDiskFull && offset + length > simulateDiskFull)
>  		throw SQLError(DEVICE_FULL, "device full error on %s, page %d\n", (const char*)
> fileName, pageNumber);
>  #endif
> -	
> +
> +	Page *page = (Page *)data;
> +
> +	for (int i = 0;i < length/pageSize; i++ ,page = (Page *)((UCHAR *)page +
> pageSize))
> +		{
> +		if (falcon_checksums)
> +			page->checksum = computeChecksum(page, pageSize);
> + 		else
> +			page->checksum = NO_CHECKSUM_MAGIC;
> +		}
> +
>  	int ret = pwrite (offset, length, data);
>  
>  	if (ret != length)
> @@ -330,15 +345,20 @@ void IO::writePages(int32 pageNumber, in
>  
>  void IO::readHeader(Hdr * header)
>  {
> +	static const int maxPageSize = 32*1024;
>  	static const int sectorSize = 4096;
> -	UCHAR temp[sectorSize * 2];
> -	UCHAR *buffer = (UCHAR*) (((UIPTR) temp + sectorSize - 1) / sectorSize *
> sectorSize);
> +	UCHAR temp[maxPageSize + sectorSize];
> +	UCHAR *buffer = ALIGN(temp, sectorSize);
>  	int n = lseek (fileId, 0, SEEK_SET);
> -	n = ::read (fileId, buffer, sectorSize);
> +	n = ::read (fileId, buffer, maxPageSize);
>  
>  	if (n < (int) sizeof (Hdr))
>  		FATAL ("read error on database header");
> -	
> +
> +	Hdr* hdr = (Hdr*) buffer;
> +	if (falcon_checksums && hdr->pageSize <= n)
> +		validateChecksum((Page*) buffer, hdr->pageSize, 0);
> +
>  	memcpy(header, buffer, sizeof(Hdr));
>  }
>  
> @@ -679,3 +699,52 @@ static int getLinuxVersion()
>  	return vers = KERNEL_VERSION(major,minor,release);
>  }
>  #endif
> +
> +
> +
> +/*
> +  Calculate checksum for a page.
> +  The algorithm somewhat resembles fletcher checksum , but it is performed
> +  on 64 bit integers and two's complement arithmetic, without taking care
> +  of overflow.
> +
> +  Also,
> +  - Calculation skips 3rd and 4th bytes in the page 
> +   (which is checksum field in the page header)
> +  - Returned result is never 0
> +*/
> +uint16 IO::computeChecksum(Page *page, size_t len)
> +{
> +	uint64 sum1,sum2;
> +	uint64 *data = (uint64 *)page;
> +	uint64 *end  = data + len/8;
> +
> +	ASSERT(len >=8 && OFFSET(Page*,checksum)==2 &&
> sizeof(page->checksum)==2);
> +
> +	// Initialize sums, mask out checksum bytes in the page header
> +	static uint16 endian = 1;
> +	uint64 mask = *(char *)(&endian)?0xFFFFFFFF0000FFFFLL:0xFFFF0000FFFFFFFFLL;
> +	
> +	sum1 = sum2 = *data & mask;
> +
> +	data++;
> +
> +	while(data < end)
> +		{
> +		sum1 += *data++;
> +		sum2 += sum1;
> +		}
> +
> +	uint64 sum = sum1 + sum2;
> +
> +	// Fold the result to 16 bit
> +	while(sum >> 16)
> +		sum = (sum & 0xFFFF) + (sum >> 16);
> +
> +	if(sum == 0)
> +		sum = NON_ZERO_CHECKSUM_MAGIC;
> +
> +	return (uint16) sum;
> +
> +}
> +
>
> === modified file 'storage/falcon/IOx.h'
> --- a/storage/falcon/IOx.h	2008-03-27 06:09:29 +0000
> +++ b/storage/falcon/IOx.h	2008-07-07 14:00:45 +0000
> @@ -36,9 +36,12 @@ static const int WRITE_TYPE_CLONE		= 4;
>  static const int WRITE_TYPE_FLUSH		= 5;
>  static const int WRITE_TYPE_MAX			= 6;
>  
> +static const uint16 NO_CHECKSUM_MAGIC = 0;
> +
>  class Bdb;
>  class Hdr;
>  class Dbb;
> +class Page;
>  
>  class IO
>  {
> @@ -75,7 +78,8 @@ public:
>  	static void		trace(int fd, int pageNumber, int pageType, int pageId);
>  	static void		traceOpen(void);
>  	static void		traceClose(void);
> -	
> +	static uint16	computeChecksum(Page *page, size_t pageSize);
> +	void			validateChecksum(Page *page, size_t pageSize, int64 fileOffset);
>  	static void		createPath (const char *fileName);
>  	static const char *baseName(const char *path);
>  	static void		expandFileName(const char *fileName, int length, char *buffer, const
> char **baseFileName = NULL);
>
> === modified file 'storage/falcon/Page.h'
> --- a/storage/falcon/Page.h	2008-06-17 17:41:54 +0000
> +++ b/storage/falcon/Page.h	2008-07-07 14:00:45 +0000
> @@ -58,8 +58,8 @@ class EncodedDataStream;
>  class Page  
>  {
>  public:
> -	short	pageType;
> -	short	checksum;
> +	int16	pageType;
> +	uint16	checksum;
>  
>  #ifdef HAVE_PAGE_NUMBER
>  	int32	pageNumber;
>
> === modified file 'storage/falcon/SectorBuffer.cpp'
> --- a/storage/falcon/SectorBuffer.cpp	2008-07-02 20:55:11 +0000
> +++ b/storage/falcon/SectorBuffer.cpp	2008-07-07 14:00:45 +0000
> @@ -19,6 +19,7 @@
>  #include "SectorCache.h"
>  #include "BDB.h"
>  #include "Dbb.h"
> +#include "Page.h"
>  
>  SectorBuffer::SectorBuffer()
>  {
> @@ -32,9 +33,29 @@ SectorBuffer::~SectorBuffer(void)
>  
>  void SectorBuffer::readPage(Bdb* bdb)
>  {
> -	int offset = (bdb->pageNumber % cache->pagesPerSector) * cache->pageSize;
> +	int pageSize  = cache->pageSize;
> +
> +	int offset = (bdb->pageNumber % cache->pagesPerSector) * pageSize;
>  	ASSERT(offset < activeLength);
> -	memcpy(bdb->buffer, buffer + offset, cache->pageSize);
> +	
> +	Page *page = (Page *)(buffer + offset);
> +
> +	/*
> +	Validate page checksum.
> +	Do it only once and after that reset the checksum field. It is necessary
> +	because later the checksum in header might be incorrect (when page is read,
> +	modified and written back to buffer┤but not yet to disk).Also, the same page
> +	might be read multiple times and we want to avoid the checksum calculation
> +	overhead.
> +	*/
> +	if(page->checksum != NO_CHECKSUM_MAGIC)
> +		{
> +		dbb->validateChecksum(page, pageSize, ((int64)bdb->pageNumber) * pageSize);
> +		page->checksum = NO_CHECKSUM_MAGIC;
> +		}
> +	memcpy(bdb->buffer, page, pageSize);
> +
> +
>  }
>  
>  void SectorBuffer::readSector()
>
> === modified file 'storage/falcon/StorageParameters.h'
> --- a/storage/falcon/StorageParameters.h	2008-06-26 13:30:24 +0000
> +++ b/storage/falcon/StorageParameters.h	2008-07-07 14:00:45 +0000
> @@ -11,7 +11,7 @@
>  // #define PLUGIN_VAR_OPCMDARG     0x2000 = Argument optional for cmd line
>  // #define PLUGIN_VAR_MEMALLOC     0x8000 = String needs memory allocated
>  
> -
> +PARAMETER_BOOL(checksums, "Enable Falcon checksum validation", 1, 0x200, NULL)
>  PARAMETER_UINT(debug_mask, "Falcon message type mask for logged messages.", 0, 0,
> INT_MAX, 0, StorageInterface::updateDebugMask)
>  PARAMETER_BOOL(debug_server, "Enable Falcon debug code.", 0, 0x0200, NULL)
>  PARAMETER_UINT(debug_trace, "Falcon debug trace trigger.", 0, 0, INT_MAX, 0, NULL)
>
>   

Thread
bzr commit into mysql-6.0-falcon branch (vvaintroub:2732) Vladislav Vaintroub7 Jul
  • RE: bzr commit into mysql-6.0-falcon branch (vvaintroub:2732) Kevin Lewis7 Jul
    • RE: bzr commit into mysql-6.0-falcon branch (vvaintroub:2732) Vladislav Vaintroub7 Jul
  • Re: bzr commit into mysql-6.0-falcon branch (vvaintroub:2732)Olav Sandstaa8 Jul