Vlad,
Thanks for the updated patch and for taking into account the comments I
had.
I think the patch looks very good and have no further comments.
OK to push.
Regards,
Olav
Vladislav Vaintroub wrote:
> #At file:///C:/bzr/mysql-6.0-falcon/
>
> 2732 Vladislav Vaintroub 2008-07-07
> WL4173: Falcon checksum pages
>
> Checksum is updated when one or saveral pages are written to disk.
>
> Checksum is *not* verified pages are read from disk to
> sector cache, but first time it is copied from sector cache to
> the page cache.
>
> However preparations are made to verify checksum directly after page
> is read from from disk, in case sector cache will be made optional
> or abandoned.
>
> The algorithm used to calculate checksum is home-backed simplified
> fletcher (64bit integers, no 1's complement arithmetic, no care for overflow).
> Should be ok for type of errors we expect.
> modified:
> mysql-test/suite/falcon/r/falcon_options.result
> mysql-test/suite/falcon/r/falcon_options2.result
> storage/falcon/IO.cpp
> storage/falcon/IOx.h
> storage/falcon/Page.h
> storage/falcon/SectorBuffer.cpp
> storage/falcon/StorageParameters.h
>
> per-file messages:
> mysql-test/suite/falcon/r/falcon_options.result
> New parameter falcon_checksums
> mysql-test/suite/falcon/r/falcon_options2.result
> NEw parameter falcon_checksums
> storage/falcon/IO.cpp
> New functions to compute and validate checksums.
> storage/falcon/IOx.h
> New functions computeChecksum and validateChecksum
> storage/falcon/Page.h
> checksum made uint (saves couple of casts)
> storage/falcon/SectorBuffer.cpp
> Validate checksum when reading from SectorBuffer.
> Do it only once and disable the check afterwards
> storage/falcon/StorageParameters.h
> New parameter falcon_checksums
> === modified file 'mysql-test/suite/falcon/r/falcon_options.result'
> --- a/mysql-test/suite/falcon/r/falcon_options.result 2008-06-26 13:30:24 +0000
> +++ b/mysql-test/suite/falcon/r/falcon_options.result 2008-07-07 14:00:45 +0000
> @@ -1,6 +1,7 @@
> SHOW VARIABLES LIKE 'falcon_%';
> Variable_name Value
> falcon_checkpoint_schedule 7 * * * * *
> +falcon_checksums ON
> falcon_consistent_read ON
> falcon_debug_mask 0
> falcon_debug_server OFF
> @@ -90,6 +91,7 @@ SELECT * FROM INFORMATION_SCHEMA.global_
> WHERE variable_name LIKE 'falcon%';
> VARIABLE_NAME VARIABLE_VALUE
> FALCON_CHECKPOINT_SCHEDULE 7 * * * * *
> +FALCON_CHECKSUMS ON
> FALCON_CONSISTENT_READ ON
> FALCON_DEBUG_MASK 4096
> FALCON_DEBUG_SERVER OFF
>
> === modified file 'mysql-test/suite/falcon/r/falcon_options2.result'
> --- a/mysql-test/suite/falcon/r/falcon_options2.result 2008-06-26 13:30:24 +0000
> +++ b/mysql-test/suite/falcon/r/falcon_options2.result 2008-07-07 14:00:45 +0000
> @@ -2,6 +2,7 @@ SELECT * FROM INFORMATION_SCHEMA.global_
> WHERE variable_name LiKE 'falcon_%';
> VARIABLE_NAME VARIABLE_VALUE
> FALCON_CHECKPOINT_SCHEDULE 7 * * * * *
> +FALCON_CHECKSUMS ON
> FALCON_CONSISTENT_READ ON
> FALCON_DEBUG_MASK 0
> FALCON_DEBUG_SERVER OFF
>
> === modified file 'storage/falcon/IO.cpp'
> --- a/storage/falcon/IO.cpp 2008-04-29 08:39:19 +0000
> +++ b/storage/falcon/IO.cpp 2008-07-07 14:00:45 +0000
> @@ -100,10 +100,14 @@ static int getLinuxVersion();
> //#define SIMULATE_DISK_FULL 1000000;
>
> extern uint falcon_direct_io;
> +extern char falcon_checksums;
>
> static const int TRACE_SYNC_START = -1;
> static const int TRACE_SYNC_END = -2;
>
> +
> +static const uint16 NON_ZERO_CHECKSUM_MAGIC = 0xAFFE;
> +
> #ifdef SIMULATE_DISK_FULL
> static int simulateDiskFull = SIMULATE_DISK_FULL;
> #endif
> @@ -253,6 +257,23 @@ void IO::readPage(Bdb * bdb)
> }
>
> ++reads;
> +
> + if(falcon_checksums)
> + validateChecksum(bdb->buffer, pageSize, offset);
> +}
> +
> +void IO::validateChecksum(Page *page, size_t pageSize, int64 fileOffset)
> +{
> + if (page->checksum == NO_CHECKSUM_MAGIC)
> + return;
> +
> + uint16 chksum = computeChecksum(page, pageSize);
> +
> + if (page->checksum != chksum)
> + FATAL("Checksum error (expected %d, got %d) at page %d"
> + "file '%s', offset " I64FORMAT ", pagesize %d",
> + (int)chksum, (int)page->checksum, (int)(fileOffset/pageSize),
> + fileName, fileOffset, pageSize);
> }
>
> bool IO::trialRead(Bdb *bdb)
> @@ -278,23 +299,7 @@ void IO::writePage(Bdb * bdb, int type)
>
> ASSERT(bdb->pageNumber != HEADER_PAGE || ((Page*)(bdb->buffer))->pageType
> == PAGE_header);
> tracePage(bdb);
> - SEEK_OFFSET offset = (int64) bdb->pageNumber * pageSize;
> - int length = pwrite (offset, pageSize, (UCHAR *)bdb->buffer);
> -
> - if (length != pageSize)
> - {
> - if (errno == ENOSPC)
> - throw SQLError(DEVICE_FULL, "device full error on %s, page %d\n", (const char*)
> fileName, bdb->pageNumber);
> -
> - declareFatalError();
> - FATAL ("write error on page %d (%d/%d/%d) of \"%s\": %s (%d)",
> - bdb->pageNumber, length, pageSize, fileId,
> - (const char*) fileName, strerror (errno), errno);
> - }
> -
> - ++writes;
> - ++writesSinceSync;
> - ++writeTypes[type];
> + writePages(bdb->pageNumber, pageSize, (UCHAR *)bdb->buffer, type);
> }
>
> void IO::writePages(int32 pageNumber, int length, const UCHAR* data, int type)
> @@ -308,7 +313,17 @@ void IO::writePages(int32 pageNumber, in
> if (simulateDiskFull && offset + length > simulateDiskFull)
> throw SQLError(DEVICE_FULL, "device full error on %s, page %d\n", (const char*)
> fileName, pageNumber);
> #endif
> -
> +
> + Page *page = (Page *)data;
> +
> + for (int i = 0;i < length/pageSize; i++ ,page = (Page *)((UCHAR *)page +
> pageSize))
> + {
> + if (falcon_checksums)
> + page->checksum = computeChecksum(page, pageSize);
> + else
> + page->checksum = NO_CHECKSUM_MAGIC;
> + }
> +
> int ret = pwrite (offset, length, data);
>
> if (ret != length)
> @@ -330,15 +345,20 @@ void IO::writePages(int32 pageNumber, in
>
> void IO::readHeader(Hdr * header)
> {
> + static const int maxPageSize = 32*1024;
> static const int sectorSize = 4096;
> - UCHAR temp[sectorSize * 2];
> - UCHAR *buffer = (UCHAR*) (((UIPTR) temp + sectorSize - 1) / sectorSize *
> sectorSize);
> + UCHAR temp[maxPageSize + sectorSize];
> + UCHAR *buffer = ALIGN(temp, sectorSize);
> int n = lseek (fileId, 0, SEEK_SET);
> - n = ::read (fileId, buffer, sectorSize);
> + n = ::read (fileId, buffer, maxPageSize);
>
> if (n < (int) sizeof (Hdr))
> FATAL ("read error on database header");
> -
> +
> + Hdr* hdr = (Hdr*) buffer;
> + if (falcon_checksums && hdr->pageSize <= n)
> + validateChecksum((Page*) buffer, hdr->pageSize, 0);
> +
> memcpy(header, buffer, sizeof(Hdr));
> }
>
> @@ -679,3 +699,52 @@ static int getLinuxVersion()
> return vers = KERNEL_VERSION(major,minor,release);
> }
> #endif
> +
> +
> +
> +/*
> + Calculate checksum for a page.
> + The algorithm somewhat resembles fletcher checksum , but it is performed
> + on 64 bit integers and two's complement arithmetic, without taking care
> + of overflow.
> +
> + Also,
> + - Calculation skips 3rd and 4th bytes in the page
> + (which is checksum field in the page header)
> + - Returned result is never 0
> +*/
> +uint16 IO::computeChecksum(Page *page, size_t len)
> +{
> + uint64 sum1,sum2;
> + uint64 *data = (uint64 *)page;
> + uint64 *end = data + len/8;
> +
> + ASSERT(len >=8 && OFFSET(Page*,checksum)==2 &&
> sizeof(page->checksum)==2);
> +
> + // Initialize sums, mask out checksum bytes in the page header
> + static uint16 endian = 1;
> + uint64 mask = *(char *)(&endian)?0xFFFFFFFF0000FFFFLL:0xFFFF0000FFFFFFFFLL;
> +
> + sum1 = sum2 = *data & mask;
> +
> + data++;
> +
> + while(data < end)
> + {
> + sum1 += *data++;
> + sum2 += sum1;
> + }
> +
> + uint64 sum = sum1 + sum2;
> +
> + // Fold the result to 16 bit
> + while(sum >> 16)
> + sum = (sum & 0xFFFF) + (sum >> 16);
> +
> + if(sum == 0)
> + sum = NON_ZERO_CHECKSUM_MAGIC;
> +
> + return (uint16) sum;
> +
> +}
> +
>
> === modified file 'storage/falcon/IOx.h'
> --- a/storage/falcon/IOx.h 2008-03-27 06:09:29 +0000
> +++ b/storage/falcon/IOx.h 2008-07-07 14:00:45 +0000
> @@ -36,9 +36,12 @@ static const int WRITE_TYPE_CLONE = 4;
> static const int WRITE_TYPE_FLUSH = 5;
> static const int WRITE_TYPE_MAX = 6;
>
> +static const uint16 NO_CHECKSUM_MAGIC = 0;
> +
> class Bdb;
> class Hdr;
> class Dbb;
> +class Page;
>
> class IO
> {
> @@ -75,7 +78,8 @@ public:
> static void trace(int fd, int pageNumber, int pageType, int pageId);
> static void traceOpen(void);
> static void traceClose(void);
> -
> + static uint16 computeChecksum(Page *page, size_t pageSize);
> + void validateChecksum(Page *page, size_t pageSize, int64 fileOffset);
> static void createPath (const char *fileName);
> static const char *baseName(const char *path);
> static void expandFileName(const char *fileName, int length, char *buffer, const
> char **baseFileName = NULL);
>
> === modified file 'storage/falcon/Page.h'
> --- a/storage/falcon/Page.h 2008-06-17 17:41:54 +0000
> +++ b/storage/falcon/Page.h 2008-07-07 14:00:45 +0000
> @@ -58,8 +58,8 @@ class EncodedDataStream;
> class Page
> {
> public:
> - short pageType;
> - short checksum;
> + int16 pageType;
> + uint16 checksum;
>
> #ifdef HAVE_PAGE_NUMBER
> int32 pageNumber;
>
> === modified file 'storage/falcon/SectorBuffer.cpp'
> --- a/storage/falcon/SectorBuffer.cpp 2008-07-02 20:55:11 +0000
> +++ b/storage/falcon/SectorBuffer.cpp 2008-07-07 14:00:45 +0000
> @@ -19,6 +19,7 @@
> #include "SectorCache.h"
> #include "BDB.h"
> #include "Dbb.h"
> +#include "Page.h"
>
> SectorBuffer::SectorBuffer()
> {
> @@ -32,9 +33,29 @@ SectorBuffer::~SectorBuffer(void)
>
> void SectorBuffer::readPage(Bdb* bdb)
> {
> - int offset = (bdb->pageNumber % cache->pagesPerSector) * cache->pageSize;
> + int pageSize = cache->pageSize;
> +
> + int offset = (bdb->pageNumber % cache->pagesPerSector) * pageSize;
> ASSERT(offset < activeLength);
> - memcpy(bdb->buffer, buffer + offset, cache->pageSize);
> +
> + Page *page = (Page *)(buffer + offset);
> +
> + /*
> + Validate page checksum.
> + Do it only once and after that reset the checksum field. It is necessary
> + because later the checksum in header might be incorrect (when page is read,
> + modified and written back to buffer┤but not yet to disk).Also, the same page
> + might be read multiple times and we want to avoid the checksum calculation
> + overhead.
> + */
> + if(page->checksum != NO_CHECKSUM_MAGIC)
> + {
> + dbb->validateChecksum(page, pageSize, ((int64)bdb->pageNumber) * pageSize);
> + page->checksum = NO_CHECKSUM_MAGIC;
> + }
> + memcpy(bdb->buffer, page, pageSize);
> +
> +
> }
>
> void SectorBuffer::readSector()
>
> === modified file 'storage/falcon/StorageParameters.h'
> --- a/storage/falcon/StorageParameters.h 2008-06-26 13:30:24 +0000
> +++ b/storage/falcon/StorageParameters.h 2008-07-07 14:00:45 +0000
> @@ -11,7 +11,7 @@
> // #define PLUGIN_VAR_OPCMDARG 0x2000 = Argument optional for cmd line
> // #define PLUGIN_VAR_MEMALLOC 0x8000 = String needs memory allocated
>
> -
> +PARAMETER_BOOL(checksums, "Enable Falcon checksum validation", 1, 0x200, NULL)
> PARAMETER_UINT(debug_mask, "Falcon message type mask for logged messages.", 0, 0,
> INT_MAX, 0, StorageInterface::updateDebugMask)
> PARAMETER_BOOL(debug_server, "Enable Falcon debug code.", 0, 0x0200, NULL)
> PARAMETER_UINT(debug_trace, "Falcon debug trace trigger.", 0, 0, INT_MAX, 0, NULL)
>
>