#At bzr+ssh://bk-internal.mysql.com/bzrroot/server/mysql-6.0-falcon/
2717 Vladislav Vaintroub 2008-06-25
WL#4173 - Falcon checksums
- New parameter falcon_checksums (default ON) to calculate and verify page checksums.
- If parameter is set, verify checksums as pages are read from disk.
On error, stop with abort()/ DebugBreak() after message including filename,
file offset and page size has been written to the error log.
- Update page->checksum as pages are written to the disk. If falcon_checksums is not
set page->checksum is set to magic value 0 (believed to be always correct by validation
routine)
- Checksum algorithm is slightly modified Fletchers 8 bit (never produces 0 checksum)
modified:
mysql-test/suite/falcon/r/falcon_options.result
mysql-test/suite/falcon/r/falcon_options2.result
storage/falcon/IO.cpp
storage/falcon/IOx.h
storage/falcon/Page.h
storage/falcon/SectorBuffer.cpp
storage/falcon/StorageParameters.h
per-file messages:
mysql-test/suite/falcon/r/falcon_options.result
New parameter falcon_checksums
mysql-test/suite/falcon/r/falcon_options2.result
New parameter falcon_checksums
storage/falcon/IO.cpp
implement page checksum calculation and validation.
- use modified fletcher checksum algorithm
- update checksums, before pages are written to disk.
set page->checksum to 0 if falcon_checksums is not set
- verify checksums, as pages are read from disk.
Skip verification is falcon_checksums is not set or page->checksum is 0
storage/falcon/IOx.h
New methods :
- readPages( pread() plus checksum verification)
- checksum (modified Fletchers algorithm)
- pageChecksum()
- validateChecksum()
storage/falcon/Page.h
checksum is made *unsigned* short
(avoids couple of cast, checksum algorithms internally work
with and produce unsigned values)
storage/falcon/SectorBuffer.cpp
replace pread with readPages - later does checksum verification in addition to pread()
storage/falcon/StorageParameters.h
New parameter falcon_checksums
=== modified file 'mysql-test/suite/falcon/r/falcon_options.result'
--- a/mysql-test/suite/falcon/r/falcon_options.result 2008-06-23 10:22:43 +0000
+++ b/mysql-test/suite/falcon/r/falcon_options.result 2008-06-25 00:21:05 +0000
@@ -1,6 +1,7 @@
SHOW VARIABLES LIKE 'falcon_%';
Variable_name Value
falcon_checkpoint_schedule 7 * * * * *
+falcon_checksums ON
falcon_consistent_read ON
falcon_debug_mask 0
falcon_debug_server OFF
@@ -90,6 +91,7 @@ SELECT * FROM INFORMATION_SCHEMA.global_
WHERE variable_name LIKE 'falcon%';
VARIABLE_NAME VARIABLE_VALUE
FALCON_CHECKPOINT_SCHEDULE 7 * * * * *
+FALCON_CHECKSUMS ON
FALCON_CONSISTENT_READ ON
FALCON_DEBUG_MASK 4096
FALCON_DEBUG_SERVER OFF
=== modified file 'mysql-test/suite/falcon/r/falcon_options2.result'
--- a/mysql-test/suite/falcon/r/falcon_options2.result 2008-06-23 10:22:43 +0000
+++ b/mysql-test/suite/falcon/r/falcon_options2.result 2008-06-25 00:21:05 +0000
@@ -2,6 +2,7 @@ SELECT * FROM INFORMATION_SCHEMA.global_
WHERE variable_name LiKE 'falcon_%';
VARIABLE_NAME VARIABLE_VALUE
FALCON_CHECKPOINT_SCHEDULE 7 * * * * *
+FALCON_CHECKSUMS ON
FALCON_CONSISTENT_READ ON
FALCON_DEBUG_MASK 0
FALCON_DEBUG_SERVER OFF
=== modified file 'storage/falcon/IO.cpp'
--- a/storage/falcon/IO.cpp 2008-04-29 08:39:19 +0000
+++ b/storage/falcon/IO.cpp 2008-06-25 00:21:05 +0000
@@ -100,10 +100,14 @@ static int getLinuxVersion();
//#define SIMULATE_DISK_FULL 1000000;
extern uint falcon_direct_io;
+extern char falcon_checksums;
static const int TRACE_SYNC_START = -1;
static const int TRACE_SYNC_END = -2;
+static const uint16 NO_CHECKSUM_MAGIC = 0;
+static const uint16 NON_ZERO_CHECKSUM_MAGIC = 0xAFFE;
+
#ifdef SIMULATE_DISK_FULL
static int simulateDiskFull = SIMULATE_DISK_FULL;
#endif
@@ -243,7 +247,7 @@ void IO::readPage(Bdb * bdb)
FATAL ("can't continue after fatal error");
SEEK_OFFSET offset = (int64) bdb->pageNumber * pageSize;
- int length = pread (offset, pageSize, (UCHAR *)bdb->buffer);
+ int length = readPages (offset, pageSize, (UCHAR *)bdb->buffer);
if (length != pageSize)
{
@@ -255,6 +259,39 @@ void IO::readPage(Bdb * bdb)
++reads;
}
+/*
+ Read from tablespace file and perform and checksum verification
+*/
+int IO::readPages(int64 offset, int length, UCHAR *buffer)
+{
+ int result = pread (offset, length , buffer);
+
+ if(!falcon_checksums || result != length)
+ return result;
+
+ // Verify checksums
+ Page *page = (Page *)buffer;
+
+ for(int i = 0; i < length/pageSize ; i++ , page = (Page *)((UCHAR *)page + pageSize))
+ validateChecksum(page, pageSize, offset + i*pageSize);
+
+ return result;
+}
+
+void IO::validateChecksum(Page *page, size_t pageSize, int64 fileOffset)
+{
+ if (page->checksum == NO_CHECKSUM_MAGIC)
+ return;
+
+ uint16 chksum = pageChecksum(page, pageSize);
+
+ if (page->checksum != chksum)
+ FATAL("Checksum error (expected %d, got %d) at page %d"
+ "file '%s', offset " I64FORMAT ", pagesize %d",
+ (int)chksum, (int)page->checksum, (int)(fileOffset/pageSize),
+ fileName, fileOffset, pageSize);
+}
+
bool IO::trialRead(Bdb *bdb)
{
Sync sync (&syncObject, "IO::trialRead");
@@ -278,23 +315,7 @@ void IO::writePage(Bdb * bdb, int type)
ASSERT(bdb->pageNumber != HEADER_PAGE || ((Page*)(bdb->buffer))->pageType == PAGE_header);
tracePage(bdb);
- SEEK_OFFSET offset = (int64) bdb->pageNumber * pageSize;
- int length = pwrite (offset, pageSize, (UCHAR *)bdb->buffer);
-
- if (length != pageSize)
- {
- if (errno == ENOSPC)
- throw SQLError(DEVICE_FULL, "device full error on %s, page %d\n", (const char*) fileName, bdb->pageNumber);
-
- declareFatalError();
- FATAL ("write error on page %d (%d/%d/%d) of \"%s\": %s (%d)",
- bdb->pageNumber, length, pageSize, fileId,
- (const char*) fileName, strerror (errno), errno);
- }
-
- ++writes;
- ++writesSinceSync;
- ++writeTypes[type];
+ writePages(bdb->pageNumber, pageSize, (UCHAR *)bdb->buffer, type);
}
void IO::writePages(int32 pageNumber, int length, const UCHAR* data, int type)
@@ -308,7 +329,17 @@ void IO::writePages(int32 pageNumber, in
if (simulateDiskFull && offset + length > simulateDiskFull)
throw SQLError(DEVICE_FULL, "device full error on %s, page %d\n", (const char*) fileName, pageNumber);
#endif
-
+
+ Page *page = (Page *)data;
+
+ for (int i = 0;i < length/pageSize; i++ ,page = (Page *)((UCHAR *)page + pageSize))
+ {
+ if (falcon_checksums)
+ page->checksum = pageChecksum(page, pageSize);
+ else
+ page->checksum = NO_CHECKSUM_MAGIC;
+ }
+
int ret = pwrite (offset, length, data);
if (ret != length)
@@ -330,15 +361,19 @@ void IO::writePages(int32 pageNumber, in
void IO::readHeader(Hdr * header)
{
- static const int sectorSize = 4096;
- UCHAR temp[sectorSize * 2];
- UCHAR *buffer = (UCHAR*) (((UIPTR) temp + sectorSize - 1) / sectorSize * sectorSize);
+ static const int bufSize = 32*1024;
+ UCHAR temp[bufSize + 4096];
+ UCHAR *buffer = ALIGN(temp,4096);
int n = lseek (fileId, 0, SEEK_SET);
- n = ::read (fileId, buffer, sectorSize);
+ n = ::read (fileId, buffer, bufSize);
if (n < (int) sizeof (Hdr))
FATAL ("read error on database header");
-
+
+ Hdr* hdr = (Hdr*) buffer;
+ if (falcon_checksums && hdr->pageSize <= n)
+ validateChecksum((Page*) buffer, hdr->pageSize, 0);
+
memcpy(header, buffer, sizeof(Hdr));
}
@@ -679,3 +714,51 @@ static int getLinuxVersion()
return vers = KERNEL_VERSION(major,minor,release);
}
#endif
+
+/*
+ Fletcher's checksum, with tweaks
+ Tweak 1 : never produces 0 checksum.
+ Tweak 2 : allows to skip some bytes in buffer
+*/
+
+uint16 IO::checksum(UCHAR *data, size_t len, UCHAR *skipStart, UCHAR *skipEnd)
+{
+ uint16 sum1 = 0xff, sum2 = 0xff;
+
+ while (len)
+ {
+ size_t tlen = len > 21 ? 21 : len;
+ len -= tlen;
+ do
+ {
+ if (!(data >= skipStart && data < skipEnd))
+ {
+ sum1 += *data;
+ sum2 += sum1;
+ }
+ data++;
+ }
+ while (--tlen);
+ sum1 = (sum1 & 0xff) + (sum1 >> 8);
+ sum2 = (sum2 & 0xff) + (sum2 >> 8);
+ }
+ /* Second reduction step to reduce sums to 8 bits */
+ sum1 = (sum1 & 0xff) + (sum1 >> 8);
+ sum2 = (sum2 & 0xff) + (sum2 >> 8);
+ uint16 result = ((sum2 & 0xff) << 8) | (sum1 & 0xff);
+ if(result == 0)
+ result = NON_ZERO_CHECKSUM_MAGIC;
+ return result;
+}
+
+/*
+ Calculate checksum for a page.
+ Skips 2 bytes checksum field in the page header
+*/
+uint16 IO::pageChecksum(Page *page, size_t size)
+{
+ UCHAR *data = (UCHAR*)page;
+ return checksum(data, size , data + OFFSET(Page*,checksum),
+ data + OFFSET(Page*,checksum) + sizeof(page->checksum));
+}
+
=== modified file 'storage/falcon/IOx.h'
--- a/storage/falcon/IOx.h 2008-03-27 06:09:29 +0000
+++ b/storage/falcon/IOx.h 2008-06-25 00:21:05 +0000
@@ -39,6 +39,7 @@ static const int WRITE_TYPE_MAX = 6;
class Bdb;
class Hdr;
class Dbb;
+class Page;
class IO
{
@@ -60,6 +61,7 @@ public:
void writePage (Bdb *buffer, int type);
void writePages(int32 pageNumber, int length, const UCHAR* data, int type);
void readPage (Bdb *page);
+ int readPages(int64 offset, int length, UCHAR* data);
bool createFile (const char *name, uint64 initialAllocation);
bool openFile (const char *name, bool readOnly);
void longSeek(int64 offset);
@@ -75,7 +77,10 @@ public:
static void trace(int fd, int pageNumber, int pageType, int pageId);
static void traceOpen(void);
static void traceClose(void);
-
+ uint16 checksum(UCHAR *data, size_t len, UCHAR *skipStart, UCHAR *skipEnd);
+ uint16 pageChecksum(Page *page, size_t pageSize);
+ void validateChecksum(Page *page, size_t pageSize, int64 fileOffset);
+
static void createPath (const char *fileName);
static const char *baseName(const char *path);
static void expandFileName(const char *fileName, int length, char *buffer, const char **baseFileName = NULL);
=== modified file 'storage/falcon/Page.h'
--- a/storage/falcon/Page.h 2008-06-17 17:41:54 +0000
+++ b/storage/falcon/Page.h 2008-06-25 00:21:05 +0000
@@ -58,8 +58,8 @@ class EncodedDataStream;
class Page
{
public:
- short pageType;
- short checksum;
+ int16 pageType;
+ uint16 checksum;
#ifdef HAVE_PAGE_NUMBER
int32 pageNumber;
=== modified file 'storage/falcon/SectorBuffer.cpp'
--- a/storage/falcon/SectorBuffer.cpp 2008-06-17 21:00:45 +0000
+++ b/storage/falcon/SectorBuffer.cpp 2008-06-25 00:21:05 +0000
@@ -40,7 +40,7 @@ void SectorBuffer::readPage(Bdb* bdb)
void SectorBuffer::readSector()
{
uint64 offset = sectorNumber * cache->pagesPerSector * cache->pageSize;
- activeLength = dbb->pread(offset, SECTOR_BUFFER_SIZE, buffer);
+ activeLength = dbb->readPages(offset, SECTOR_BUFFER_SIZE, buffer);
}
void SectorBuffer::setSector(Dbb* db, int sector)
=== modified file 'storage/falcon/StorageParameters.h'
--- a/storage/falcon/StorageParameters.h 2008-03-11 16:15:47 +0000
+++ b/storage/falcon/StorageParameters.h 2008-06-25 00:21:05 +0000
@@ -11,7 +11,7 @@
// #define PLUGIN_VAR_OPCMDARG 0x2000 = Argument optional for cmd line
// #define PLUGIN_VAR_MEMALLOC 0x8000 = String needs memory allocated
-
+PARAMETER_BOOL(checksums, "Enable Falcon checksum validation", 1, 0x200, NULL)
PARAMETER_UINT(debug_mask, "Falcon message type mask for logged messages.", 0, 0, INT_MAX, 0, StorageInterface::updateDebugMask)
PARAMETER_BOOL(debug_server, "Enable Falcon debug code.", 0, 0x0200, NULL)
PARAMETER_UINT(debug_trace, "Falcon debug trace trigger.", 0, 0, INT_MAX, 0, NULL)