4594 Frazer Clement 2012-10-24
Commit patches in customer-specific test tree
added:
patches/
patches/1--3-5320311201-Fix-watchdog
patches/2--3-6144827961-DIH-crash-fix
patches/3--3-5890290931-Enable-large-hashmaps
patches/series
4593 Martin Skold 2012-08-21 {clone-mysql-5.1.63-ndb-7.1.24-src-build}
Fixed correct log_part to be displayed in ndbinfo
modified:
storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
=== added directory 'patches'
=== added file 'patches/1--3-5320311201-Fix-watchdog'
--- a/patches/1--3-5320311201-Fix-watchdog 1970-01-01 00:00:00 +0000
+++ b/patches/1--3-5320311201-Fix-watchdog 2012-10-24 16:13:46 +0000
@@ -0,0 +1,51 @@
+1--3-5320311201-Fix-watchdog
+---
+ storage/ndb/src/kernel/blocks/backup/Backup.cpp | 28 ++++++++++++++++++++++++
+ 1 file changed, 28 insertions(+)
+
+Index: mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/blocks/backup/Backup.cpp
+===================================================================
+--- mysql-5.1-telco-7.1.24-alu.orig/storage/ndb/src/kernel/blocks/backup/Backup.cpp 2012-10-24 15:54:04.413660000 +0100
++++ mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/blocks/backup/Backup.cpp 2012-10-24 16:34:42.338498065 +0100
+@@ -4697,6 +4697,13 @@ Backup::ready_to_write(bool ready, Uint3
+ ndbout << endl << "Current Millisecond is = ";
+ ndbout << NdbTick_CurrentMillisecond() << endl;
+ #endif
++
++ if (ERROR_INSERTED(10043) && eof)
++ {
++ /* Block indefinitely without closing the file */
++ return false;
++ }
++
+ if ((ready || eof) &&
+ m_words_written_this_period <= m_curr_disk_write_speed)
+ {
+@@ -5954,6 +5961,27 @@ Backup::execLCP_STATUS_REQ(Signal* signa
+ conf->replicaDoneRowsHi,
+ conf->replicaDoneRowsLo);
+ }
++ else if (state == LcpStatusConf::LCP_SCANNED)
++ {
++ /* It may take some time to drain the FS buffer, depending on
++ * the size of the buffer and the achieved write rate.
++ * We'll track the remaining backlog as if it were replica done rows.
++ * This should avoid false watchdog failures in systems
++ * with slow disks / bad config.
++ */
++ BackupFilePtr filePtr;
++ c_backupFilePool.getPtr(filePtr, ptr.p->dataFilePtr);
++ ndbrequire(filePtr.p->backupPtr == ptr.i);
++ Uint64 flushBacklog =
++ filePtr.p->operation.dataBuffer.getUsableSize() -
++ filePtr.p->operation.dataBuffer.getFreeSize();
++
++ conf->tableId = 0;
++ conf->fragId = 0;
++ setWords(flushBacklog,
++ conf->replicaDoneRowsHi,
++ conf->replicaDoneRowsLo);
++ }
+
+ failCode = 0;
+ }
=== added file 'patches/2--3-6144827961-DIH-crash-fix'
--- a/patches/2--3-6144827961-DIH-crash-fix 1970-01-01 00:00:00 +0000
+++ b/patches/2--3-6144827961-DIH-crash-fix 2012-10-24 16:13:46 +0000
@@ -0,0 +1,390 @@
+2--3-6144827961-DIH-crash-fix
+---
+ storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp | 1
+ storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp | 16 +
+ storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp | 2
+ storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp | 105 ++++++++++-
+ storage/ndb/src/kernel/vm/CountingSemaphore.hpp | 161 +++++++++++++++++
+ 5 files changed, 276 insertions(+), 9 deletions(-)
+
+Index: mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
+===================================================================
+--- mysql-5.1-telco-7.1.24-alu.orig/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2012-10-24 16:47:32.248498419 +0100
++++ mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2012-10-24 16:47:33.698498421 +0100
+@@ -1192,6 +1192,14 @@ void Dbdih::execFSWRITECONF(Signal* sign
+ break;
+ case FileRecord::TABLE_WRITE:
+ jam();
++ if (ERROR_INSERTED(7235))
++ {
++ jam();
++ filePtr.p->reqStatus = status;
++ /* Suspend processing of WRITECONFs */
++ sendSignalWithDelay(reference(), GSN_FSWRITECONF, signal, 1000, signal->getLength());
++ return;
++ }
+ tableWriteLab(signal, filePtr);
+ break;
+ default:
+@@ -13522,10 +13530,26 @@ void Dbdih::execLCP_FRAG_REP(Signal* sig
+ */
+ tabPtr.p->tabLcpStatus = TabRecord::TLS_WRITING_TO_FILE;
+ tabPtr.p->tabCopyStatus = TabRecord::CS_LCP_READ_TABLE;
+- tabPtr.p->tabUpdateState = TabRecord::US_LOCAL_CHECKPOINT;
+- signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
+- signal->theData[1] = tabPtr.i;
+- sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
++
++ /**
++ * Check whether we should write immediately, or queue...
++ */
++ if (c_lcpTabDefWritesControl.requestMustQueue())
++ {
++ jam();
++ //ndbout_c("DIH : Queueing tab def flush op on table %u", tabPtr.i);
++ /* Mark as queued - will be started when an already running op completes */
++ tabPtr.p->tabUpdateState = TabRecord::US_LOCAL_CHECKPOINT_QUEUED;
++ }
++ else
++ {
++ /* Run immediately */
++ jam();
++ tabPtr.p->tabUpdateState = TabRecord::US_LOCAL_CHECKPOINT;
++ signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
++ signal->theData[1] = tabPtr.i;
++ sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
++ }
+
+ bool ret = checkLcpAllTablesDoneInLqh(__LINE__);
+ if (ret && ERROR_INSERTED(7209))
+@@ -14277,12 +14301,48 @@ void Dbdih::tableCloseLab(Signal* signal
+ case TabRecord::US_LOCAL_CHECKPOINT:
+ jam();
+ releaseTabPages(tabPtr.i);
+- signal->theData[0] = DihContinueB::ZCHECK_LCP_COMPLETED;
+- sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
+
+ tabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
+ tabPtr.p->tabUpdateState = TabRecord::US_IDLE;
+ tabPtr.p->tabLcpStatus = TabRecord::TLS_COMPLETED;
++
++ /* Check whether there's some queued table definition flush op to start */
++ if (c_lcpTabDefWritesControl.releaseMustStartQueued())
++ {
++ jam();
++ /* Some table write is queued - let's kick it off */
++ /* First find it...
++ * By using the tabUpdateState to 'queue' operations, we lose
++ * the original flush request order, which shouldn't matter.
++ * In any case, the checkpoint proceeds by table id, as does this
++ * search, so a similar order should result
++ */
++ TabRecordPtr tabPtr;
++ for (tabPtr.i = 0; tabPtr.i < ctabFileSize; tabPtr.i++)
++ {
++ ptrAss(tabPtr, tabRecord);
++ if (tabPtr.p->tabUpdateState == TabRecord::US_LOCAL_CHECKPOINT_QUEUED)
++ {
++ jam();
++ //ndbout_c("DIH : Starting queued table def flush op on table %u", tabPtr.i);
++ tabPtr.p->tabUpdateState = TabRecord::US_LOCAL_CHECKPOINT;
++ signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
++ signal->theData[1] = tabPtr.i;
++ sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
++ return;
++ }
++ }
++ /* No queued table write found - error */
++ ndbout_c("DIH : Error in queued table writes : inUse %u queued %u total %u",
++ c_lcpTabDefWritesControl.inUse,
++ c_lcpTabDefWritesControl.queuedRequests,
++ c_lcpTabDefWritesControl.totalResources);
++ ndbrequire(false);
++ }
++ jam();
++ signal->theData[0] = DihContinueB::ZCHECK_LCP_COMPLETED;
++ sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
++
+ return;
+ break;
+ case TabRecord::US_REMOVE_NODE:
+@@ -18020,6 +18080,39 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal
+ }
+ }
+
++ if (arg == DumpStateOrd::DihDumpPageRecInfo)
++ {
++ jam();
++ ndbout_c("MAX_CONCURRENT_LCP_TAB_DEF_FLUSHES %u", MAX_CONCURRENT_LCP_TAB_DEF_FLUSHES);
++ ndbout_c("MAX_CONCURRENT_DIH_TAB_DEF_OPS %u", MAX_CONCURRENT_DIH_TAB_DEF_OPS);
++ ndbout_c("MAX_CRASHED_REPLICAS %u", MAX_CRASHED_REPLICAS);
++ ndbout_c("MAX_LCP_STORED %u", MAX_LCP_STORED);
++ ndbout_c("MAX_REPLICAS %u", MAX_REPLICAS);
++ ndbout_c("MAX_NDB_PARTITIONS %u", MAX_NDB_PARTITIONS);
++ ndbout_c("PACK_REPLICAS_WORDS %u", PACK_REPLICAS_WORDS);
++ ndbout_c("PACK_FRAGMENT_WORDS %u", PACK_FRAGMENT_WORDS);
++ ndbout_c("PACK_TABLE_WORDS %u", PACK_TABLE_WORDS);
++ ndbout_c("PACK_TABLE_PAGE_WORDS %u", PACK_TABLE_PAGE_WORDS);
++ ndbout_c("PACK_TABLE_PAGES %u", PACK_TABLE_PAGES);
++ ndbout_c("ZPAGEREC %u", ZPAGEREC);
++ ndbout_c("Total bytes : %lu", ZPAGEREC * sizeof(PageRecord));
++ ndbout_c("LCP Tab def write ops inUse %u queued %u",
++ c_lcpTabDefWritesControl.inUse,
++ c_lcpTabDefWritesControl.queuedRequests);
++ Uint32 freeCount = 0;
++ PageRecordPtr tmp;
++ tmp.i = cfirstfreepage;
++ while (tmp.i != RNIL)
++ {
++ jam();
++ ptrCheckGuard(tmp, cpageFileSize, pageRecord);
++ freeCount++;
++ tmp.i = tmp.p->nextfreepage;
++ };
++ ndbout_c("Pages in use %u/%u", cpageFileSize - freeCount, cpageFileSize);
++ return;
++ }
++
+ DECLARE_DUMP0(DBDIH, 7213, "Set error 7213 with extra arg")
+ {
+ SET_ERROR_INSERT_VALUE2(7213, signal->theData[1]);
+Index: mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/vm/CountingSemaphore.hpp
+===================================================================
+--- /dev/null 1970-01-01 00:00:00.000000000 +0000
++++ mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/vm/CountingSemaphore.hpp 2012-10-24 16:47:33.708498421 +0100
+@@ -0,0 +1,161 @@
++/*
++ Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
++
++ This program is free software; you can redistribute it and/or modify
++ it under the terms of the GNU General Public License as published by
++ the Free Software Foundation; version 2 of the License.
++
++ This program is distributed in the hope that it will be useful,
++ but WITHOUT ANY WARRANTY; without even the implied warranty of
++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
++ GNU General Public License for more details.
++
++ You should have received a copy of the GNU General Public License
++ along with this program; if not, write to the Free Software
++ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
++*/
++
++#ifndef COUNTING_SEMAPHORE_HPP
++#define COUNTING_SEMAPHORE_HPP
++
++/**
++ * CountingSemaphore
++ *
++ * Helper for limiting concurrency on some resource.
++ * The semaphore has a maximum concurrency level (totalResources,
++ * 1 by default, set with init()); up to this many resources may be
++ * in use concurrently.  Once that many resources are in use,
++ * further requests must queue until a resource is released.
++ *
++ * This structure does not manage queueing and restarting of
++ * resource allocation requests; it only counts the number of
++ * resources in use and the number of resource requests
++ * queued up.  Nothing in this class blocks or takes a lock.
++ *
++ * To be useful, some external request queueing and dequeuing
++ * mechanism is required.
++ */
++ class CountingSemaphore
++ {
++ public:
++ CountingSemaphore():
++ inUse(0),
++ queuedRequests(0),
++ totalResources(1)
++ {};
++
++ ~CountingSemaphore() {};
++
++ /**
++ * init
++ * Set totalResources (the concurrency limit); asserts that nothing is in use
++ */
++ void init(Uint32 _totalResources)
++ {
++ assert(inUse == 0);
++ totalResources = _totalResources;
++ }
++
++ /**
++ * requestMustQueue
++ *
++ * Part of semaphore P()/acquire()/down() implementation
++ *
++ * Called to request a resource.
++ * Returns whether the request must be queued, or
++ * can be satisfied immediately.
++ *
++ * true - no resource available (queuedRequests incremented), queue request.
++ * false - resource available (inUse incremented), proceed.
++ *
++ * e.g. if (<sema>.requestMustQueue()) {
++ * queue_request;
++ * return;
++ * }
++ *
++ * proceed;
++ */
++ bool requestMustQueue()
++ {
++ assert(inUse <= totalResources);
++ if (inUse == totalResources)
++ {
++ queuedRequests++;
++ return true;
++ }
++ else
++ {
++ assert(queuedRequests == 0);
++ inUse++;
++ return false;
++ }
++ }
++
++ /**
++ * releaseMustStartQueued
++ *
++ * Part of semaphore V()/release()/up() implementation
++ *
++ * Called to release a resource.
++ * Returns whether some queued resource request
++ * must be restarted.
++ *
++ * true - a queued request exists and must be started (queuedRequests decremented).
++ * false - no queued request exists (inUse decremented), proceed.
++ * On true, inUse is left unchanged, i.e. the released resource is counted as taken over by the dequeued request.
++ * e.g.
++ * if (<sema>.releaseMustStartQueued()) {
++ * dequeue_request;
++ * begin_request_processing;
++ * }
++ *
++ * proceed;
++ */
++ bool releaseMustStartQueued()
++ {
++ assert(inUse > 0);
++ if (queuedRequests > 0)
++ {
++ assert(inUse == totalResources);
++ queuedRequests--;
++ return true;
++ }
++
++ inUse--;
++ return false;
++ }
++
++ /**
++ * getTotalRequests
++ *
++ * Returns the sum of the in-use resources and queued requests,
++ * i.e. the offered concurrency on the resource.
++ */
++ Uint32 getTotalRequests() const
++ {
++ return inUse + queuedRequests;
++ }
++
++ /**
++ * getResourcesAvailable()
++ *
++ * Returns the number of resources currently available (totalResources - inUse)
++ */
++ Uint32 getResourcesAvailable() const
++ {
++ assert(inUse <= totalResources);
++ return (totalResources - inUse);
++ }
++
++
++ /* inUse - number of resources currently in use */
++ Uint32 inUse;
++
++ /* queuedRequests - number of requests waiting 'outside' */
++ Uint32 queuedRequests;
++
++ /* totalResources - the maximum number of resources in use at one time */
++ Uint32 totalResources;
++ }; /* CountingSemaphore */
++
++#endif
+Index: mysql-5.1-telco-7.1.24-alu/storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp
+===================================================================
+--- mysql-5.1-telco-7.1.24-alu.orig/storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp 2012-10-24 16:47:32.218498419 +0100
++++ mysql-5.1-telco-7.1.24-alu/storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp 2012-10-24 16:47:33.708498421 +0100
+@@ -155,6 +155,7 @@ public:
+ // 7019
+ // 7020
+ // 7021
++ DihDumpPageRecInfo = 7032,
+ EnableUndoDelayDataWrite = 7080, // DIH+ACC+TUP
+ DihSetTimeBetweenGcp = 7090,
+ DihStartLcpImmediately = 7099,
+Index: mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp
+===================================================================
+--- mysql-5.1-telco-7.1.24-alu.orig/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp 2012-10-24 16:47:32.238498419 +0100
++++ mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp 2012-10-24 16:47:33.708498421 +0100
+@@ -29,6 +29,7 @@
+ #include <blocks/mutexes.hpp>
+ #include <signaldata/LCP.hpp>
+ #include <NdbSeqLock.hpp>
++#include <CountingSemaphore.hpp>
+
+ #ifdef DBDIH_C
+
+@@ -101,10 +102,13 @@
+ /* SIZES */
+ /*#########*/
+ /*
+- * Only pages enough for one table needed, since only
+- * one metadata change at the time is allowed.
++ * Pages are used for flushing table definitions during LCP,
++ * and for other operations such as metadata changes etc
++ *
+ */
+-#define ZPAGEREC PACK_TABLE_PAGES
++#define MAX_CONCURRENT_LCP_TAB_DEF_FLUSHES 4
++#define MAX_CONCURRENT_DIH_TAB_DEF_OPS (MAX_CONCURRENT_LCP_TAB_DEF_FLUSHES + 2)
++#define ZPAGEREC (MAX_CONCURRENT_DIH_TAB_DEF_OPS * PACK_TABLE_PAGES)
+ #define ZCREATE_REPLICA_FILE_SIZE 4
+ #define ZPROXY_MASTER_FILE_SIZE 10
+ #define ZPROXY_FILE_SIZE 10
+@@ -496,6 +500,7 @@ public:
+ enum UpdateState {
+ US_IDLE,
+ US_LOCAL_CHECKPOINT,
++ US_LOCAL_CHECKPOINT_QUEUED,
+ US_REMOVE_NODE,
+ US_COPY_TAB_REQ,
+ US_ADD_TABLE_MASTER,
+@@ -1604,6 +1609,11 @@ private:
+ Uint32 c_set_initial_start_flag;
+ Uint64 c_current_time; // Updated approx. every 10ms
+
++ /* Limit the number of concurrent table definition writes during LCP
++ * This avoids exhausting the DIH page pool
++ */
++ CountingSemaphore c_lcpTabDefWritesControl;
++
+ public:
+ enum LcpMasterTakeOverState {
+ LMTOS_IDLE = 0,
+Index: mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp
+===================================================================
+--- mysql-5.1-telco-7.1.24-alu.orig/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp 2012-10-24 16:47:32.268498419 +0100
++++ mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp 2012-10-24 16:47:33.708498421 +0100
+@@ -71,6 +71,8 @@ void Dbdih::initData()
+ c_set_initial_start_flag = FALSE;
+ c_sr_wait_to = false;
+ c_2pass_inr = false;
++
++ c_lcpTabDefWritesControl.init(MAX_CONCURRENT_LCP_TAB_DEF_FLUSHES);
+ }//Dbdih::initData()
+
+ void Dbdih::initRecords()
=== added file 'patches/3--3-5890290931-Enable-large-hashmaps'
--- a/patches/3--3-5890290931-Enable-large-hashmaps 1970-01-01 00:00:00 +0000
+++ b/patches/3--3-5890290931-Enable-large-hashmaps 2012-10-24 16:13:46 +0000
@@ -0,0 +1,20 @@
+---
+ storage/ndb/include/kernel/ndb_limits.h | 4 ++++
+ 1 file changed, 4 insertions(+)
+
+Index: mysql-5.1-telco-7.1.24-alu/storage/ndb/include/kernel/ndb_limits.h
+===================================================================
+--- mysql-5.1-telco-7.1.24-alu.orig/storage/ndb/include/kernel/ndb_limits.h 2012-10-24 15:54:04.413660000 +0100
++++ mysql-5.1-telco-7.1.24-alu/storage/ndb/include/kernel/ndb_limits.h 2012-10-24 17:02:54.118498845 +0100
+@@ -220,7 +220,11 @@
+ */
+
+ #if NDB_VERSION_D < NDB_MAKE_VERSION(7,2,0)
++#ifdef NDB_USE_LARGE_HASHMAPS
++#define NDB_DEFAULT_HASHMAP_BUCKETS (48 * 16 * 5) /* 3840 */
++#else
+ #define NDB_DEFAULT_HASHMAP_BUCKETS 240
++#endif
+ #else
+ #define NDB_DEFAULT_HASHMAP_BUCKETS (48 * 16 * 5) /* 3840 */
+ #endif
=== added file 'patches/series'
--- a/patches/series 1970-01-01 00:00:00 +0000
+++ b/patches/series 2012-10-24 16:13:46 +0000
@@ -0,0 +1,3 @@
+1--3-5320311201-Fix-watchdog
+2--3-6144827961-DIH-crash-fix
+3--3-5890290931-Enable-large-hashmaps
No bundle (reason: useless for push emails).
| Thread |
|---|
| • bzr push into mysql-5.1-telco-7.1 branch (frazer.clement:4593 to 4594) | Frazer Clement | 25 Oct |