From: Frazer Clement Date: October 24 2012 4:17pm Subject: bzr push into mysql-5.1-telco-7.1 branch (frazer.clement:4593 to 4594) List-Archive: http://lists.mysql.com/commits/145107 Message-Id: <201210241617.q9OGHfWS001544@acsmt356.oracle.com> MIME-Version: 1.0 Content-Type: text/plain; charset="us-ascii" Content-Transfer-Encoding: 7bit 4594 Frazer Clement 2012-10-24 Commit patches in customer-specific test tree added: patches/ patches/1--3-5320311201-Fix-watchdog patches/2--3-6144827961-DIH-crash-fix patches/3--3-5890290931-Enable-large-hashmaps patches/series 4593 Martin Skold 2012-08-21 {clone-mysql-5.1.63-ndb-7.1.24-src-build} Fixed correct log_part to be displayed in ndbinfo modified: storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp === added directory 'patches' === added file 'patches/1--3-5320311201-Fix-watchdog' --- a/patches/1--3-5320311201-Fix-watchdog 1970-01-01 00:00:00 +0000 +++ b/patches/1--3-5320311201-Fix-watchdog 2012-10-24 16:13:46 +0000 @@ -0,0 +1,51 @@ +1--3-5320311201-Fix-watchdog +--- + storage/ndb/src/kernel/blocks/backup/Backup.cpp | 28 ++++++++++++++++++++++++ + 1 file changed, 28 insertions(+) + +Index: mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/blocks/backup/Backup.cpp +=================================================================== +--- mysql-5.1-telco-7.1.24-alu.orig/storage/ndb/src/kernel/blocks/backup/Backup.cpp 2012-10-24 15:54:04.413660000 +0100 ++++ mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/blocks/backup/Backup.cpp 2012-10-24 16:34:42.338498065 +0100 +@@ -4697,6 +4697,13 @@ Backup::ready_to_write(bool ready, Uint3 + ndbout << endl << "Current Millisecond is = "; + ndbout << NdbTick_CurrentMillisecond() << endl; + #endif ++ ++ if (ERROR_INSERTED(10043) && eof) ++ { ++ /* Block indefinitely without closing the file */ ++ return false; ++ } ++ + if ((ready || eof) && + m_words_written_this_period <= m_curr_disk_write_speed) + { +@@ -5954,6 +5961,27 @@ Backup::execLCP_STATUS_REQ(Signal* signa + 
conf->replicaDoneRowsHi, + conf->replicaDoneRowsLo); + } ++ else if (state == LcpStatusConf::LCP_SCANNED) ++ { ++ /* May take some time to drain the FS buffer, depending on ++ * size of buff, achieved rate. ++ * We'll track this as if it were replica done rows ++ * This should avoid false watchdog failures in systems ++ * with slow disks / bad config. ++ */ ++ BackupFilePtr filePtr; ++ c_backupFilePool.getPtr(filePtr, ptr.p->dataFilePtr); ++ ndbrequire(filePtr.p->backupPtr == ptr.i); ++ Uint64 flushBacklog = ++ filePtr.p->operation.dataBuffer.getUsableSize() - ++ filePtr.p->operation.dataBuffer.getFreeSize(); ++ ++ conf->tableId = 0; ++ conf->fragId = 0; ++ setWords(flushBacklog, ++ conf->replicaDoneRowsHi, ++ conf->replicaDoneRowsLo); ++ } + + failCode = 0; + } === added file 'patches/2--3-6144827961-DIH-crash-fix' --- a/patches/2--3-6144827961-DIH-crash-fix 1970-01-01 00:00:00 +0000 +++ b/patches/2--3-6144827961-DIH-crash-fix 2012-10-24 16:13:46 +0000 @@ -0,0 +1,390 @@ +2--3-6144827961-DIH-crash-fix +--- + storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp | 1 + storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp | 16 + + storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp | 2 + storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp | 105 ++++++++++- + storage/ndb/src/kernel/vm/CountingSemaphore.hpp | 161 +++++++++++++++++ + 5 files changed, 276 insertions(+), 9 deletions(-) + +Index: mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp +=================================================================== +--- mysql-5.1-telco-7.1.24-alu.orig/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2012-10-24 16:47:32.248498419 +0100 ++++ mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp 2012-10-24 16:47:33.698498421 +0100 +@@ -1192,6 +1192,14 @@ void Dbdih::execFSWRITECONF(Signal* sign + break; + case FileRecord::TABLE_WRITE: + jam(); ++ if (ERROR_INSERTED(7235)) ++ { ++ jam(); ++ filePtr.p->reqStatus = status; ++ /* Suspend 
processing of WRITECONFs */ ++ sendSignalWithDelay(reference(), GSN_FSWRITECONF, signal, 1000, signal->getLength()); ++ return; ++ } + tableWriteLab(signal, filePtr); + break; + default: +@@ -13522,10 +13530,26 @@ void Dbdih::execLCP_FRAG_REP(Signal* sig + */ + tabPtr.p->tabLcpStatus = TabRecord::TLS_WRITING_TO_FILE; + tabPtr.p->tabCopyStatus = TabRecord::CS_LCP_READ_TABLE; +- tabPtr.p->tabUpdateState = TabRecord::US_LOCAL_CHECKPOINT; +- signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES; +- signal->theData[1] = tabPtr.i; +- sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB); ++ ++ /** ++ * Check whether we should write immediately, or queue... ++ */ ++ if (c_lcpTabDefWritesControl.requestMustQueue()) ++ { ++ jam(); ++ //ndbout_c("DIH : Queueing tab def flush op on table %u", tabPtr.i); ++ /* Mark as queued - will be started when an already running op completes */ ++ tabPtr.p->tabUpdateState = TabRecord::US_LOCAL_CHECKPOINT_QUEUED; ++ } ++ else ++ { ++ /* Run immediately */ ++ jam(); ++ tabPtr.p->tabUpdateState = TabRecord::US_LOCAL_CHECKPOINT; ++ signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES; ++ signal->theData[1] = tabPtr.i; ++ sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB); ++ } + + bool ret = checkLcpAllTablesDoneInLqh(__LINE__); + if (ret && ERROR_INSERTED(7209)) +@@ -14277,12 +14301,48 @@ void Dbdih::tableCloseLab(Signal* signal + case TabRecord::US_LOCAL_CHECKPOINT: + jam(); + releaseTabPages(tabPtr.i); +- signal->theData[0] = DihContinueB::ZCHECK_LCP_COMPLETED; +- sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB); + + tabPtr.p->tabCopyStatus = TabRecord::CS_IDLE; + tabPtr.p->tabUpdateState = TabRecord::US_IDLE; + tabPtr.p->tabLcpStatus = TabRecord::TLS_COMPLETED; ++ ++ /* Check whether there's some queued table definition flush op to start */ ++ if (c_lcpTabDefWritesControl.releaseMustStartQueued()) ++ { ++ jam(); ++ /* Some table write is queued - let's kick it off */ ++ /* First find it... 
++ * By using the tabUpdateState to 'queue' operations, we lose ++ * the original flush request order, which shouldn't matter. ++ * In any case, the checkpoint proceeds by table id, as does this ++ * search, so a similar order should result ++ */ ++ TabRecordPtr tabPtr; ++ for (tabPtr.i = 0; tabPtr.i < ctabFileSize; tabPtr.i++) ++ { ++ ptrAss(tabPtr, tabRecord); ++ if (tabPtr.p->tabUpdateState == TabRecord::US_LOCAL_CHECKPOINT_QUEUED) ++ { ++ jam(); ++ //ndbout_c("DIH : Starting queued table def flush op on table %u", tabPtr.i); ++ tabPtr.p->tabUpdateState = TabRecord::US_LOCAL_CHECKPOINT; ++ signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES; ++ signal->theData[1] = tabPtr.i; ++ sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB); ++ return; ++ } ++ } ++ /* No queued table write found - error */ ++ ndbout_c("DIH : Error in queued table writes : inUse %u queued %u total %u", ++ c_lcpTabDefWritesControl.inUse, ++ c_lcpTabDefWritesControl.queuedRequests, ++ c_lcpTabDefWritesControl.totalResources); ++ ndbrequire(false); ++ } ++ jam(); ++ signal->theData[0] = DihContinueB::ZCHECK_LCP_COMPLETED; ++ sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB); ++ + return; + break; + case TabRecord::US_REMOVE_NODE: +@@ -18020,6 +18080,39 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal + } + } + ++ if (arg == DumpStateOrd::DihDumpPageRecInfo) ++ { ++ jam(); ++ ndbout_c("MAX_CONCURRENT_LCP_TAB_DEF_FLUSHES %u", MAX_CONCURRENT_LCP_TAB_DEF_FLUSHES); ++ ndbout_c("MAX_CONCURRENT_DIH_TAB_DEF_OPS %u", MAX_CONCURRENT_DIH_TAB_DEF_OPS); ++ ndbout_c("MAX_CRASHED_REPLICAS %u", MAX_CRASHED_REPLICAS); ++ ndbout_c("MAX_LCP_STORED %u", MAX_LCP_STORED); ++ ndbout_c("MAX_REPLICAS %u", MAX_REPLICAS); ++ ndbout_c("MAX_NDB_PARTITIONS %u", MAX_NDB_PARTITIONS); ++ ndbout_c("PACK_REPLICAS_WORDS %u", PACK_REPLICAS_WORDS); ++ ndbout_c("PACK_FRAGMENT_WORDS %u", PACK_FRAGMENT_WORDS); ++ ndbout_c("PACK_TABLE_WORDS %u", PACK_TABLE_WORDS); ++ ndbout_c("PACK_TABLE_PAGE_WORDS %u", PACK_TABLE_PAGE_WORDS); 
++ ndbout_c("PACK_TABLE_PAGES %u", PACK_TABLE_PAGES); ++ ndbout_c("ZPAGEREC %u", ZPAGEREC); ++ ndbout_c("Total bytes : %lu", ZPAGEREC * sizeof(PageRecord)); ++ ndbout_c("LCP Tab def write ops inUse %u queued %u", ++ c_lcpTabDefWritesControl.inUse, ++ c_lcpTabDefWritesControl.queuedRequests); ++ Uint32 freeCount = 0; ++ PageRecordPtr tmp; ++ tmp.i = cfirstfreepage; ++ while (tmp.i != RNIL) ++ { ++ jam(); ++ ptrCheckGuard(tmp, cpageFileSize, pageRecord); ++ freeCount++; ++ tmp.i = tmp.p->nextfreepage; ++ }; ++ ndbout_c("Pages in use %u/%u", cpageFileSize - freeCount, cpageFileSize); ++ return; ++ } ++ + DECLARE_DUMP0(DBDIH, 7213, "Set error 7213 with extra arg") + { + SET_ERROR_INSERT_VALUE2(7213, signal->theData[1]); +Index: mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/vm/CountingSemaphore.hpp +=================================================================== +--- /dev/null 1970-01-01 00:00:00.000000000 +0000 ++++ mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/vm/CountingSemaphore.hpp 2012-10-24 16:47:33.708498421 +0100 +@@ -0,0 +1,161 @@ ++/* ++ Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved. ++ ++ This program is free software; you can redistribute it and/or modify ++ it under the terms of the GNU General Public License as published by ++ the Free Software Foundation; version 2 of the License. ++ ++ This program is distributed in the hope that it will be useful, ++ but WITHOUT ANY WARRANTY; without even the implied warranty of ++ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the ++ GNU General Public License for more details. ++ ++ You should have received a copy of the GNU General Public License ++ along with this program; if not, write to the Free Software ++ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ++*/ ++ ++#ifndef COUNTING_SEMAPHORE_HPP ++#define COUNTING_SEMAPHORE_HPP ++ ++/** ++ * CountingSemaphore ++ * ++ * Helper for limiting concurrency on some resources. 
++ * The Semaphore is created with some maximum concurrency level ++ * Up to this many resources may be concurrently used. ++ * When more than this number of resources are used concurrently, ++ * further requests must queue until a resource is released. ++ * ++ * This structure does not manage queueing and restarting of ++ * resource allocation requests, just monitors the number of ++ * resources in use, and the number of resource requests ++ * queued up. ++ * ++ * To be useful, some external request queueing and dequeuing ++ * mechanism is required. ++ */ ++ class CountingSemaphore ++ { ++ public: ++ CountingSemaphore(): ++ inUse(0), ++ queuedRequests(0), ++ totalResources(1) ++ {}; ++ ++ ~CountingSemaphore() {}; ++ ++ /** ++ * init ++ * Initialise the totalResources ++ */ ++ void init(Uint32 _totalResources) ++ { ++ assert(inUse == 0); ++ totalResources = _totalResources; ++ } ++ ++ /** ++ * requestMustQueue ++ * ++ * Part of semaphore P()/acquire()/down() implementation ++ * ++ * Called to request a resource. ++ * Returns whether the request must be queued, or ++ * can be satisfied immediately. ++ * ++ * true - no resource available, queue request. ++ * false - resource available, proceed. ++ * ++ * e.g. if (.requestMustQueue()) { ++ * queue_request; ++ * return; ++ * } ++ * ++ * proceed; ++ */ ++ bool requestMustQueue() ++ { ++ assert(inUse <= totalResources); ++ if (inUse == totalResources) ++ { ++ queuedRequests++; ++ return true; ++ } ++ else ++ { ++ assert(queuedRequests == 0); ++ inUse++; ++ return false; ++ } ++ } ++ ++ /** ++ * releaseMustStartQueued ++ * ++ * Part of semaphore V()/release()/up() ++ * ++ * Called to release a resource. ++ * Returns whether some queued resource request ++ * must be restarted. ++ * ++ * true - a queued request exists and must be started. ++ * false - no queued request exists, proceed. ++ * ++ * e.g. 
++ * if (.releaseMustStartQueued()) { ++ * dequeue_request; ++ * begin_request_processing; ++ * } ++ * ++ * proceed; ++ */ ++ bool releaseMustStartQueued() ++ { ++ assert(inUse > 0); ++ if (queuedRequests > 0) ++ { ++ assert(inUse == totalResources); ++ queuedRequests--; ++ return true; ++ } ++ ++ inUse--; ++ return false; ++ } ++ ++ /** ++ * getTotalRequests ++ * ++ * Returns the sum of the inuse resources and queued requests. ++ * e.g. the offered concurrency on the resource. ++ */ ++ Uint32 getTotalRequests() const ++ { ++ return inUse + queuedRequests; ++ } ++ ++ /** ++ * getResourcesAvailable() ++ * ++ * Returns the number of resources available currently ++ */ ++ Uint32 getResourcesAvailable() const ++ { ++ assert(inUse <= totalResources); ++ return (totalResources - inUse); ++ } ++ ++ ++ /* inUse - number resources currently in use */ ++ Uint32 inUse; ++ ++ /* queuedRequests - number requests waiting 'outside' */ ++ Uint32 queuedRequests; ++ ++ /* totalResources - the maximum resources in use at one time */ ++ Uint32 totalResources; ++ }; /* CountingSemaphore */ ++ ++#endif +Index: mysql-5.1-telco-7.1.24-alu/storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp +=================================================================== +--- mysql-5.1-telco-7.1.24-alu.orig/storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp 2012-10-24 16:47:32.218498419 +0100 ++++ mysql-5.1-telco-7.1.24-alu/storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp 2012-10-24 16:47:33.708498421 +0100 +@@ -155,6 +155,7 @@ public: + // 7019 + // 7020 + // 7021 ++ DihDumpPageRecInfo = 7032, + EnableUndoDelayDataWrite = 7080, // DIH+ACC+TUP + DihSetTimeBetweenGcp = 7090, + DihStartLcpImmediately = 7099, +Index: mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp +=================================================================== +--- mysql-5.1-telco-7.1.24-alu.orig/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp 2012-10-24 16:47:32.238498419 +0100 ++++ 
mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp 2012-10-24 16:47:33.708498421 +0100 +@@ -29,6 +29,7 @@ + #include + #include + #include ++#include <CountingSemaphore.hpp> + + #ifdef DBDIH_C + +@@ -101,10 +102,13 @@ + /* SIZES */ + /*#########*/ + /* +- * Only pages enough for one table needed, since only +- * one metadata change at the time is allowed. ++ * Pages are used for flushing table definitions during LCP, ++ * and for other operations such as metadata changes etc ++ * + */ +-#define ZPAGEREC PACK_TABLE_PAGES ++#define MAX_CONCURRENT_LCP_TAB_DEF_FLUSHES 4 ++#define MAX_CONCURRENT_DIH_TAB_DEF_OPS (MAX_CONCURRENT_LCP_TAB_DEF_FLUSHES + 2) ++#define ZPAGEREC (MAX_CONCURRENT_DIH_TAB_DEF_OPS * PACK_TABLE_PAGES) + #define ZCREATE_REPLICA_FILE_SIZE 4 + #define ZPROXY_MASTER_FILE_SIZE 10 + #define ZPROXY_FILE_SIZE 10 +@@ -496,6 +500,7 @@ public: + enum UpdateState { + US_IDLE, + US_LOCAL_CHECKPOINT, ++ US_LOCAL_CHECKPOINT_QUEUED, + US_REMOVE_NODE, + US_COPY_TAB_REQ, + US_ADD_TABLE_MASTER, +@@ -1604,6 +1609,11 @@ private: + Uint32 c_set_initial_start_flag; + Uint64 c_current_time; // Updated approx. 
every 10ms + ++ /* Limit the number of concurrent table definition writes during LCP ++ * This avoids exhausting the DIH page pool ++ */ ++ CountingSemaphore c_lcpTabDefWritesControl; ++ + public: + enum LcpMasterTakeOverState { + LMTOS_IDLE = 0, +Index: mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp +=================================================================== +--- mysql-5.1-telco-7.1.24-alu.orig/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp 2012-10-24 16:47:32.268498419 +0100 ++++ mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp 2012-10-24 16:47:33.708498421 +0100 +@@ -71,6 +71,8 @@ void Dbdih::initData() + c_set_initial_start_flag = FALSE; + c_sr_wait_to = false; + c_2pass_inr = false; ++ ++ c_lcpTabDefWritesControl.init(MAX_CONCURRENT_LCP_TAB_DEF_FLUSHES); + }//Dbdih::initData() + + void Dbdih::initRecords() === added file 'patches/3--3-5890290931-Enable-large-hashmaps' --- a/patches/3--3-5890290931-Enable-large-hashmaps 1970-01-01 00:00:00 +0000 +++ b/patches/3--3-5890290931-Enable-large-hashmaps 2012-10-24 16:13:46 +0000 @@ -0,0 +1,20 @@ +--- + storage/ndb/include/kernel/ndb_limits.h | 4 ++++ + 1 file changed, 4 insertions(+) + +Index: mysql-5.1-telco-7.1.24-alu/storage/ndb/include/kernel/ndb_limits.h +=================================================================== +--- mysql-5.1-telco-7.1.24-alu.orig/storage/ndb/include/kernel/ndb_limits.h 2012-10-24 15:54:04.413660000 +0100 ++++ mysql-5.1-telco-7.1.24-alu/storage/ndb/include/kernel/ndb_limits.h 2012-10-24 17:02:54.118498845 +0100 +@@ -220,7 +220,11 @@ + */ + + #if NDB_VERSION_D < NDB_MAKE_VERSION(7,2,0) ++#ifdef NDB_USE_LARGE_HASHMAPS ++#define NDB_DEFAULT_HASHMAP_BUCKETS (48 * 16 * 5) /* 3840 */ ++#else + #define NDB_DEFAULT_HASHMAP_BUCKETS 240 ++#endif + #else + #define NDB_DEFAULT_HASHMAP_BUCKETS (48 * 16 * 5) /* 3840 */ + #endif === added file 'patches/series' --- a/patches/series 1970-01-01 00:00:00 +0000 +++ b/patches/series 
2012-10-24 16:13:46 +0000 @@ -0,0 +1,3 @@ +1--3-5320311201-Fix-watchdog +2--3-6144827961-DIH-crash-fix +3--3-5890290931-Enable-large-hashmaps No bundle (reason: useless for push emails).