List: Commits | « Previous Message | Next Message »
From: Frazer Clement | Date: October 24 2012 4:17pm
Subject: bzr push into mysql-5.1-telco-7.1 branch (frazer.clement:4593 to 4594)
View as plain text  
 4594 Frazer Clement	2012-10-24
      Commit patches in customer-specific test tree

    added:
      patches/
      patches/1--3-5320311201-Fix-watchdog
      patches/2--3-6144827961-DIH-crash-fix
      patches/3--3-5890290931-Enable-large-hashmaps
      patches/series
 4593 Martin Skold	2012-08-21 {clone-mysql-5.1.63-ndb-7.1.24-src-build}
      Fixed correct log_part to be displayed in ndbinfo

    modified:
      storage/ndb/src/kernel/blocks/dblqh/DblqhMain.cpp
=== added directory 'patches'
=== added file 'patches/1--3-5320311201-Fix-watchdog'
--- a/patches/1--3-5320311201-Fix-watchdog	1970-01-01 00:00:00 +0000
+++ b/patches/1--3-5320311201-Fix-watchdog	2012-10-24 16:13:46 +0000
@@ -0,0 +1,51 @@
+1--3-5320311201-Fix-watchdog
+---
+ storage/ndb/src/kernel/blocks/backup/Backup.cpp |   28 ++++++++++++++++++++++++
+ 1 file changed, 28 insertions(+)
+
+Index: mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/blocks/backup/Backup.cpp
+===================================================================
+--- mysql-5.1-telco-7.1.24-alu.orig/storage/ndb/src/kernel/blocks/backup/Backup.cpp	2012-10-24 15:54:04.413660000 +0100
++++ mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/blocks/backup/Backup.cpp	2012-10-24 16:34:42.338498065 +0100
+@@ -4697,6 +4697,13 @@ Backup::ready_to_write(bool ready, Uint3
+   ndbout << endl << "Current Millisecond is = ";
+   ndbout << NdbTick_CurrentMillisecond() << endl;
+ #endif
++
++  if (ERROR_INSERTED(10043) && eof)
++  {
++    /* Block indefinitely without closing the file */
++    return false;
++  }
++
+   if ((ready || eof) &&
+       m_words_written_this_period <= m_curr_disk_write_speed)
+   {
+@@ -5954,6 +5961,27 @@ Backup::execLCP_STATUS_REQ(Signal* signa
+                  conf->replicaDoneRowsHi,
+                  conf->replicaDoneRowsLo);
+       }
++      else if (state == LcpStatusConf::LCP_SCANNED)
++      {
++        /* May take some time to drain the FS buffer, depending on
++         * size of buff, achieved rate.
++         * We'll track this as if it were replica done rows
++         * This should avoid false watchdog failures in systems
++         * with slow disks / bad config.
++         */
++        BackupFilePtr filePtr;
++        c_backupFilePool.getPtr(filePtr, ptr.p->dataFilePtr);
++        ndbrequire(filePtr.p->backupPtr == ptr.i);
++        Uint64 flushBacklog = 
++          filePtr.p->operation.dataBuffer.getUsableSize() -
++          filePtr.p->operation.dataBuffer.getFreeSize();
++        
++        conf->tableId = 0;
++        conf->fragId = 0;
++        setWords(flushBacklog,
++                 conf->replicaDoneRowsHi,
++                 conf->replicaDoneRowsLo);
++      }
+       
+       failCode = 0;
+     }

=== added file 'patches/2--3-6144827961-DIH-crash-fix'
--- a/patches/2--3-6144827961-DIH-crash-fix	1970-01-01 00:00:00 +0000
+++ b/patches/2--3-6144827961-DIH-crash-fix	2012-10-24 16:13:46 +0000
@@ -0,0 +1,390 @@
+2--3-6144827961-DIH-crash-fix
+---
+ storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp |    1 
+ storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp          |   16 +
+ storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp      |    2 
+ storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp      |  105 ++++++++++-
+ storage/ndb/src/kernel/vm/CountingSemaphore.hpp        |  161 +++++++++++++++++
+ 5 files changed, 276 insertions(+), 9 deletions(-)
+
+Index: mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp
+===================================================================
+--- mysql-5.1-telco-7.1.24-alu.orig/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2012-10-24 16:47:32.248498419 +0100
++++ mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/blocks/dbdih/DbdihMain.cpp	2012-10-24 16:47:33.698498421 +0100
+@@ -1192,6 +1192,14 @@ void Dbdih::execFSWRITECONF(Signal* sign
+     break;
+   case FileRecord::TABLE_WRITE:
+     jam();
++    if (ERROR_INSERTED(7235))
++    {
++      jam();
++      filePtr.p->reqStatus = status;
++      /* Suspend processing of WRITECONFs */
++      sendSignalWithDelay(reference(), GSN_FSWRITECONF, signal, 1000, signal->getLength());
++      return;
++    }
+     tableWriteLab(signal, filePtr);
+     break;
+   default:
+@@ -13522,10 +13530,26 @@ void Dbdih::execLCP_FRAG_REP(Signal* sig
+        */
+       tabPtr.p->tabLcpStatus = TabRecord::TLS_WRITING_TO_FILE;
+       tabPtr.p->tabCopyStatus = TabRecord::CS_LCP_READ_TABLE;
+-      tabPtr.p->tabUpdateState = TabRecord::US_LOCAL_CHECKPOINT;
+-      signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
+-      signal->theData[1] = tabPtr.i;
+-      sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
++
++      /**
++       * Check whether we should write immediately, or queue...
++       */
++      if (c_lcpTabDefWritesControl.requestMustQueue())
++      {
++        jam();
++        //ndbout_c("DIH : Queueing tab def flush op on table %u", tabPtr.i);
++        /* Mark as queued - will be started when an already running op completes */
++        tabPtr.p->tabUpdateState = TabRecord::US_LOCAL_CHECKPOINT_QUEUED;
++      }
++      else
++      {
++        /* Run immediately */
++        jam();
++        tabPtr.p->tabUpdateState = TabRecord::US_LOCAL_CHECKPOINT;
++        signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
++        signal->theData[1] = tabPtr.i;
++        sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
++      }
+       
+       bool ret = checkLcpAllTablesDoneInLqh(__LINE__);
+       if (ret && ERROR_INSERTED(7209))
+@@ -14277,12 +14301,48 @@ void Dbdih::tableCloseLab(Signal* signal
+   case TabRecord::US_LOCAL_CHECKPOINT:
+     jam();
+     releaseTabPages(tabPtr.i);
+-    signal->theData[0] = DihContinueB::ZCHECK_LCP_COMPLETED;
+-    sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
+ 
+     tabPtr.p->tabCopyStatus = TabRecord::CS_IDLE;
+     tabPtr.p->tabUpdateState = TabRecord::US_IDLE;
+     tabPtr.p->tabLcpStatus = TabRecord::TLS_COMPLETED;
++
++    /* Check whether there's some queued table definition flush op to start */
++    if (c_lcpTabDefWritesControl.releaseMustStartQueued())
++    {
++      jam();
++      /* Some table write is queued - let's kick it off */
++      /* First find it...
++       *   By using the tabUpdateState to 'queue' operations, we lose
++       *   the original flush request order, which shouldn't matter.
++       *   In any case, the checkpoint proceeds by table id, as does this
++       *   search, so a similar order should result
++       */
++      TabRecordPtr tabPtr;
++      for (tabPtr.i = 0; tabPtr.i < ctabFileSize; tabPtr.i++)
++      {
++        ptrAss(tabPtr, tabRecord);
++        if (tabPtr.p->tabUpdateState == TabRecord::US_LOCAL_CHECKPOINT_QUEUED)
++        {
++          jam();
++          //ndbout_c("DIH : Starting queued table def flush op on table %u", tabPtr.i);
++          tabPtr.p->tabUpdateState = TabRecord::US_LOCAL_CHECKPOINT;
++          signal->theData[0] = DihContinueB::ZPACK_TABLE_INTO_PAGES;
++          signal->theData[1] = tabPtr.i;
++          sendSignal(reference(), GSN_CONTINUEB, signal, 2, JBB);
++          return;
++        }
++      }
++      /* No queued table write found - error */
++      ndbout_c("DIH : Error in queued table writes : inUse %u queued %u total %u",
++               c_lcpTabDefWritesControl.inUse,
++               c_lcpTabDefWritesControl.queuedRequests,
++               c_lcpTabDefWritesControl.totalResources);
++      ndbrequire(false);
++    }
++    jam();
++    signal->theData[0] = DihContinueB::ZCHECK_LCP_COMPLETED;
++    sendSignal(reference(), GSN_CONTINUEB, signal, 1, JBB);
++
+     return;
+     break;
+   case TabRecord::US_REMOVE_NODE:
+@@ -18020,6 +18080,39 @@ Dbdih::execDUMP_STATE_ORD(Signal* signal
+     }
+   }
+ 
++  if (arg == DumpStateOrd::DihDumpPageRecInfo)
++  {
++    jam();
++    ndbout_c("MAX_CONCURRENT_LCP_TAB_DEF_FLUSHES %u", MAX_CONCURRENT_LCP_TAB_DEF_FLUSHES);
++    ndbout_c("MAX_CONCURRENT_DIH_TAB_DEF_OPS %u", MAX_CONCURRENT_DIH_TAB_DEF_OPS);
++    ndbout_c("MAX_CRASHED_REPLICAS %u", MAX_CRASHED_REPLICAS);
++    ndbout_c("MAX_LCP_STORED %u", MAX_LCP_STORED);
++    ndbout_c("MAX_REPLICAS %u", MAX_REPLICAS);
++    ndbout_c("MAX_NDB_PARTITIONS %u", MAX_NDB_PARTITIONS);
++    ndbout_c("PACK_REPLICAS_WORDS %u", PACK_REPLICAS_WORDS);
++    ndbout_c("PACK_FRAGMENT_WORDS %u", PACK_FRAGMENT_WORDS);
++    ndbout_c("PACK_TABLE_WORDS %u", PACK_TABLE_WORDS);
++    ndbout_c("PACK_TABLE_PAGE_WORDS %u", PACK_TABLE_PAGE_WORDS);
++    ndbout_c("PACK_TABLE_PAGES %u", PACK_TABLE_PAGES);
++    ndbout_c("ZPAGEREC %u", ZPAGEREC);
++    ndbout_c("Total bytes : %lu", ZPAGEREC * sizeof(PageRecord));
++    ndbout_c("LCP Tab def write ops inUse %u queued %u",
++             c_lcpTabDefWritesControl.inUse,
++             c_lcpTabDefWritesControl.queuedRequests);
++    Uint32 freeCount = 0;
++    PageRecordPtr tmp;
++    tmp.i = cfirstfreepage;
++    while (tmp.i != RNIL)
++    {
++      jam();
++      ptrCheckGuard(tmp, cpageFileSize, pageRecord);
++      freeCount++;
++      tmp.i = tmp.p->nextfreepage;
++    };
++    ndbout_c("Pages in use %u/%u", cpageFileSize - freeCount, cpageFileSize);
++    return;
++  }
++
+   DECLARE_DUMP0(DBDIH, 7213, "Set error 7213 with extra arg")
+   {
+     SET_ERROR_INSERT_VALUE2(7213, signal->theData[1]);
+Index: mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/vm/CountingSemaphore.hpp
+===================================================================
+--- /dev/null	1970-01-01 00:00:00.000000000 +0000
++++ mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/vm/CountingSemaphore.hpp	2012-10-24 16:47:33.708498421 +0100
+@@ -0,0 +1,161 @@
++/*
++   Copyright (c) 2012 Oracle and/or its affiliates. All rights reserved.
++
++   This program is free software; you can redistribute it and/or modify
++   it under the terms of the GNU General Public License as published by
++   the Free Software Foundation; version 2 of the License.
++
++   This program is distributed in the hope that it will be useful,
++   but WITHOUT ANY WARRANTY; without even the implied warranty of
++   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
++   GNU General Public License for more details.
++
++   You should have received a copy of the GNU General Public License
++   along with this program; if not, write to the Free Software
++   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301  USA
++*/
++
++#ifndef COUNTING_SEMAPHORE_HPP
++#define COUNTING_SEMAPHORE_HPP
++
++/**
++  * CountingSemaphore
++  *
++  * Helper for limiting concurrency on some resources.
++  * The Semaphore is created with some maximum concurrency level
++  * Up to this many resources may be concurrently used.
++  * When more than this number of resources are used concurrently,
++  * further requests must queue until a resource is released.
++  * 
++  * This structure does not manage queueing and restarting of 
++  * resource allocation requests, just monitors the number of
++  * resources in use, and the number of resource requests 
++  * queued up.
++  *
++  * To be useful, some external request queueing and dequeuing
++  * mechanism is required.
++  */
++ class CountingSemaphore
++ {
++ public:
++   CountingSemaphore():
++     inUse(0),
++     queuedRequests(0),
++     totalResources(1)
++     {};
++
++   ~CountingSemaphore() {};
++
++   /** 
++    * init
++    * Initialise the totalResources
++    */
++   void init(Uint32 _totalResources)
++   {
++     assert(inUse == 0);
++     totalResources = _totalResources;
++   }
++
++   /**
++    * requestMustQueue
++    * 
++    * Part of semaphore P()/acquire()/down() implementation
++    * 
++    * Called to request a resource.
++    * Returns whether the request must be queued, or
++    * can be satisfied immediately.
++    *
++    * true  - no resource available, queue request.
++    * false - resource available, proceed.
++    *
++    * e.g. if (<sema>.requestMustQueue()) {
++    *        queue_request;
++    *        return;
++    *      }
++    *
++    *      proceed;
++    */
++   bool requestMustQueue()
++   {
++     assert(inUse <= totalResources);
++     if (inUse == totalResources)
++     {
++       queuedRequests++;
++       return true;
++     }
++     else
++     {
++       assert(queuedRequests == 0);
++       inUse++;
++       return false;
++     }
++   }
++   
++   /**
++    * releaseMustStartQueued
++    *
++    * Part of semaphore V()/release()/up()
++    *
++    * Called to release a resource.
++    * Returns whether some queued resource request
++    * must be restarted.
++    *
++    * true  - a queued request exists and must be started.
++    * false - no queued request exists, proceed.
++    *
++    * e.g.
++    *   if (<sema>.releaseMustStartQueued()) {
++    *     dequeue_request;
++    *     begin_request_processing;
++    *   }
++    *   
++    *   proceed;
++    */
++   bool releaseMustStartQueued()
++   {
++     assert(inUse > 0);
++     if (queuedRequests > 0)
++     {
++       assert(inUse == totalResources);
++       queuedRequests--;
++       return true;
++     }
++
++     inUse--;
++     return false;
++   }
++
++   /**
++    * getTotalRequests
++    * 
++    * Returns the sum of the inuse resources and queued requests.
++    * e.g. the offered concurrency on the resource.
++    */
++   Uint32 getTotalRequests() const
++   {
++     return inUse + queuedRequests;
++   }
++
++   /**
++    * getResourcesAvailable()
++    * 
++    * Returns the number of resources available currently
++    */
++   Uint32 getResourcesAvailable() const
++   {
++     assert(inUse <= totalResources);
++     return (totalResources - inUse);
++   }
++
++
++   /* inUse - number resources currently in use */
++   Uint32 inUse;
++   
++   /* queuedRequests - number requests waiting 'outside' */
++   Uint32 queuedRequests;
++   
++   /* totalResources - the maximum resources in use at one time */
++   Uint32 totalResources;
++ }; /* CountingSemaphore */
++
++#endif
+Index: mysql-5.1-telco-7.1.24-alu/storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp
+===================================================================
+--- mysql-5.1-telco-7.1.24-alu.orig/storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp	2012-10-24 16:47:32.218498419 +0100
++++ mysql-5.1-telco-7.1.24-alu/storage/ndb/include/kernel/signaldata/DumpStateOrd.hpp	2012-10-24 16:47:33.708498421 +0100
+@@ -155,6 +155,7 @@ public:
+     // 7019
+     // 7020
+     // 7021
++    DihDumpPageRecInfo = 7032,
+     EnableUndoDelayDataWrite = 7080, // DIH+ACC+TUP
+     DihSetTimeBetweenGcp = 7090,
+     DihStartLcpImmediately = 7099,
+Index: mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp
+===================================================================
+--- mysql-5.1-telco-7.1.24-alu.orig/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp	2012-10-24 16:47:32.238498419 +0100
++++ mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/blocks/dbdih/Dbdih.hpp	2012-10-24 16:47:33.708498421 +0100
+@@ -29,6 +29,7 @@
+ #include <blocks/mutexes.hpp>
+ #include <signaldata/LCP.hpp>
+ #include <NdbSeqLock.hpp>
++#include <CountingSemaphore.hpp>
+ 
+ #ifdef DBDIH_C
+ 
+@@ -101,10 +102,13 @@
+ /* SIZES   */
+ /*#########*/
+ /*
+- * Only pages enough for one table needed, since only
+- * one metadata change at the time is allowed.
++ * Pages are used for flushing table definitions during LCP,
++ * and for other operations such as metadata changes etc
++ * 
+  */
+-#define ZPAGEREC PACK_TABLE_PAGES
++#define MAX_CONCURRENT_LCP_TAB_DEF_FLUSHES 4
++#define MAX_CONCURRENT_DIH_TAB_DEF_OPS (MAX_CONCURRENT_LCP_TAB_DEF_FLUSHES + 2)
++#define ZPAGEREC (MAX_CONCURRENT_DIH_TAB_DEF_OPS * PACK_TABLE_PAGES)
+ #define ZCREATE_REPLICA_FILE_SIZE 4
+ #define ZPROXY_MASTER_FILE_SIZE 10
+ #define ZPROXY_FILE_SIZE 10
+@@ -496,6 +500,7 @@ public:
+     enum UpdateState {
+       US_IDLE,
+       US_LOCAL_CHECKPOINT,
++      US_LOCAL_CHECKPOINT_QUEUED,
+       US_REMOVE_NODE,
+       US_COPY_TAB_REQ,
+       US_ADD_TABLE_MASTER,
+@@ -1604,6 +1609,11 @@ private:
+   Uint32 c_set_initial_start_flag;
+   Uint64 c_current_time; // Updated approx. every 10ms
+ 
++  /* Limit the number of concurrent table definition writes during LCP
++   * This avoids exhausting the DIH page pool
++   */
++  CountingSemaphore c_lcpTabDefWritesControl;
++
+ public:
+   enum LcpMasterTakeOverState {
+     LMTOS_IDLE = 0,
+Index: mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp
+===================================================================
+--- mysql-5.1-telco-7.1.24-alu.orig/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp	2012-10-24 16:47:32.268498419 +0100
++++ mysql-5.1-telco-7.1.24-alu/storage/ndb/src/kernel/blocks/dbdih/DbdihInit.cpp	2012-10-24 16:47:33.708498421 +0100
+@@ -71,6 +71,8 @@ void Dbdih::initData()
+   c_set_initial_start_flag = FALSE;
+   c_sr_wait_to = false;
+   c_2pass_inr = false;
++
++  c_lcpTabDefWritesControl.init(MAX_CONCURRENT_LCP_TAB_DEF_FLUSHES);
+ }//Dbdih::initData()
+ 
+ void Dbdih::initRecords()

=== added file 'patches/3--3-5890290931-Enable-large-hashmaps'
--- a/patches/3--3-5890290931-Enable-large-hashmaps	1970-01-01 00:00:00 +0000
+++ b/patches/3--3-5890290931-Enable-large-hashmaps	2012-10-24 16:13:46 +0000
@@ -0,0 +1,20 @@
+---
+ storage/ndb/include/kernel/ndb_limits.h |    4 ++++
+ 1 file changed, 4 insertions(+)
+
+Index: mysql-5.1-telco-7.1.24-alu/storage/ndb/include/kernel/ndb_limits.h
+===================================================================
+--- mysql-5.1-telco-7.1.24-alu.orig/storage/ndb/include/kernel/ndb_limits.h	2012-10-24 15:54:04.413660000 +0100
++++ mysql-5.1-telco-7.1.24-alu/storage/ndb/include/kernel/ndb_limits.h	2012-10-24 17:02:54.118498845 +0100
+@@ -220,7 +220,11 @@
+  */
+ 
+ #if NDB_VERSION_D < NDB_MAKE_VERSION(7,2,0)
++#ifdef NDB_USE_LARGE_HASHMAPS
++#define NDB_DEFAULT_HASHMAP_BUCKETS (48 * 16 * 5) /* 3840 */
++#else
+ #define NDB_DEFAULT_HASHMAP_BUCKETS 240
++#endif
+ #else
+ #define NDB_DEFAULT_HASHMAP_BUCKETS (48 * 16 * 5) /* 3840 */
+ #endif

=== added file 'patches/series'
--- a/patches/series	1970-01-01 00:00:00 +0000
+++ b/patches/series	2012-10-24 16:13:46 +0000
@@ -0,0 +1,3 @@
+1--3-5320311201-Fix-watchdog
+2--3-6144827961-DIH-crash-fix
+3--3-5890290931-Enable-large-hashmaps

No bundle (reason: useless for push emails).
Thread
bzr push into mysql-5.1-telco-7.1 branch (frazer.clement:4593 to 4594) - Frazer Clement, 25 Oct