List:Commits« Previous MessageNext Message »
From:jonas Date:August 2 2006 12:27pm
Subject:bk commit into 5.1 tree (jonas:1.2258)
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of jonas. When jonas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2006-08-02 14:27:05+02:00, jonas@stripped +7 -0
  ndb - Add possibility to limit disk write speed in backup(lcp)
        3 new parameters:
        DiskSyncSize - Outstanding disk writes before sync (default 4M)
        DiskCheckpointSpeed - Write speed of LCP in bytes/sec (default 10M)
        DiskCheckpointSpeedInRestart - As above but during restart (default 100M)
  
        Deprecated old NoOfDiskPagesToDisk*
  
      - Change NoOfFragmentLogFiles default to 16 (1Gb)

  storage/ndb/include/kernel/signaldata/BackupContinueB.hpp@stripped, 2006-08-02 14:27:03+02:00, jonas@stripped +2 -1
    Add possibility to limit disk write speed in backup

  storage/ndb/include/mgmapi/mgmapi_config_parameters.h@stripped, 2006-08-02 14:27:03+02:00, jonas@stripped +4 -0
    Add possibility to limit disk write speed in backup

  storage/ndb/src/kernel/blocks/backup/Backup.cpp@stripped, 2006-08-02 14:27:03+02:00, jonas@stripped +140 -35
    Add possibility to limit disk write speed in backup

  storage/ndb/src/kernel/blocks/backup/Backup.hpp@stripped, 2006-08-02 14:27:03+02:00, jonas@stripped +18 -1
    Add possibility to limit disk write speed in backup

  storage/ndb/src/kernel/blocks/backup/BackupInit.cpp@stripped, 2006-08-02 14:27:03+02:00, jonas@stripped +21 -1
    Add possibility to limit disk write speed in backup

  storage/ndb/src/mgmsrv/ConfigInfo.cpp@stripped, 2006-08-02 14:27:03+02:00, jonas@stripped +45 -9
    Add possibility to limit disk write speed in backup
    Change NoOfFragmentLogFiles default to 16 (1Gb)
    DiskSyncSize
    DiskCheckpointSpeed
    DiskCheckpointSpeedInRestart

  storage/ndb/src/mgmsrv/InitConfigFileParser.cpp@stripped, 2006-08-02 14:27:03+02:00, jonas@stripped +12 -0
    Handle deprecation warning also in my.cnf format

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	jonas
# Host:	perch.ndb.mysql.com
# Root:	/home/jonas/src/51-work

--- 1.27/storage/ndb/include/mgmapi/mgmapi_config_parameters.h	2006-08-02 14:27:09 +02:00
+++ 1.28/storage/ndb/include/mgmapi/mgmapi_config_parameters.h	2006-08-02 14:27:09 +02:00
@@ -92,6 +92,10 @@
 #define CFG_DB_DISK_PAGE_BUFFER_MEMORY 160
 #define CFG_DB_STRING_MEMORY          161
 
+#define CFG_DB_DISK_SYNCH_SIZE        163
+#define CFG_DB_CHECKPOINT_SPEED       164
+#define CFG_DB_CHECKPOINT_SPEED_SR    165
+
 #define CFG_DB_SGA                    198 /* super pool mem */
 #define CFG_DB_DATA_MEM_2             199 /* used in special build in 5.1 */
 

--- 1.3/storage/ndb/include/kernel/signaldata/BackupContinueB.hpp	2006-08-02 14:27:09 +02:00
+++ 1.4/storage/ndb/include/kernel/signaldata/BackupContinueB.hpp	2006-08-02 14:27:09 +02:00
@@ -32,7 +32,8 @@
     BUFFER_FULL_SCAN  = 2,
     BUFFER_FULL_FRAG_COMPLETE = 3,
     BUFFER_FULL_META  = 4,
-    BACKUP_FRAGMENT_INFO = 5
+    BACKUP_FRAGMENT_INFO = 5,
+    RESET_DISK_SPEED_COUNTER = 6
   };
 };
 

--- 1.83/storage/ndb/src/mgmsrv/ConfigInfo.cpp	2006-08-02 14:27:09 +02:00
+++ 1.84/storage/ndb/src/mgmsrv/ConfigInfo.cpp	2006-08-02 14:27:09 +02:00
@@ -877,7 +877,7 @@
     ConfigInfo::CI_USED,
     false,
     ConfigInfo::CI_INT,
-    "8",
+    "16",
     "3",
     STR_VALUE(MAX_INT_RNIL) },
 
@@ -952,8 +952,8 @@
     CFG_DB_LCP_DISC_PAGES_TUP_SR,
     "NoOfDiskPagesToDiskDuringRestartTUP",
     DB_TOKEN,
-    "?",
-    ConfigInfo::CI_USED,
+    "DiskCheckpointSpeedSr",
+    ConfigInfo::CI_DEPRICATED,
     true,
     ConfigInfo::CI_INT,
     "40",
@@ -964,8 +964,8 @@
     CFG_DB_LCP_DISC_PAGES_TUP,
     "NoOfDiskPagesToDiskAfterRestartTUP",
     DB_TOKEN,
-    "?",
-    ConfigInfo::CI_USED,
+    "DiskCheckpointSpeed",
+    ConfigInfo::CI_DEPRICATED,
     true,
     ConfigInfo::CI_INT,
     "40",
@@ -976,8 +976,8 @@
     CFG_DB_LCP_DISC_PAGES_ACC_SR,
     "NoOfDiskPagesToDiskDuringRestartACC",
     DB_TOKEN,
-    "?",
-    ConfigInfo::CI_USED,
+    "DiskCheckpointSpeedSr",
+    ConfigInfo::CI_DEPRICATED,
     true,
     ConfigInfo::CI_INT,
     "20",
@@ -988,8 +988,8 @@
     CFG_DB_LCP_DISC_PAGES_ACC,
     "NoOfDiskPagesToDiskAfterRestartACC",
     DB_TOKEN,
-    "?",
-    ConfigInfo::CI_USED,
+    "DiskCheckpointSpeed",
+    ConfigInfo::CI_DEPRICATED,
     true,
     ConfigInfo::CI_INT,
     "20",
@@ -1190,6 +1190,42 @@
     ConfigInfo::CI_STRING,
     UNDEFINED,
     0, 0 },
+  
+  { 
+    CFG_DB_DISK_SYNCH_SIZE,
+    "DiskSyncSize",
+    DB_TOKEN,
+    "Data written to a file before a synch is forced",
+    ConfigInfo::CI_USED,
+    false,
+    ConfigInfo::CI_INT,
+    "4M",
+    "32k",
+    STR_VALUE(MAX_INT_RNIL) },
+  
+  { 
+    CFG_DB_CHECKPOINT_SPEED,
+    "DiskCheckpointSpeed",
+    DB_TOKEN,
+    "Bytes per second allowed to be written by checkpoint",
+    ConfigInfo::CI_USED,
+    false,
+    ConfigInfo::CI_INT,
+    "10M",
+    "1M",
+    STR_VALUE(MAX_INT_RNIL) },
+  
+  { 
+    CFG_DB_CHECKPOINT_SPEED_SR,
+    "DiskCheckpointSpeedInRestart",
+    DB_TOKEN,
+    "Bytes per second allowed to be written by checkpoint during restart",
+    ConfigInfo::CI_USED,
+    false,
+    ConfigInfo::CI_INT,
+    "100M",
+    "1M",
+    STR_VALUE(MAX_INT_RNIL) },
   
   { 
     CFG_DB_BACKUP_MEM,

--- 1.23/storage/ndb/src/mgmsrv/InitConfigFileParser.cpp	2006-08-02 14:27:09 +02:00
+++ 1.24/storage/ndb/src/mgmsrv/InitConfigFileParser.cpp	2006-08-02 14:27:09 +02:00
@@ -655,6 +655,18 @@
 			m_info->getMax(ctx.m_currentInfo, fname));
 	return false;
       }
+
+      ConfigInfo::Status status = m_info->getStatus(ctx.m_currentInfo, fname);
+      if (status == ConfigInfo::CI_DEPRICATED) {
+	const char * desc = m_info->getDescription(ctx.m_currentInfo, fname);
+	if(desc && desc[0]){
+	  ctx.reportWarning("[%s] %s is depricated, use %s instead", 
+			    ctx.fname, fname, desc);
+	} else if (desc == 0){
+	  ctx.reportWarning("[%s] %s is depricated", ctx.fname, fname);
+	} 
+      }
+      
       if (options[i].var_type == GET_INT)
 	ctx.m_currentSection->put(options[i].name, (Uint32)value_int);
       else

--- 1.48/storage/ndb/src/kernel/blocks/backup/Backup.cpp	2006-08-02 14:27:09 +02:00
+++ 1.49/storage/ndb/src/kernel/blocks/backup/Backup.cpp	2006-08-02 14:27:09 +02:00
@@ -84,6 +84,16 @@
   const Uint32 startphase  = signal->theData[1];
   const Uint32 typeOfStart = signal->theData[7];
 
+  if (startphase == 1)
+  {
+    m_curr_disk_write_speed = c_defaults.m_disk_write_speed_sr;
+    m_overflow_disk_write = 0;
+    m_reset_disk_speed_time = NdbTick_CurrentMillisecond();
+    m_reset_delay_used = Backup::DISK_SPEED_CHECK_DELAY;
+    signal->theData[0] = BackupContinueB::RESET_DISK_SPEED_COUNTER;
+    sendSignalWithDelay(BACKUP_REF, GSN_CONTINUEB, signal,
+                        Backup::DISK_SPEED_CHECK_DELAY, 1);
+  }
   if (startphase == 3) {
     jam();
     g_TypeOfStart = typeOfStart;
@@ -92,6 +102,11 @@
     return;
   }//if
 
+  if (startphase == 7)
+  {
+    m_curr_disk_write_speed = c_defaults.m_disk_write_speed;
+  }
+
   if(startphase == 7 && g_TypeOfStart == NodeState::ST_INITIAL_START &&
      c_masterNodeId == getOwnNodeId()){
     jam();
@@ -170,6 +185,42 @@
   const Uint32 Tdata2 = signal->theData[2];
   
   switch(Tdata0) {
+  case BackupContinueB::RESET_DISK_SPEED_COUNTER:
+  {
+    /*
+      Adjust for up to 10 milliseconds of delay of this signal. Longer
+      delays will not be handled; in that case the system is most
+      likely under too high load and it won't matter very much that
+      we decrease the speed of checkpoints.
+
+      We use a technique where we allow an overflow write in one
+      period. This overflow will be removed from the next period
+      such that the load will on average be as specified.
+    */
+    int delay_time = m_reset_delay_used;
+    NDB_TICKS curr_time = NdbTick_CurrentMillisecond();
+    int sig_delay = curr_time - m_reset_disk_speed_time;
+
+    m_words_written_this_period = m_overflow_disk_write;
+    m_overflow_disk_write = 0;
+    m_reset_disk_speed_time = curr_time;
+
+    if (sig_delay > delay_time + 10)
+      delay_time = Backup::DISK_SPEED_CHECK_DELAY - 10;
+    else if (sig_delay < delay_time - 10)
+      delay_time = Backup::DISK_SPEED_CHECK_DELAY + 10;
+    else
+      delay_time = Backup::DISK_SPEED_CHECK_DELAY - (sig_delay - delay_time);
+    m_reset_delay_used= delay_time;
+    signal->theData[0] = BackupContinueB::RESET_DISK_SPEED_COUNTER;
+    sendSignalWithDelay(BACKUP_REF, GSN_CONTINUEB, signal, delay_time, 1);
+#if 0
+    ndbout << "Signal delay was = " << sig_delay;
+    ndbout << " Current time = " << curr_time << endl;
+    ndbout << " Delay time will be = " << delay_time << endl << endl;
+#endif
+    break;
+  }
   case BackupContinueB::BACKUP_FRAGMENT_INFO:
   {
     const Uint32 ptr_I = Tdata1;
@@ -202,8 +253,8 @@
     fragInfo->FragmentNo = htonl(fragPtr_I);
     fragInfo->NoOfRecordsLow = htonl(fragPtr.p->noOfRecords & 0xFFFFFFFF);
     fragInfo->NoOfRecordsHigh = htonl(fragPtr.p->noOfRecords >> 32);
-    fragInfo->FilePosLow = htonl(0 & 0xFFFFFFFF);
-    fragInfo->FilePosHigh = htonl(0 >> 32);
+    fragInfo->FilePosLow = htonl(0);
+    fragInfo->FilePosHigh = htonl(0);
 
     filePtr.p->operation.dataBuffer.updateWritePtr(sz);
 
@@ -938,7 +989,7 @@
     return;
   }//if
 
-  if (m_diskless)
+  if (c_defaults.m_diskless)
   {
     sendBackupRef(senderRef, flags, signal, senderData, 
 		  BackupRef::CannotBackupDiskless);
@@ -2610,9 +2661,10 @@
     FsOpenReq::OM_WRITEONLY | 
     FsOpenReq::OM_TRUNCATE |
     FsOpenReq::OM_CREATE | 
-    FsOpenReq::OM_APPEND;
+    FsOpenReq::OM_APPEND |
+    FsOpenReq::OM_AUTOSYNC;
   FsOpenReq::v2_setCount(req->fileNumber, 0xFFFFFFFF);
-  
+  req->auto_sync_size = c_defaults.m_disk_synch_size;
   /**
    * Ctl file
    */
@@ -3881,6 +3933,69 @@
   checkFile(signal, filePtr);
 }
 
+/*
+  This routine handles two problems with writing to disk during local
+  checkpoints and backups. The first problem is that we need to limit
+  the writing to ensure that we don't use too much CPU and disk resources
+  for backups and checkpoints. The perfect solution to this is to use
+  a dynamic algorithm that adapts to the environment. Until we have
+  implemented this we can satisfy ourselves with an algorithm that
+  uses a configurable limit.
+
+  The second problem is that in Linux we can get severe problems if we
+  write very much to the disk without synching. In the worst case we
+  can have Gigabytes of data in the Linux page cache before we reach
+  the limit of how much we can write. If this happens the performance
+  will drop significantly when we reach this limit since the Linux flush
+  daemon will spend a few minutes on writing out the page cache to disk.
+  To avoid this we ensure that a file never has more than a certain
+  amount of data outstanding before synch. This variable is also
+  configurable.
+*/
+bool
+Backup::ready_to_write(bool ready, Uint32 sz, bool eof, BackupFile *fileP)
+{
+#if 0
+  ndbout << "ready_to_write: ready = " << ready << " eof = " << eof;
+  ndbout << " sz = " << sz << endl;
+  ndbout << "words this period = " << m_words_written_this_period;
+  ndbout << endl << "overflow disk write = " << m_overflow_disk_write;
+  ndbout << endl << "Current Millisecond is = ";
+  ndbout << NdbTick_CurrentMillisecond() << endl;
+#endif
+  if ((ready || eof) &&
+      m_words_written_this_period <= m_curr_disk_write_speed)
+  {
+    /*
+      We have a buffer ready to write or we have reached end of
+      file and thus we must write the last before closing the
+      file.
+      We have already checked that we are allowed to write at this
+      moment. We only worry about the history of the last 100 milliseconds.
+      What happened before that is of no interest since a disk
+      write that was issued more than 100 milliseconds ago should be
+      completed by now.
+    */
+    int overflow;
+    m_words_written_this_period += sz;
+    overflow = m_words_written_this_period - m_curr_disk_write_speed;
+    if (overflow > 0)
+      m_overflow_disk_write = overflow;
+#if 0
+    ndbout << "Will write with " << endl;
+    ndbout << endl;
+#endif
+    return true;
+  }
+  else
+  {
+#if 0
+    ndbout << "Will not write now" << endl << endl;
+#endif
+    return false;
+  }
+}
+
 void
 Backup::checkFile(Signal* signal, BackupFilePtr filePtr)
 {
@@ -3890,35 +4005,23 @@
 #endif
 
   OperationRecord & op = filePtr.p->operation;
-  
-  Uint32 * tmp, sz; bool eof;
-  if(op.dataBuffer.getReadPtr(&tmp, &sz, &eof)) 
+  Uint32 *tmp = NULL;
+  Uint32 sz = 0;
+  bool eof = FALSE;
+  bool ready = op.dataBuffer.getReadPtr(&tmp, &sz, &eof); 
+#if 0
+  ndbout << "Ptr to data = " << hex << tmp << endl;
+#endif
+  if (!ready_to_write(ready, sz, eof, filePtr.p))
   {
     jam();
-    
-    jam();
-    FsAppendReq * req = (FsAppendReq *)signal->getDataPtrSend();
-    req->filePointer   = filePtr.p->filePointer;
-    req->userPointer   = filePtr.i;
-    req->userReference = reference();
-    req->varIndex      = 0;
-    req->offset        = tmp - c_startOfPages;
-    req->size          = sz;
-    
-    sendSignal(NDBFS_REF, GSN_FSAPPENDREQ, signal, 
-	       FsAppendReq::SignalLength, JBA);
-    return;
-  }
-  
-  if(!eof) {
-    jam();
     signal->theData[0] = BackupContinueB::BUFFER_UNDERFLOW;
     signal->theData[1] = filePtr.i;
-    sendSignalWithDelay(BACKUP_REF, GSN_CONTINUEB, signal, 50, 2);
+    sendSignalWithDelay(BACKUP_REF, GSN_CONTINUEB, signal, 20, 2);
     return;
-  }//if
-  
-  if(sz > 0) {
+  }
+  else if (sz > 0)
+  {
     jam();
     FsAppendReq * req = (FsAppendReq *)signal->getDataPtrSend();
     req->filePointer   = filePtr.p->filePointer;
@@ -3926,13 +4029,14 @@
     req->userReference = reference();
     req->varIndex      = 0;
     req->offset        = tmp - c_startOfPages;
-    req->size          = sz; // Round up
+    req->size          = sz;
+    req->synch_flag    = 0;
     
     sendSignal(NDBFS_REF, GSN_FSAPPENDREQ, signal, 
 	       FsAppendReq::SignalLength, JBA);
     return;
-  }//if
-
+  }
+  
 #ifdef DEBUG_ABORT
   Uint32 running= filePtr.p->fileRunning;
   Uint32 closing= filePtr.p->fileClosing;
@@ -4214,16 +4318,15 @@
       continue;
     }//if
 
+    filePtr.p->operation.dataBuffer.eof();
     if(filePtr.p->fileRunning == 1){
       jam();
 #ifdef DEBUG_ABORT
       ndbout_c("Close files fileRunning == 1, filePtr.i=%u", filePtr.i);
 #endif
-      filePtr.p->operation.dataBuffer.eof();
     } else {
       jam();
       filePtr.p->fileClosing = 1;
-      filePtr.p->operation.dataBuffer.eof();
       checkFile(sig, filePtr); // make sure we write everything before closing
 
       FsCloseReq * req = (FsCloseReq *)sig->getDataPtrSend();
@@ -4712,8 +4815,10 @@
     FsOpenReq::OM_WRITEONLY | 
     FsOpenReq::OM_TRUNCATE |
     FsOpenReq::OM_CREATE | 
-    FsOpenReq::OM_APPEND;
+    FsOpenReq::OM_APPEND |
+    FsOpenReq::OM_AUTOSYNC;
   FsOpenReq::v2_setCount(req->fileNumber, 0xFFFFFFFF);
+  req->auto_sync_size = c_defaults.m_disk_synch_size;
   
   TablePtr tabPtr;
   FragmentPtr fragPtr;

--- 1.20/storage/ndb/src/kernel/blocks/backup/Backup.hpp	2006-08-02 14:27:09 +02:00
+++ 1.21/storage/ndb/src/kernel/blocks/backup/Backup.hpp	2006-08-02 14:27:09 +02:00
@@ -33,6 +33,7 @@
 #include <blocks/mutexes.hpp>
 
 #include <NdbTCP.h>
+#include <NdbTick.h>
 #include <Array.hpp>
 
 /**
@@ -522,6 +523,11 @@
     Uint32 m_minWriteSize;
     Uint32 m_maxWriteSize;
     Uint32 m_lcp_buffer_size;
+    
+    Uint32 m_disk_write_speed_sr;
+    Uint32 m_disk_write_speed;
+    Uint32 m_disk_synch_size;
+    Uint32 m_diskless;
   };
   
   /**
@@ -533,8 +539,17 @@
   NdbNodeBitmask c_aliveNodes;
   DLList<BackupRecord> c_backups;
   Config c_defaults;
-  Uint32 m_diskless;
 
+  /*
+    Variables that control checkpoint to disk speed
+  */
+  Uint32 m_curr_disk_write_speed;
+  Uint32 m_words_written_this_period;
+  Uint32 m_overflow_disk_write;
+  Uint32 m_reset_delay_used;
+  NDB_TICKS m_reset_disk_speed_time;
+  static const int  DISK_SPEED_CHECK_DELAY = 100;
+  
   STATIC_CONST(NO_OF_PAGES_META_FILE = MAX_WORDS_META_FILE/BACKUP_WORDS_PER_PAGE);
 
   /**
@@ -631,6 +646,8 @@
   void lcp_open_file_done(Signal*, BackupRecordPtr);
   void lcp_close_file_conf(Signal* signal, BackupRecordPtr);
   void lcp_send_end_lcp_conf(Signal* signal, BackupRecordPtr);
+
+  bool ready_to_write(bool ready, Uint32 sz, bool eof, BackupFile *fileP);
 };
 
 inline

--- 1.24/storage/ndb/src/kernel/blocks/backup/BackupInit.cpp	2006-08-02 14:27:09 +02:00
+++ 1.25/storage/ndb/src/kernel/blocks/backup/BackupInit.cpp	2006-08-02 14:27:09 +02:00
@@ -146,8 +146,28 @@
     m_ctx.m_config.getOwnConfigIterator();
   ndbrequire(p != 0);
 
+  c_defaults.m_disk_write_speed = 10 * (1024 * 1024);
+  c_defaults.m_disk_write_speed_sr = 100 * (1024 * 1024);
+  c_defaults.m_disk_synch_size = 4 * (1024 * 1024);
+  
   Uint32 noBackups = 0, noTables = 0, noAttribs = 0, noFrags = 0;
-  ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DB_DISCLESS, &m_diskless));
+  ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DB_DISCLESS, 
+					&c_defaults.m_diskless));
+  ndb_mgm_get_int_parameter(p, CFG_DB_CHECKPOINT_SPEED_SR,
+			    &c_defaults.m_disk_write_speed_sr);
+  ndb_mgm_get_int_parameter(p, CFG_DB_CHECKPOINT_SPEED,
+			    &c_defaults.m_disk_write_speed);
+  ndb_mgm_get_int_parameter(p, CFG_DB_DISK_SYNCH_SIZE,
+			    &c_defaults.m_disk_synch_size);
+
+  /*
+    We adjust the disk speed parameters from bytes per second to instead be
+    words per 100 milliseconds (4 bytes per word, 10 periods per second).
+    The disk synch size is not converted here.
+  */
+  c_defaults.m_disk_write_speed /= (4 * 10);
+  c_defaults.m_disk_write_speed_sr /= (4 * 10);
+
   ndb_mgm_get_int_parameter(p, CFG_DB_PARALLEL_BACKUPS, &noBackups);
   //  ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DB_NO_TABLES, &noTables));
   ndbrequire(!ndb_mgm_get_int_parameter(p, CFG_DICT_TABLE, &noTables));
Thread
bk commit into 5.1 tree (jonas:1.2258)jonas2 Aug