List: Commits    « Previous Message | Next Message »
From: tomas    Date: June 5 2007 5:09pm
Subject: bk commit into 5.1 tree (tomas:1.2550)
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of tomas. When tomas does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.2550 07/06/05 17:08:55 tomas@stripped +7 -0
  Merge poseidon.mysql.com:/home/tomas/mysql-5.1-telco-gca
  into  poseidon.mysql.com:/home/tomas/mysql-5.1-telco-6.1

  storage/ndb/src/mgmsrv/ConfigInfo.cpp
    1.100 07/06/05 17:08:45 tomas@stripped +0 -0
    Auto merged

  storage/ndb/src/kernel/vm/WatchDog.cpp
    1.10 07/06/05 17:08:45 tomas@stripped +0 -0
    Auto merged

  storage/ndb/src/kernel/vm/SimulatedBlock.hpp
    1.31 07/06/05 17:08:44 tomas@stripped +0 -0
    Auto merged

  storage/ndb/src/kernel/vm/Configuration.hpp
    1.24 07/06/05 17:08:44 tomas@stripped +0 -0
    Auto merged

  storage/ndb/src/kernel/vm/Configuration.cpp
    1.58 07/06/05 17:08:44 tomas@stripped +0 -0
    Auto merged

  storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
    1.51 07/06/05 17:08:44 tomas@stripped +0 -0
    Auto merged

  storage/ndb/include/mgmapi/mgmapi_config_parameters.h
    1.33 07/06/05 17:08:44 tomas@stripped +0 -0
    Auto merged

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	tomas
# Host:	poseidon.mysql.com
# Root:	/home/tomas/mysql-5.1-telco-6.1/RESYNC

--- 1.32/storage/ndb/include/mgmapi/mgmapi_config_parameters.h	2007-06-04 10:27:07 +02:00
+++ 1.33/storage/ndb/include/mgmapi/mgmapi_config_parameters.h	2007-06-05 17:08:44 +02:00
@@ -82,6 +82,8 @@
 #define CFG_DB_BACKUP_WRITE_SIZE          136
 #define CFG_DB_BACKUP_MAX_WRITE_SIZE      139
 
+#define CFG_DB_WATCHDOG_INTERVAL_INITIAL  141
+
 #define CFG_LOG_DESTINATION           147
 
 #define CFG_DB_DISCLESS               148

--- 1.99/storage/ndb/src/mgmsrv/ConfigInfo.cpp	2007-06-04 10:31:05 +02:00
+++ 1.100/storage/ndb/src/mgmsrv/ConfigInfo.cpp	2007-06-05 17:08:45 +02:00
@@ -572,6 +572,18 @@ const ConfigInfo::ParamInfo ConfigInfo::
     STR_VALUE(MAX_INT_RNIL) },
 
   {
+    CFG_DB_WATCHDOG_INTERVAL_INITIAL,
+    "TimeBetweenWatchDogCheckInitial",
+    DB_TOKEN,
+    "Time between execution checks inside a database node in the early start phases when memory is allocated",
+    ConfigInfo::CI_USED,
+    true,
+    ConfigInfo::CI_INT,
+    "6000",
+    "70",
+    STR_VALUE(MAX_INT_RNIL) },
+
+  {
     CFG_DB_STOP_ON_ERROR,
     "StopOnError",
     DB_TOKEN,

--- 1.50/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp	2007-06-04 11:59:22 +02:00
+++ 1.51/storage/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp	2007-06-05 17:08:44 +02:00
@@ -277,6 +277,14 @@ void Ndbcntr::execSTTOR(Signal* signal) 
     break;
   case ZSTART_PHASE_1:
     jam();
+    {
+      Uint32 db_watchdog_interval = 0;
+      const ndb_mgm_configuration_iterator * p = 
+        m_ctx.m_config.getOwnConfigIterator();
+      ndb_mgm_get_int_parameter(p, CFG_DB_WATCHDOG_INTERVAL, &db_watchdog_interval);
+      ndbrequire(db_watchdog_interval);
+      update_watch_dog_timer(db_watchdog_interval);
+    }
     startPhase1Lab(signal);
     break;
   case ZSTART_PHASE_2:

--- 1.57/storage/ndb/src/kernel/vm/Configuration.cpp	2007-01-24 06:20:36 +01:00
+++ 1.58/storage/ndb/src/kernel/vm/Configuration.cpp	2007-06-05 17:08:44 +02:00
@@ -443,6 +443,11 @@ Configuration::setupConfiguration(){
 	      "TimeBetweenWatchDogCheck missing");
   }
 
+  if(iter.get(CFG_DB_WATCHDOG_INTERVAL_INITIAL, &_timeBetweenWatchDogCheckInitial)){
+    ERROR_SET(fatal, NDBD_EXIT_INVALID_CONFIG, "Invalid configuration fetched", 
+	      "TimeBetweenWatchDogCheckInitial missing");
+  }
+
   /**
    * Get paths
    */  
@@ -462,9 +467,12 @@ Configuration::setupConfiguration(){
    * Create the watch dog thread
    */
   { 
-    Uint32 t = _timeBetweenWatchDogCheck;
+    if (_timeBetweenWatchDogCheckInitial < _timeBetweenWatchDogCheck)
+      _timeBetweenWatchDogCheckInitial = _timeBetweenWatchDogCheck;
+
+    Uint32 t = _timeBetweenWatchDogCheckInitial;
     t = globalEmulatorData.theWatchDog ->setCheckInterval(t);
-    _timeBetweenWatchDogCheck = t;
+    _timeBetweenWatchDogCheckInitial = t;
   }
   
   ConfigValues* cf = ConfigValuesFactory::extractCurrentSection(iter.m_config);

--- 1.23/storage/ndb/src/kernel/vm/Configuration.hpp	2007-01-24 06:20:36 +01:00
+++ 1.24/storage/ndb/src/kernel/vm/Configuration.hpp	2007-06-05 17:08:44 +02:00
@@ -84,6 +84,7 @@ private:
   Uint32 _maxErrorLogs;
   Uint32 _lockPagesInMainMemory;
   Uint32 _timeBetweenWatchDogCheck;
+  Uint32 _timeBetweenWatchDogCheckInitial;
 
   ndb_mgm_configuration * m_ownConfig;
   ndb_mgm_configuration * m_clusterConfig;

--- 1.30/storage/ndb/src/kernel/vm/SimulatedBlock.hpp	2007-01-24 06:20:36 +01:00
+++ 1.31/storage/ndb/src/kernel/vm/SimulatedBlock.hpp	2007-06-05 17:08:44 +02:00
@@ -334,7 +334,8 @@ protected:
    * Refresh Watch Dog in initialising code
    *
    */
-  void refresh_watch_dog();
+  void refresh_watch_dog(Uint32 place = 1);
+  void update_watch_dog_timer(Uint32 interval);
 
   /**
    * Prog error

--- 1.9/storage/ndb/src/kernel/vm/WatchDog.cpp	2007-05-28 16:37:53 +02:00
+++ 1.10/storage/ndb/src/kernel/vm/WatchDog.cpp	2007-06-05 17:08:45 +02:00
@@ -25,6 +25,8 @@
 #include <ErrorHandlingMacros.hpp>
 #include <EventLogger.hpp>
 
+#include <NdbTick.h>
+
 extern EventLogger g_eventLogger;
 
 extern "C" 
@@ -72,73 +74,115 @@ WatchDog::doStop(){
   }
 }
 
+const char *get_action(Uint32 IPValue)
+{
+  const char *action;
+  switch (IPValue) {
+  case 1:
+    action = "Job Handling";
+    break;
+  case 2:
+    action = "Scanning Timers";
+    break;
+  case 3:
+    action = "External I/O";
+    break;
+  case 4:
+    action = "Print Job Buffers at crash";
+    break;
+  case 5:
+    action = "Checking connections";
+    break;
+  case 6:
+    action = "Performing Send";
+    break;
+  case 7:
+    action = "Polling for Receive";
+    break;
+  case 8:
+    action = "Performing Receive";
+    break;
+  case 9:
+    action = "Allocating memory";
+    break;
+  default:
+    action = "Unknown place";
+    break;
+  }//switch
+  return action;
+}
+
 void 
-WatchDog::run(){
-  unsigned int anIPValue;
-  unsigned int alerts = 0;
+WatchDog::run()
+{
+  unsigned int anIPValue, sleep_time;
   unsigned int oldIPValue = 0;
-  
+  unsigned int theIntervalCheck = theInterval;
+  struct MicroSecondTimer start_time, last_time, now;
+  NdbTick_getMicroTimer(&start_time);
+  last_time = start_time;
+
   // WatchDog for the single threaded NDB
-  while(!theStop){
-    Uint32 tmp  = theInterval / 500;
-    tmp= (tmp ? tmp : 1);
-    
-    while(!theStop && tmp > 0){
-      NdbSleep_MilliSleep(500);
-      tmp--;
-    }
-    
+  while (!theStop)
+  {
+    sleep_time= 100;
+
+    NdbSleep_MilliSleep(sleep_time);
     if(theStop)
       break;
 
+    NdbTick_getMicroTimer(&now);
+    if (NdbTick_getMicrosPassed(last_time, now)/1000 > sleep_time*2)
+    {
+      struct tms my_tms;
+      times(&my_tms);
+      g_eventLogger.info("Watchdog: User time: %llu  System time: %llu",
+                         (Uint64)my_tms.tms_utime,
+                         (Uint64)my_tms.tms_stime);
+      g_eventLogger.warning("Watchdog: Warning overslept %u ms, expected %u ms.",
+                            NdbTick_getMicrosPassed(last_time, now)/1000,
+                            sleep_time);
+    }
+    last_time = now;
+
     // Verify that the IP thread is not stuck in a loop
     anIPValue = *theIPValue;
-    if(anIPValue != 0) {
+    if (anIPValue != 0)
+    {
       oldIPValue = anIPValue;
       globalData.incrementWatchDogCounter(0);
-      alerts = 0;
-    } else {
-      const char *last_stuck_action;
-      alerts++;
-      switch (oldIPValue) {
-      case 1:
-        last_stuck_action = "Job Handling";
-        break;
-      case 2:
-        last_stuck_action = "Scanning Timers";
-        break;
-      case 3:
-        last_stuck_action = "External I/O";
-        break;
-      case 4:
-        last_stuck_action = "Print Job Buffers at crash";
-        break;
-      case 5:
-        last_stuck_action = "Checking connections";
-        break;
-      case 6:
-        last_stuck_action = "Performing Send";
-        break;
-      case 7:
-        last_stuck_action = "Polling for Receive";
-        break;
-      case 8:
-        last_stuck_action = "Performing Receive";
-        break;
-      default:
-        last_stuck_action = "Unknown place";
-        break;
-      }//switch
-      g_eventLogger.warning("Ndb kernel is stuck in: %s", last_stuck_action);
+      NdbTick_getMicroTimer(&start_time);
+      theIntervalCheck = theInterval;
+    }
+    else
+    {
+      int warn = 1;
+      Uint32 elapsed = NdbTick_getMicrosPassed(start_time, now)/1000;
+      /*
+        oldIPValue == 9 indicates malloc going on, this can take some time
+        so only warn if we pass the watchdog interval
+      */
+      if (oldIPValue == 9)
+        if (elapsed < theIntervalCheck)
+          warn = 0;
+        else
+          theIntervalCheck += theInterval;
+
+      if (warn)
       {
-        struct tms my_tms;
-        times(&my_tms);
-        g_eventLogger.info("User time: %llu  System time: %llu",
-                           (Uint64)my_tms.tms_utime,
-                           (Uint64)my_tms.tms_stime);
-      }
-      if(alerts == 3){
-	shutdownSystem(last_stuck_action);
+        const char *last_stuck_action = get_action(oldIPValue);
+        g_eventLogger.warning("Ndb kernel is stuck in: %s", last_stuck_action);
+        {
+          struct tms my_tms;
+          times(&my_tms);
+          g_eventLogger.info("Watchdog: User time: %llu  System time: %llu",
+                             (Uint64)my_tms.tms_utime,
+                             (Uint64)my_tms.tms_stime);
+        }
+        if (elapsed > 3 * theInterval)
+        {
+          shutdownSystem(last_stuck_action);
+        }
       }
     }
   }
Thread:
bk commit into 5.1 tree (tomas:1.2550) - tomas, 5 Jun