List:Internals« Previous MessageNext Message »
From:tomas Date:September 5 2005 3:14pm
Subject:bk commit into 4.1 tree (tulin:1.2407) BUG#12992
View as plain text  
Below is the list of changes that have just been committed into a local
4.1 repository of ndbdev. When ndbdev does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.2407 05/09/05 15:14:44 tulin@stripped +6 -0
  Bug #12992  Cluster StopOnError = Y restarts ndbd indefinitly

  ndb/src/kernel/vm/Emulator.hpp
    1.6 05/09/05 15:14:42 tulin@stripped +2 -1
    Bug #12992  Cluster StopOnError = Y restarts ndbd indefinitly

  ndb/src/kernel/vm/Emulator.cpp
    1.16 05/09/05 15:14:42 tulin@stripped +6 -0
    Bug #12992  Cluster StopOnError = Y restarts ndbd indefinitly

  ndb/src/kernel/main.cpp
    1.43 05/09/05 15:14:42 tulin@stripped +37 -0
    Bug #12992  Cluster StopOnError = Y restarts ndbd indefinitly

  ndb/src/kernel/error/ErrorReporter.hpp
    1.7 05/09/05 15:14:42 tulin@stripped +2 -0
    Bug #12992  Cluster StopOnError = Y restarts ndbd indefinitly

  ndb/src/kernel/error/ErrorReporter.cpp
    1.11 05/09/05 15:14:42 tulin@stripped +12 -2
    Bug #12992  Cluster StopOnError = Y restarts ndbd indefinitly

  ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp
    1.14 05/09/05 15:14:41 tulin@stripped +8 -0
    Bug #12992  Cluster StopOnError = Y restarts ndbd indefinitly

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	tulin
# Host:	dl145b.mysql.com
# Root:	/home/ndbdev/mysql-4.1

--- 1.13/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp	Wed Aug 31 16:14:59 2005
+++ 1.14/ndb/src/kernel/blocks/ndbcntr/NdbcntrMain.cpp	Mon Sep  5 15:14:41 2005
@@ -2493,6 +2493,14 @@
     
     const Uint32 start = currentBlockIndex;
     
+    if (currentStartPhase == ZSTART_PHASE_6)
+    {
+      // Ndbd has passed the critical startphases.
+      // Change error handler from "startup" state
+      // to normal state.
+      ErrorReporter::setErrorHandlerShutdownType();
+    }
+
     for(; currentBlockIndex < ALL_BLOCKS_SZ; currentBlockIndex++){
       jam();
       if(ALL_BLOCKS[currentBlockIndex].NextSP == currentStartPhase){

--- 1.10/ndb/src/kernel/error/ErrorReporter.cpp	Mon May  9 11:04:33 2005
+++ 1.11/ndb/src/kernel/error/ErrorReporter.cpp	Mon Sep  5 15:14:42 2005
@@ -152,6 +152,14 @@
   return;
 }
 
+NdbShutdownType ErrorReporter::s_errorHandlerShutdownType = NST_ErrorHandler;
+
+void
+ErrorReporter::setErrorHandlerShutdownType(NdbShutdownType nst)
+{
+  s_errorHandlerShutdownType = nst;
+}
+
 void
 ErrorReporter::handleAssert(const char* message, const char* file, int line)
 {
@@ -170,7 +178,7 @@
   WriteMessage(assert, ERR_ERROR_PRGERR, message, refMessage,
 	       theEmulatedJamIndex, theEmulatedJam);
 
-  NdbShutdown(NST_ErrorHandler);
+  NdbShutdown(s_errorHandlerShutdownType);
 }
 
 void
@@ -182,7 +190,7 @@
   BaseString::snprintf(refMessage, 100, "file: %s lineNo: %d - %s",
 	   file, line, message);
   
-  NdbShutdown(NST_ErrorHandler);
+  NdbShutdown(s_errorHandlerShutdownType);
 }//ErrorReporter::handleThreadAssert()
 
 
@@ -201,6 +209,8 @@
   if(messageID == ERR_ERROR_INSERT){
     NdbShutdown(NST_ErrorInsert);
   } else {
+    if (nst == NST_ErrorHandler)
+      nst = s_errorHandlerShutdownType;
     NdbShutdown(nst);
   }
 }

--- 1.6/ndb/src/kernel/error/ErrorReporter.hpp	Tue Aug  3 14:08:23 2004
+++ 1.7/ndb/src/kernel/error/ErrorReporter.hpp	Mon Sep  5 15:14:42 2005
@@ -26,6 +26,7 @@
 class ErrorReporter
 {
 public:
+  static void setErrorHandlerShutdownType(NdbShutdownType nst = NST_ErrorHandler);
   static void handleAssert(const char* message, 
 			   const char* file, 
 			   int line);
@@ -57,6 +58,7 @@
   static const char* formatTimeStampString();
   
 private:
+  static enum NdbShutdownType s_errorHandlerShutdownType;
 };
 
 #endif

--- 1.42/ndb/src/kernel/main.cpp	Mon Sep  5 09:29:26 2005
+++ 1.43/ndb/src/kernel/main.cpp	Mon Sep  5 15:14:42 2005
@@ -45,8 +45,14 @@
 
 void catchsigs(bool ignore); // for process signal handling
 
+#define MAX_FAILED_STARTUPS 3
+// Flag set by child through SIGUSR1 to signal a failed startup
+static bool failed_startup_flag = false;
+// Counter for consecutive failed startups
+static Uint32 failed_startups = 0;
 extern "C" void handler_shutdown(int signum);  // for process signal handling
 extern "C" void handler_error(int signum);  // for process signal handling
+extern "C" void handler_sigusr1(int signum);  // child signalling failed restart
 
 // Shows system information
 void systemInfo(const Configuration & conf,
@@ -92,6 +98,8 @@
   }
   
 #ifndef NDB_WIN32
+  signal(SIGUSR1, handler_sigusr1);
+
   for(pid_t child = fork(); child != 0; child = fork()){
     /**
      * Parent
@@ -137,6 +145,20 @@
        */
       exit(0);
     }
+    if (!failed_startup_flag)
+    {
+      // Reset the counter for consecutive failed startups
+      failed_startups = 0;
+    }
+    else if (failed_startups >= MAX_FAILED_STARTUPS && !theConfig->stopOnError())
+    {
+      /**
+       * Error shutdown && stopOnError()
+       */
+      g_eventLogger.alert("Ndbd has failed %u consecutive startups. Not restarting", failed_startups);
+      exit(0);
+    }
+    failed_startup_flag = false;
     g_eventLogger.info("Ndb has terminated (pid %d) restarting", child);
     theConfig->fetch_configuration();
   }
@@ -170,6 +192,9 @@
   /**
    * Do startup
    */
+
+  ErrorReporter::setErrorHandlerShutdownType(NST_ErrorHandlerStartup);
+
   switch(globalData.theRestartFlag){
   case initial_state:
     globalEmulatorData.theThreadConfig->doStart(NodeState::SL_CMVMI);
@@ -358,4 +383,16 @@
   char errorData[40];
   BaseString::snprintf(errorData, 40, "Signal %d received", signum);
   ERROR_SET_SIGNAL(fatal, 0, errorData, __FILE__);
+}
+
+extern "C"
+void 
+handler_sigusr1(int signum)
+{
+  if (!failed_startup_flag)
+  {
+    failed_startups++;
+    failed_startup_flag = true;
+  }
+  g_eventLogger.info("Received signal %d. Ndbd failed startup (%u).", signum, failed_startups);
 }

--- 1.15/ndb/src/kernel/vm/Emulator.cpp	Mon Sep  5 09:29:26 2005
+++ 1.16/ndb/src/kernel/vm/Emulator.cpp	Mon Sep  5 15:14:42 2005
@@ -154,6 +154,9 @@
     case NST_ErrorHandlerSignal:
       g_eventLogger.info("Error handler signal %s system", shutting);
       break;
+    case NST_ErrorHandlerStartup:
+      g_eventLogger.info("Error handler startup %s system", shutting);
+      break;
     case NST_Restart:
       g_eventLogger.info("Restarting system");
       break;
@@ -229,6 +232,9 @@
     }
     
     if(type != NST_Normal && type != NST_Restart){
+      // Signal parent that error occured during startup
+      if (type == NST_ErrorHandlerStartup)
+	kill(getppid(), SIGUSR1);
       g_eventLogger.info("Error handler shutdown completed - %s", exitAbort);
 #if ( defined VM_TRACE || defined ERROR_INSERT ) && ( ! ( defined NDB_OSE || defined NDB_SOFTOSE) )
       signal(6, SIG_DFL);

--- 1.5/ndb/src/kernel/vm/Emulator.hpp	Wed Feb 23 10:12:20 2005
+++ 1.6/ndb/src/kernel/vm/Emulator.hpp	Mon Sep  5 15:14:42 2005
@@ -83,7 +83,8 @@
   NST_ErrorHandler,
   NST_ErrorHandlerSignal,
   NST_Restart,
-  NST_ErrorInsert
+  NST_ErrorInsert,
+  NST_ErrorHandlerStartup
 };
 
 enum NdbRestartType {
Thread
bk commit into 4.1 tree (tulin:1.2407) BUG#12992tomas5 Sep