List:Commits« Previous MessageNext Message »
From:Alexander Nozdrin Date:February 8 2007 8:34pm
Subject:bk commit into 5.1 tree (anozdrin:1.2412) BUG#24415
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of alik. When alik does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-02-08 23:34:32+03:00, anozdrin@stripped +5 -0
  Fix for BUG#24415: Instance manager test im_daemon_life_cycle fails randomly.
  
  The cause of im_daemon_life_cycle.imtest random failures was the following
  behaviour of some implementations of LINUX threads: let's suppose that a process
  had several threads (in LINUX threads, there is a separate process for each
  thread). When the main process gets killed, the parent receives SIGCHLD before
  all threads (child processes) die. In other words, the parent receives SIGCHLD,
  when its child is not completely dead.
  
  In terms of IM, that means that IM-angel receives SIGCHLD when IM-main is not
  yet dead and still holds some resources. After receiving SIGCHLD, IM-angel
  restarts IM-main, but the new IM-main fails to initialize, because the previous
  instance (copy) of IM-main still holds the server socket (TCP port).
  
  Another problem here was that IM-angel restarted IM-main only if it was killed
  by a signal. If it exited with an error, IM-angel assumed it was an intended /
  graceful shutdown and exited itself.
  
  So, when the second instance of IM-main failed to initialize, IM-angel assumed
  it was an intended shutdown and quit.
  
  The fix is
    1. to change IM-angel so that it restarts IM-main if it exited with error code;
    2. to change IM-main so that it returns proper exit code in case of failure.
  
  The patch is committed to 5.1, because the bug is not critical.

  server-tools/instance-manager/angel.cc@stripped, 2007-02-08 23:34:30+03:00, anozdrin@stripped +12 -13
    1. Restart IM-main if exit code is not EXIT_SUCCESS (0).
    2. Log IM-main exit code in case of failure.

  server-tools/instance-manager/listener.cc@stripped, 2007-02-08 23:34:30+03:00, anozdrin@stripped +4 -0
    Set error status if Listener failed to initialize.

  server-tools/instance-manager/manager.cc@stripped, 2007-02-08 23:34:30+03:00, anozdrin@stripped +7 -6
    Return exit code from the manager.

  server-tools/instance-manager/thread_registry.cc@stripped, 2007-02-08 23:34:30+03:00, anozdrin@stripped +9 -0
    Add support for exit code.

  server-tools/instance-manager/thread_registry.h@stripped, 2007-02-08 23:34:30+03:00, anozdrin@stripped +4 -0
    Add support for exit code.

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	anozdrin
# Host:	alik.opbmk
# Root:	/mnt/raid/alik/MySQL/devel/5.1-rt-im

--- 1.1/server-tools/instance-manager/angel.cc	2007-02-08 23:34:37 +03:00
+++ 1.2/server-tools/instance-manager/angel.cc	2007-02-08 23:34:37 +03:00
@@ -36,6 +36,7 @@ enum { CHILD_OK= 0, CHILD_NEED_RESPAWN, 
 static int log_fd;
 
 static volatile sig_atomic_t child_status= CHILD_OK;
+static volatile sig_atomic_t child_exit_code= 0;
 static volatile sig_atomic_t shutdown_request_signo= 0;
 
 
@@ -208,24 +209,20 @@ static bool create_pid_file()
     reap_child()
 
   DESCRIPTION
-    Reap child, analyze child exit status, and set child_status
+    Reap child, analyze child exit code, and set child_status
     appropriately.
 ************************************************************************/
 
 void reap_child(int __attribute__((unused)) signo)
 {
-  int child_exit_status;
-  /* As we have only one child, no need to cycle waitpid */
-  if (waitpid(0, &child_exit_status, WNOHANG) > 0)
+  /* NOTE: As we have only one child, no need to cycle waitpid(). */
+
+  int exit_code;
+
+  if (waitpid(0, &exit_code, WNOHANG) > 0)
   {
-    if (WIFSIGNALED(child_exit_status))
-      child_status= CHILD_NEED_RESPAWN;
-    else
-      /*
-        As reap_child is not called for SIGSTOP, we should be here only
-        if the child exited normally.
-      */
-      child_status= CHILD_EXIT_ANGEL;
+    child_exit_code= exit_code;
+    child_status= exit_code ? CHILD_NEED_RESPAWN : CHILD_EXIT_ANGEL;
   }
 }
 
@@ -353,7 +350,9 @@ static int angel_main_loop()
     {
       child_status= CHILD_OK;
 
-      log_error("Angel: Manager exited abnormally.");
+      log_error("Angel: Manager exited abnormally (exit code: %d).",
+                (int) child_exit_code);
+
       log_info("Angel: sleeping 1 second...");
 
       sleep(1); /* don't respawn too fast */

--- 1.40/server-tools/instance-manager/listener.cc	2007-02-08 23:34:37 +03:00
+++ 1.41/server-tools/instance-manager/listener.cc	2007-02-08 23:34:37 +03:00
@@ -177,12 +177,16 @@ void Listener::run()
   return;
 
 err:
+  log_error("Listener: failed to initialize. Initiate shutdown...");
+
   // we have to close the ip sockets in case of error
   for (i= 0; i < num_sockets; i++)
     closesocket(sockets[i]);
 
+  thread_registry->set_error_status();
   thread_registry->unregister_thread(&thread_info);
   thread_registry->request_shutdown();
+
   return;
 }
 

--- 1.47/server-tools/instance-manager/manager.cc	2007-02-08 23:34:37 +03:00
+++ 1.48/server-tools/instance-manager/manager.cc	2007-02-08 23:34:37 +03:00
@@ -179,6 +179,9 @@ void Manager::stop_all_threads()
 
   /* Stop all threads. */
   p_thread_registry->deliver_shutdown();
+
+  /* Set error status in the thread registry. */
+  p_thread_registry->set_error_status();
 }
 
 
@@ -194,13 +197,13 @@ void Manager::stop_all_threads()
     See also comments in mysqlmanager.cc to picture general Instance Manager
     architecture.
 
-  TODO: how about returning error status.
+  RETURN
+    main() returns exit status (exit code).
 */
 
 int Manager::main()
 {
   int err_code;
-  int rc= 1;
   const char *err_msg;
   bool shutdown_complete= FALSE;
   pid_t manager_pid= getpid();
@@ -418,8 +421,6 @@ int Manager::main()
 
   log_info("Manager: finished.");
 
-  rc= 0;
-
 err:
   /* delete the pid file */
   my_delete(Options::Main::pid_file_name, MYF(0));
@@ -427,9 +428,9 @@ err:
 #ifndef __WIN__
   /* free alarm structures */
   end_thr_alarm(1);
-  /* don't pthread_exit to kill all threads who did not shut down in time */
 #endif
-  return rc;
+
+  return thread_registry.get_error_status() ? 1 : 0;
 }
 
 

--- 1.18/server-tools/instance-manager/thread_registry.cc	2007-02-08 23:34:37 +03:00
+++ 1.19/server-tools/instance-manager/thread_registry.cc	2007-02-08 23:34:37 +03:00
@@ -52,6 +52,7 @@ void Thread_info::init(bool send_signal_
 Thread_registry::Thread_registry() :
    shutdown_in_progress(FALSE)
   ,sigwait_thread_pid(pthread_self())
+  ,error_status(FALSE)
 {
   pthread_mutex_init(&LOCK_thread_registry, 0);
   pthread_cond_init(&COND_thread_registry_is_empty, 0);
@@ -297,6 +298,14 @@ void Thread_registry::wait_for_threads_t
       return;
     }
   }
+}
+
+
+void Thread_registry::set_error_status()
+{
+  pthread_mutex_lock(&LOCK_thread_registry);
+  error_status= TRUE;
+  pthread_mutex_unlock(&LOCK_thread_registry);
 }
 
 

--- 1.12/server-tools/instance-manager/thread_registry.h	2007-02-08 23:34:37 +03:00
+++ 1.13/server-tools/instance-manager/thread_registry.h	2007-02-08 23:34:37 +03:00
@@ -144,6 +144,9 @@ public:
   int cond_timedwait(Thread_info *info, pthread_cond_t *cond,
                      pthread_mutex_t *mutex, struct timespec *wait_time);
 
+  void set_error_status();
+  inline int get_error_status() const { return error_status; }
+
 private:
   void interrupt_threads();
   void wait_for_threads_to_unregister();
@@ -154,6 +157,7 @@ private:
   pthread_mutex_t LOCK_thread_registry;
   pthread_cond_t COND_thread_registry_is_empty;
   pthread_t sigwait_thread_pid;
+  bool error_status;
 
 private:
   Thread_registry(const Thread_registry &);
Thread
bk commit into 5.1 tree (anozdrin:1.2412) BUG#24415Alexander Nozdrin8 Feb