MySQL Lists are EOL. Please join:

List: Commits  « Previous Message | Next Message »
From: msvensson  Date: June 20 2007 9:47am
Subject:bk commit into 5.0 tree (msvensson:1.2531) BUG#28742
View as plain text  
Below is the list of changes that have just been committed into a local
5.0 repository of msvensson. When msvensson does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet@stripped, 2007-06-20 11:47:55+02:00, msvensson@pilot.(none) +3 -0
  Bug#28742 mysql-test-run is very slow on "Stopping All Servers" step
   - Improve shutdown algorithm 
   - Wait up to 5 seconds for processes to exit after their port is free

  mysql-test/lib/mtr_process.pl@stripped, 2007-06-20 11:47:54+02:00, msvensson@pilot.(none) +81 -88
    Improve the shutdown algorithm: shut the server down hard
    if it hasn't responded to "mysqladmin shutdown" and its port is free.
    Print a message to the server's error log indicating a "hard shutdown".
    Give processes up to 5 seconds to exit after their port is free.

  mysql-test/lib/mtr_report.pl@stripped, 2007-06-20 11:47:54+02:00, msvensson@pilot.(none) +1 -1
    Indicate in what file the warning was found

  mysql-test/mysql-test-run.pl@stripped, 2007-06-20 11:47:54+02:00, msvensson@pilot.(none) +3 -0
    Pass path of process error log to 'mtr_check_stop_servers'

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	msvensson
# Host:	pilot.(none)
# Root:	/data/msvensson/mysql/bug28742/my50-bug28742

--- 1.56/mysql-test/lib/mtr_process.pl	2007-04-23 11:03:39 +02:00
+++ 1.57/mysql-test/lib/mtr_process.pl	2007-06-20 11:47:54 +02:00
@@ -547,72 +547,87 @@ sub mtr_kill_leftovers () {
 }
 
 
-# Check that all processes in list are killed
-# The argument is a list of 'ports', 'pids', 'pidfiles' and 'socketfiles'
-# for which shutdown has been started. Make sure they all get killed
-# in one way or the other.
 #
-# FIXME On Cygwin, and maybe some other platforms, $srv->{'pid'} and
-# the pid in $srv->{'pidfile'} will not be the same PID. We need to try to kill
-# both I think.
-
+# Check that all processes in "spec" are shutdown gracefully
+# else kill them off hard
+#
 sub mtr_check_stop_servers ($) {
   my $spec=  shift;
 
   # Return if no processes are defined
   return if ! @$spec;
 
-  #mtr_report("mtr_check_stop_servers");
+  mtr_verbose("mtr_check_stop_servers");
 
+  # ----------------------------------------------------------------------
+  # Wait until servers in "spec" has stopped listening
+  # to their ports or timeout occurs
+  # ----------------------------------------------------------------------
   mtr_ping_with_timeout(\@$spec);
 
   # ----------------------------------------------------------------------
-  # We loop with waitpid() nonblocking to see how many of the ones we
-  # are to kill, actually got killed by mysqladmin or ndb_mgm
-  #
-  # Note that we don't rely on this, the mysqld server might have stopped
-  # listening to the port, but still be alive. But it is a start.
+  # Use waitpid() nonblocking for a little while, to see how
+  # many process's will exit sucessfully.
+  # This is the normal case.
   # ----------------------------------------------------------------------
-
+  my $wait_counter= 50; # Max number of times to redo the loop
   foreach my $srv ( @$spec )
   {
+    my $pid= $srv->{'pid'};
     my $ret_pid;
-    if ( $srv->{'pid'} )
+    if ( $pid )
     {
-      $ret_pid= waitpid($srv->{'pid'},&WNOHANG);
-      if ($ret_pid == $srv->{'pid'})
+      $ret_pid= waitpid($pid,&WNOHANG);
+      if ($ret_pid == $pid)
       {
 	mtr_verbose("Caught exit of process $ret_pid");
 	$srv->{'pid'}= 0;
       }
+      elsif ($ret_pid == 0)
+      {
+	mtr_verbose("Process $pid is still alive");
+	if ($wait_counter-- > 0)
+	{
+	  # Give the processes more time to exit
+	  select(undef, undef, undef, (0.1));
+	  redo;
+	}
+      }
       else
       {
-	# mtr_warning("caught exit of unknown child $ret_pid");
+	mtr_warning("caught exit of unknown child $ret_pid");
       }
     }
   }
 
   # ----------------------------------------------------------------------
-  # We know the process was started from this file, so there is a PID
-  # saved, or else we have nothing to do.
-  # Might be that is is recorded to be missing, but we failed to
-  # take away the PID file earlier, then we do it now.
+  # The processes that haven't yet exited need to
+  # be killed hard, put them in "kill_pids" hash
   # ----------------------------------------------------------------------
-
-  my %mysqld_pids;
-
+  my %kill_pids;
   foreach my $srv ( @$spec )
   {
-    if ( $srv->{'pid'} )
+    my $pid= $srv->{'pid'};
+    if ( $pid )
     {
-      $mysqld_pids{$srv->{'pid'}}= 1;
+      # Server is still alive, put it in list to be hard killed
+      $kill_pids{$pid}= 1;
+
+      # Write a message to the process's error log (if it has one)
+      # that it's being killed hard.
+      if ( defined $srv->{'errfile'} )
+      {
+	mtr_tofile($srv->{'errfile'}, "Note: Forcing kill of process $pid\n");
+      }
+      mtr_warning("Forcing kill of process $pid");
+
     }
     else
     {
-      # Server is dead, we remove the pidfile if any
-      # Race, could have been removed between I tested with -f
-      # and the unlink() below, so I better check again with -f
-
+      # Server is dead, remove the pidfile if it exists
+      #
+      # Race, could have been removed between test with -f
+      # and the unlink() below, so better check again with -f
       if ( -f $srv->{'pidfile'} and ! unlink($srv->{'pidfile'}) and
            -f $srv->{'pidfile'} )
       {
@@ -621,69 +636,35 @@ sub mtr_check_stop_servers ($) {
     }
   }
 
-  # ----------------------------------------------------------------------
-  # If all the processes in list already have been killed,
-  # then we don't have to do anything.
-  # ----------------------------------------------------------------------
-
-  if ( ! keys %mysqld_pids )
+  if ( ! keys %kill_pids )
   {
+    # All processes has exited gracefully
     return;
   }
 
-  # ----------------------------------------------------------------------
-  # In mtr_mysqladmin_shutdown() we only waited for the mysqld servers
-  # not to listen to the port. But we are not sure we got them all
-  # killed. If we suspect it lives, try nice kill with SIG_TERM. Note
-  # that for true Win32 processes, kill(0,$pid) will not return 1.
-  # ----------------------------------------------------------------------
-
-  start_reap_all();                     # Avoid zombies
-
-  my @mysqld_pids= keys %mysqld_pids;
-  mtr_kill_processes(\@mysqld_pids);
-
-  stop_reap_all();                      # Get into control again
+  mtr_kill_processes(\%kill_pids);
 
   # ----------------------------------------------------------------------
-  # Now, we check if all we can find using kill(0,$pid) are dead,
-  # and just assume the rest are. We cleanup socket and PID files.
+  # All processes are killed, cleanup leftover files
   # ----------------------------------------------------------------------
-
   {
     my $errors= 0;
     foreach my $srv ( @$spec )
     {
       if ( $srv->{'pid'} )
       {
-        if ( kill(0,$srv->{'pid'}) )
-        {
-          # FIXME In Cygwin there seem to be some fast reuse
-          # of PIDs, so dying may not be the right thing to do.
-          $errors++;
-          mtr_warning("can't kill process $srv->{'pid'}");
-        }
-        else
+	# Server has been hard killed, clean it's resources
+	foreach my $file ($srv->{'pidfile'}, $srv->{'sockfile'})
         {
-          # We managed to kill it at last
-          # FIXME In Cygwin, we will get here even if the process lives.
-
-          # Not needed as we know the process is dead, but to be safe
-          # we unlink and check success in two steps. We first unlink
-          # without checking the error code, and then check if the
-          # file still exists.
-
-          foreach my $file ($srv->{'pidfile'}, $srv->{'sockfile'})
+	  # Know it is dead so should be no race, careful anyway
+	  if ( defined $file and -f $file and ! unlink($file) and -f $file )
           {
-            # Know it is dead so should be no race, careful anyway
-            if ( defined $file and -f $file and ! unlink($file) and -f $file )
-            {
-              $errors++;
-              mtr_warning("couldn't delete $file");
-            }
-          }
-	  $srv->{'pid'}= 0;
-        }
+	    $errors++;
+	    mtr_warning("couldn't delete $file");
+	  }
+	}
+
+	$srv->{'pid'}= 0;
       }
     }
     if ( $errors )
@@ -701,12 +682,9 @@ sub mtr_check_stop_servers ($) {
       }
     }
   }
-
-  # FIXME We just assume they are all dead, for Cygwin we are not
-  # really sure
-
 }
 
+
 # Wait for all the process in the list to terminate
 sub mtr_wait_blocking($) {
   my $admin_pids= shift;
@@ -1095,9 +1073,9 @@ sub sleep_until_file_created ($$$) {
 sub mtr_kill_processes ($) {
   my $pids = shift;
 
-  mtr_verbose("mtr_kill_processes " . join(" ", @$pids));
+  mtr_verbose("mtr_kill_processes (" . join(" ", keys %{$pids}) . ")");
 
-  foreach my $pid (@$pids)
+  foreach my $pid (keys %{$pids})
   {
 
     if ($pid <= 0)
@@ -1106,11 +1084,26 @@ sub mtr_kill_processes ($) {
       next;
     }
 
-    foreach my $sig (15, 9)
+    my $signaled_procs= kill(9, $pid);
+    if ($signaled_procs == 0)
+    {
+      # No such process existed, assume it's killed
+      mtr_verbose("killed $pid(no such process)");
+    }
+    else
     {
-      last if mtr_im_kill_process([ $pid ], $sig, 10, 1);
+      my $ret_pid= waitpid($pid,0);
+      if ($ret_pid == $pid)
+      {
+	mtr_verbose("killed $pid(got the pid)");
+      }
+      elsif ($ret_pid == -1)
+      {
+	mtr_verbose("killed $pid(got -1)");
+      }
     }
   }
+  mtr_verbose("done killing processes");
 }
 
 

--- 1.38/mysql-test/lib/mtr_report.pl	2007-04-28 10:13:11 +02:00
+++ 1.39/mysql-test/lib/mtr_report.pl	2007-06-20 11:47:54 +02:00
@@ -290,7 +290,7 @@ sub mtr_report_stats ($) {
             if ( /$pattern/ )
             {
               $found_problems= 1;
-              print WARN $_;
+              print WARN basename($errlog) . ": $_";
             }
           }
         }

--- 1.227/mysql-test/mysql-test-run.pl	2007-06-13 10:36:45 +02:00
+++ 1.228/mysql-test/mysql-test-run.pl	2007-06-20 11:47:54 +02:00
@@ -4097,6 +4097,7 @@ sub stop_all_servers () {
 		       pidfile  => $mysqld->{'path_pid'},
 		       sockfile => $mysqld->{'path_sock'},
 		       port     => $mysqld->{'port'},
+		       errfile   => $mysqld->{'path_myerr'},
 		      });
 
       $mysqld->{'pid'}= 0; # Assume we are done with it
@@ -4303,6 +4304,7 @@ sub run_testcase_stop_servers($$$) {
 			 pidfile  => $mysqld->{'path_pid'},
 			 sockfile => $mysqld->{'path_sock'},
 			 port     => $mysqld->{'port'},
+			 errfile   => $mysqld->{'path_myerr'},
 			});
 
 	$mysqld->{'pid'}= 0; # Assume we are done with it
@@ -4353,6 +4355,7 @@ sub run_testcase_stop_servers($$$) {
 			 pidfile  => $mysqld->{'path_pid'},
 			 sockfile => $mysqld->{'path_sock'},
 			 port     => $mysqld->{'port'},
+			 errfile   => $mysqld->{'path_myerr'},
 			});
 
 
Thread
bk commit into 5.0 tree (msvensson:1.2531) BUG#28742 - msvensson, 20 Jun