List:Commits« Previous MessageNext Message »
From:msvensson Date:June 15 2006 4:40pm
Subject:bk commit into 5.1 tree (msvensson:1.2209)
View as plain text  
Below is the list of changes that have just been committed into a local
5.1 repository of msvensson. When msvensson does a push these changes will
be propagated to the main repository and, within 24 hours after the
push, to the public repository.
For information on how to access the public repository
see http://dev.mysql.com/doc/mysql/en/installing-source-tree.html

ChangeSet
  1.2209 06/06/15 18:40:18 msvensson@shellback.(none) +2 -0
  Improved handling of marking processes as dead
  Run ndb_mgmd as daemon
  Make extra attempt to check if processes are still alive

  mysql-test/mysql-test-run.pl
    1.136 06/06/15 18:40:12 msvensson@shellback.(none) +17 -14
    Run ndb_mgmd with --nodaemon

  mysql-test/lib/mtr_process.pl
    1.40 06/06/15 18:40:12 msvensson@shellback.(none) +59 -68
    Add common function to mark processes as dead
    When all attempts to kill processes have failed, make an extra attempt with ping to check if they really are still alive

# This is a BitKeeper patch.  What follows are the unified diffs for the
# set of deltas contained in the patch.  The rest of the patch, the part
# that BitKeeper cares about, is below these diffs.
# User:	msvensson
# Host:	shellback.(none)
# Root:	/home/msvensson/mysql/mysql-5.1-new-maint

--- 1.39/mysql-test/lib/mtr_process.pl	2006-06-11 23:13:27 +02:00
+++ 1.40/mysql-test/lib/mtr_process.pl	2006-06-15 18:40:12 +02:00
@@ -272,40 +272,17 @@
           last;
         }
 
-        # If one of the mysqld processes died, we want to
+        # If one of the processes died, we want to
         # mark this, and kill the mysqltest process.
 
-        foreach my $idx (0..1)
-        {
-          if ( $::master->[$idx]->{'pid'} eq $ret_pid )
-          {
-            mtr_debug("child $ret_pid was master[$idx], " .
-                      "exit during mysqltest run");
-            $::master->[$idx]->{'pid'}= 0;
-            last;
-          }
-        }
-
-        foreach my $idx (0..2)
-        {
-          if ( $::slave->[$idx]->{'pid'} eq $ret_pid )
-          {
-            mtr_debug("child $ret_pid was slave[$idx], " .
-                      "exit during mysqltest run");
-            $::slave->[$idx]->{'pid'}= 0;
-            last;
-          }
-        }
-
-        mtr_debug("waitpid() caught exit of unknown child $ret_pid, " .
-                  "exit during mysqltest run");
+	mark_process_dead($ret_pid);
       }
 
       if ( $ret_pid != $pid )
       {
         # We terminated the waiting because a "mysqld" process died.
         # Kill the mysqltest process.
-
+	mtr_verbose("Kill mysqltest because another process died");
         kill(9,$pid);
 
         $ret_pid= waitpid($pid,0);
@@ -639,13 +616,19 @@
               mtr_warning("couldn't delete $file");
             }
           }
+	  $srv->{'pid'}= 0;
         }
       }
     }
     if ( $errors )
     {
-      # We are in trouble, just die....
-      mtr_error("we could not kill or clean up all processes");
+      # There where errors killing processes
+      # do one last attempt to ping the servers
+      # and if they can't be pinged, assume they are dead
+      if ( ! mtr_ping_with_timeout( \@$spec ) )
+      {
+	mtr_error("we could not kill or clean up all processes");
+      }
     }
   }
 
@@ -773,6 +756,49 @@
   return $res;
 }
 
+
+#
+# Loop through our list of processes and look for and entry
+# with the provided pid
+# Set the pid of that process to 0 if found
+#
+sub mark_process_dead($)
+{
+  my $ret_pid= shift;
+
+  foreach my $mysqld (@{$::master}, @{$::slave})
+  {
+    if ( $mysqld->{'pid'} eq $ret_pid )
+    {
+      mtr_verbose("$mysqld->{'type'} $mysqld->{'idx'} exited, pid: $ret_pid");
+      $mysqld->{'pid'}= 0;
+      return;
+    }
+  }
+
+  foreach my $cluster (@{$::clusters})
+  {
+    if ( $cluster->{'pid'} eq $ret_pid )
+    {
+      mtr_verbose("$cluster->{'name'} cluster ndb_mgmd exited, pid: $ret_pid");
+      $cluster->{'pid'}= 0;
+      return;
+    }
+
+    foreach my $ndbd (@{$cluster->{'ndbds'}})
+    {
+      if ( $ndbd->{'pid'} eq $ret_pid )
+      {
+	mtr_verbose("$cluster->{'name'} cluster ndbd exited, pid: $ret_pid");
+	$ndbd->{'pid'}= 0;
+	return;
+      }
+    }
+  }
+  mtr_warning("mark_process_dead couldn't find an entry for pid: $ret_pid");
+
+}
+
 ##############################################################################
 #
 #  The operating system will keep information about dead children, 
@@ -789,45 +815,8 @@
   # -1 or 0 means there are no more procesess to wait for
   while ( ($ret_pid= waitpid(-1,&WNOHANG)) != 0 and $ret_pid != -1)
   {
-    mtr_warning("waitpid() caught exit of child $ret_pid");
-    foreach my $idx (0..1)
-    {
-      if ( $::master->[$idx]->{'pid'} eq $ret_pid )
-      {
-        mtr_warning("child $ret_pid was master[$idx]");
-        $::master->[$idx]->{'pid'}= 0;
-      }
-    }
-
-    foreach my $idx (0..2)
-    {
-      if ( $::slave->[$idx]->{'pid'} eq $ret_pid )
-      {
-        mtr_warning("child $ret_pid was slave[$idx]");
-        $::slave->[$idx]->{'pid'}= 0;
-        last;
-      }
-    }
-
-   foreach my $cluster (@{$::clusters})
-   {
-     if ( $cluster->{'pid'} eq $ret_pid )
-     {
-       mtr_warning("child $ret_pid was $cluster->{'name'} cluster ndb_mgmd");
-       $cluster->{'pid'}= 0;
-       last;
-     }
-
-     foreach my $ndbd (@{$cluster->{'ndbds'}})
-     {
-       if ( $ndbd->{'pid'} eq $ret_pid )
-       {
-	 mtr_warning("child $ret_pid was $cluster->{'name'} cluster ndbd");
-	 $ndbd->{'pid'}= 0;
-	 last;
-       }
-     }
-   }
+    mtr_warning("mtr_record_dead_children: $ret_pid");
+    mark_process_dead($ret_pid);
   }
 }
 
@@ -843,7 +832,8 @@
   my $pid;
   while(($pid= waitpid(-1, &WNOHANG)) != 0 and $pid != -1)
   {
-    print "start_reap_all: pid: $pid.\n";
+    mtr_warning("start_reap_all pid: $pid");
+    mark_process_dead($pid);
   };
 }
 
@@ -903,6 +893,7 @@
     # Check if it died after the fork() was successful
     if ( $pid != 0 && waitpid($pid,&WNOHANG) == $pid )
     {
+      mtr_warning("Process $pid died");
       return 0;
     }
 

--- 1.135/mysql-test/mysql-test-run.pl	2006-06-14 21:46:36 +02:00
+++ 1.136/mysql-test/mysql-test-run.pl	2006-06-15 18:40:12 +02:00
@@ -1687,6 +1687,7 @@
 }
 
 
+
 sub mysqld_wait_started($){
   my $mysqld= shift;
 
@@ -1706,6 +1707,7 @@
   mtr_init_args(\$args);
   mtr_add_arg($args, "--no-defaults");
   mtr_add_arg($args, "--core");
+  mtr_add_arg($args, "--nodaemon");
   mtr_add_arg($args, "--config-file=%s", "$cluster->{'data_dir'}/config.ini");
 
 
@@ -1716,9 +1718,23 @@
 		  "",
 		  { append_log_file => 1 });
 
+
+  # FIXME Should not be needed
+  # Unfortunately the cluster nodes will fail to start
+  # if ndb_mgmd has not started properly
+  sleep(1);
+
+ # if (!sleep_until_file_created($cluster->{'path_pid'},
+ #				30, # Seconds
+ #				$pid))
+ #  {
+ #    mtr_warning("Failed to start ndb_mgd for $cluster->{'name'} cluster");
+ #    return 1;
+ #  }
+
   # Remember pid of ndb_mgmd
   $cluster->{'pid'}= $pid;
-  mtr_verbose("ndb_mgmd_start, pid: $pid");
+
   return $pid;
 }
 
@@ -1773,19 +1789,6 @@
   }
 
   my $pid= ndb_mgmd_start($cluster);
-
-  # FIXME Should not be needed
-  # Unfortunately cluster will fail
-  # if ndb_mgmd has not started properly
-  # Wait for the ndb_mgmd pid file to be created
-  if (!sleep_until_file_created($cluster->{'path_pid'},
-				60,
-				$pid))
-  {
-    mtr_warning("Failed to start ndb_mgmd for $cluster->{'name'} cluster");
-    return 1;
-  }
-
 
   for ( my $idx= 0; $idx < $cluster->{'nodes'}; $idx++ )
   {
Thread
bk commit into 5.1 tree (msvensson:1.2209)msvensson15 Jun