List:Commits« Previous MessageNext Message »
From:paul Date:April 18 2007 11:59pm
Subject:svn commit - mysqldoc@docsrva: r5994 - in trunk: . tools
View as plain text  
Author: paul
Date: 2007-04-19 01:59:04 +0200 (Thu, 19 Apr 2007)
New Revision: 5994

Log:
 r19400@frost:  paul | 2007-04-18 18:58:03 -0500
 Update dependency generator.


Modified:
   trunk/tools/xmldepend.pl

Property changes on: trunk
___________________________________________________________________
Name: svk:merge
   - 4767c598-dc10-0410-bea0-d01b485662eb:/mysqldoc-local/mysqldoc/trunk:23662
7d8d2c4e-af1d-0410-ab9f-b038ce55645b:/mysqldoc-local/mysqldoc:19384
b5ec3a16-e900-0410-9ad2-d183a3acac99:/mysqldoc-local/mysqldoc/trunk:14218
bf112a9c-6c03-0410-a055-ad865cd57414:/mysqldoc-local/mysqldoc/trunk:14593
   + 4767c598-dc10-0410-bea0-d01b485662eb:/mysqldoc-local/mysqldoc/trunk:23662
7d8d2c4e-af1d-0410-ab9f-b038ce55645b:/mysqldoc-local/mysqldoc:19400
b5ec3a16-e900-0410-9ad2-d183a3acac99:/mysqldoc-local/mysqldoc/trunk:14218
bf112a9c-6c03-0410-a055-ad865cd57414:/mysqldoc-local/mysqldoc/trunk:14593


Modified: trunk/tools/xmldepend.pl
===================================================================
--- trunk/tools/xmldepend.pl	2007-04-18 22:35:56 UTC (rev 5993)
+++ trunk/tools/xmldepend.pl	2007-04-18 23:59:04 UTC (rev 5994)
Changed blocks: 12, Lines Added: 132, Lines Deleted: 111; 14184 bytes

@@ -11,12 +11,15 @@
 # - Files named by the fileref attribute of any other element.
 # - Files named via <!ENTITY> declarations.
 
-# The output consists of line line per file among all those named on
-# the command line or found to be referenced by them.  Each line
-# contains a filename followed by the names of all files that it
-# refers to (either directly or indirectly via files that the file
-# refers to) If a file refers to no other files, it is the only name
-# on the line.
+# The output consists of one line per file among all those named on
+# the command line or found to be referenced by them.  If no files
+# are named on the command line, no output is produced.
+
+# Each output line contains a filename followed by the names of all
+# files that it refers to (either directly, or indirectly via other
+# files that it refers to). If a file refers to no other files, it is
+# the only name on the line.
+
 # Example: Suppose that test1.xml includes image1.png, test2.xml, and
 # test3.xml, and that test3.xml includes text4.xml. The output will look
 # like this:

@@ -28,24 +31,24 @@
 # The referring file is always first on an output line. The order of the
 # referenced files that follow it is undefined.
 
-# If no files are named on the command line, no output is produced.
+# The output is deliberately generic. It is assumed that a post-processor
+# will reformat the output to be useful in a given context such as a
+# Makefile or an Ant build file.
 
-# The output is deliberately generic and likely must be reformatted to
-# be useful in a given context such as a Makefile or Ant build file.
-
 # Assumptions/restrictions:
 # - XInclude elements are named xi:include.
 # - All referenced files are local, not remote.
-# - Entity declarations that name files file are written a certain way
-#   (see parse_entity_file() below).
-# - Files referenced via entity references contain only other entity
-#   references. That is, they are included into the DOCTYPE declaration,
-#   not into the document body.
+# - Entity declarations that name files are written a certain way
+#   (see add_dependency() below).
+# - Files referenced via entities are included into the DOCTYPE declaration,
+#   not into the document body. (Files referenced in the body should be
+#   referred to via XInclude.)
 
 # To do:
 # - Convert the call to XIncluder->new() to take a hash. Add verbose option.
-# - Only referenced files are checked to see whether they exist and created
-#   if not.  Should also do that for files named on the command line.
+# - Have a --create-if-missing option to control whether to attempt to create
+#   non-existent files before reading them.
+# - Use of make to create missing files could be a security risk.
 
 # Who-to-blame:
 # Paul DuBois

@@ -62,6 +65,9 @@
 
 # Begin main program
 
+# This should be settable from command line; for now, do manually
+my $verbose = 0;
+
 # Map of dependencies. Pass a reference to this map to every invocation
 # of the parser so that any shared dependencies among input files will be
 # merged and appear only once in the output.

@@ -78,8 +84,14 @@
 
 foreach my $file (@ARGV)
 {
-  my $xi_handler = XIncluder->new ($file, \%deps);
+  my $xi_handler = XIncluder->new ($file, \%deps, $verbose);
 
+  if (!defined ($xi_handler))
+  {
+    warn "Skipping $file, already read\n" if $verbose;
+    next;
+  }
+
   # The same handler is used for the document handler and the DTD handler.
   # This appears to be allowable because for the XML::Parser::PerlSAX object,
   # the standard method names for the two handlers do not overlap. Doing

@@ -144,16 +156,18 @@
 # ----------------------------------------------------------------------
 
 # This module provides a handler object for XML::Parser::PerlSAX.
+# Return undef if the file has been read already (as determined by
+# looking at the dependency map).
 
 # The document start_element handler takes these actions:
 # - For <xi:include> elements, the href attribute (required) indicates the
 #   included file, and the parse attribute (optional) indicates whether it
 #   is an XML or text file (default: XML).  Add the file as a dependency
 #   for the current file. If it is an XML file, recursively process it to
-#   find any further dependencies. (Check the file_stack to detect circular
-#   inclusion loops.)
+#   find any further dependencies.
 # - For elements with a fileref attribute, add the attribute value as a
 #   dependency for the current file.
+
 # The DTD entity_decl handler takes these actions:
 # - Adds entity files as dependencies for the current file.
 # - Reads entity files, recursively if necessary, looking for further

@@ -171,13 +185,15 @@
 sub new
 {
 my $class = shift;
-my $file = shift;   # file to be parsed (needed to initialize file_stack)
-my $deps = shift;   # current dependency map
+my $file = shift;    # file to be parsed (needed to initialize file_stack)
+my $deps = shift;    # current dependency map
+my $verbose = shift; # be verbose?
 my $self = {};
 
-  # This should be settable from command line; for now, do manually
-  $self->{verbose} = 0;
+  return undef if exists $deps->{$file};  # file has already been read
 
+  $self->{verbose} = $verbose;
+
   # List of files currently being processed; used to detect circular
   # references. This can be circumvented by use of different paths
   # to the same file, but simple checks are better than none.

@@ -189,13 +205,18 @@
   # names other files on which the key file depends. (A hash is used for
   # the value because it's easier to eliminate duplicates.)
   $self->{deps} = $deps;
+
   # Start an entry for this file so that it shows up in the output even
   # if there are no dependent files.
-  $self->{deps}->{$file} = {} unless exists $self->{deps}->{$file};
+  $self->{deps}->{$file} = {};
+
+  # Bless hash into an object of the class
+  bless ($self, $class);
+
+  $self->create_if_missing ($file);
   warn "Read input file $file\n" if $self->{verbose};
 
-  # Bless hash into an object of the class
-  return bless ($self, $class);
+  return $self;
 }
 
 sub start_element

@@ -213,27 +234,12 @@
     if ($parse eq "xml")
     {
       warn "XInclude XML file $file\n" if $self->{verbose};
-      if ($self->add_dependency ($file))
-      {
-        warn "Skipping $file, already read\n" if $self->{verbose};
-        return;
-      }
-      die "Circular inclusion reference to $file\n"
-        if grep { $_ eq $file } @{$self->{file_stack}};
-      push (@{$self->{file_stack}}, $file);
-      XML::Parser::PerlSAX->new->parse (
-        # Process a new file, but use myself to do it
-        Handler => $self,
-        DTDHandler => $self,
-        EntityResolver => $self,
-        Source => { SystemId => $file }
-      );
-      pop (@{$self->{file_stack}});
+      $self->add_dependency ($file, $parse);
     }
     elsif ($parse eq "text")
     {
       warn "XInclude text file $file\n" if $self->{verbose};
-      $self->add_dependency ($file);
+      $self->add_dependency ($file, $parse);
     }
     else
     {

@@ -245,7 +251,7 @@
     my $file = $self->relativize ($elt->{Attributes}->{fileref});
 
     warn "File reference to $file\n" if $self->{verbose};
-    $self->add_dependency ($file);
+    $self->add_dependency ($file, "fileref");
   }
 }
 

@@ -257,12 +263,7 @@
   return unless defined $file;
   $file = $self->relativize ($file);
   warn "Parse entity file $file\n" if $self->{verbose};
-  if ($self->add_dependency ($file))
-  {
-    warn "Skipping $file, already read\n" if $self->{verbose};
-    return;
-  }
-  $self->parse_entity_file ($file)
+  $self->add_dependency ($file, "entity");
 }
 
 # Helper methods for handler class.

@@ -291,34 +292,29 @@
   return $child;
 }
 
-# Add argument file as dependency for the current file.
+# Add argument file as dependency for the current file and parse it if
+# it's a file that needs to be parsed (XML file or entity file).
 
 # This also starts a deps entry for the argument (with no dependencies),
 # which serves to record that the file has been seen. That way, the file
-# can be skipped without reading it if it's seen again later.
+# can be skipped without reading it if it's seen again.
 
-# Return true if the argument already was listed. This means it has
-# already been read, so the caller need not parse it.
-
-# If the dependency does not exit, try to use make to create it.
-# This handles the case that the document refers to files that might
-# need to be dynamically generated and do not necessarily exist when
-# dependencies need to be generated.
-
 sub add_dependency
 {
 my $self = shift;
 my $child = shift;
+my $type = shift;
 my $curfile = $self->{file_stack}[-1];
 my $already_read;
+my $fh;
 
   #warn "$curfile depends on $child\n" if $self->{verbose};
   # Initialize curfile dependencies to empty hash if necessary
   $self->{deps}->{$curfile} = {} unless exists $self->{deps}->{$curfile};
   # Add child as dependency of curfile (eliminating duplicates)
   $self->{deps}->{$curfile}->{$child} = 1;
-  # Start empty entry for child if none exists yet and determine
-  # whether the file has been read already
+  # Start empty entry for child if none exists yet (this also
+  # determines whether the file has been read already)
   if (exists $self->{deps}->{$child})
   {
     $already_read = 1;

@@ -328,67 +324,92 @@
     $self->{deps}->{$child} = {};
     $already_read = 0;
   }
-  # Try to create child file if it does not exist; send stdout to stderr
-  # so it doesn't pollute dependency output
-  if (! -e $child)
+  # These file types are not read
+  return if $type eq "text" || $type eq "fileref";
+
+  # Other types are read. For each such file:
+  # - Check the file stack to detect circular inclusion loops.
+  # - Don't bother reading the file if it has already been read.
+  # - If the file doesn't exist, try to create it. This handles the case
+  #   that the document refers to files that might need to be dynamically
+  #   generated and do not necessarily exist when dependencies are generated.
+  die "Circular inclusion reference to $child\n"
+    if grep { $_ eq $child } @{$self->{file_stack}};
+  if ($already_read)
   {
-    my ($file, $dir) = fileparse ($child);
-    my $cmd = "make -C $dir $file 1>&2";
-    warn "$file does not exist:\n";
-    warn "attempting to create with \"$cmd\"\n";
-    (system ($cmd) & 127) == 0 or die "Command failed: $cmd\n";
+    warn "Skipping $child, already read\n" if $self->{verbose};
+    return;
   }
-  return $already_read;
-}
+  $self->create_if_missing ($child);
 
-# Parse a file named as an entity, looking for further entity file
-# references. Handle nested files references by recursing.
-# Assumption: Entity file references are written this way:
-# <!ENTITY ... SYSTEM "file_name">
+  # Push file on stack, parse according to type, pop stack
 
-# I suppose that it would be better to use DTDHandler methods for
-# XML::Parser::PerlSAX, except that I've been able to figure out how
-# to make that work. I suspect the support for DTD parsing to be
-# incomplete.
+  push (@{$self->{file_stack}}, $child);
+  if ($type eq "xml")
+  {
+      XML::Parser::PerlSAX->new->parse (
+        # Process a new file, but use myself to do it
+        Handler => $self,
+        DTDHandler => $self,
+        EntityResolver => $self,
+        Source => { SystemId => $child }
+      );
+  }
+  elsif ($type eq "entity")
+  {
+    # Parse a file named as an entity, looking for further entity file
+    # references. Handle nested file references by recursing.
+    # Assumption: Entity file references are written this way:
+    # <!ENTITY % entity_name SYSTEM "file_name">
 
-# Note: Just because an entity file reference is defined does not mean
-# that the file contents is used. Strictly speaking, the reference is
-# not a dependency unless it is used, but it simplifies parsing here
-# to assume that if the file reference is defined it will also be used.
+    # I suppose that it would be better to use DTDHandler methods for
+    # XML::Parser::PerlSAX, except that I've not been able to figure out how
+    # to make that work. I suspect the support for DTD parsing to be
+    # incomplete.
 
-sub parse_entity_file
-{
-my ($self, $file) = @_;
-my $fh;
+    # Note: Just because an entity file reference is defined does not
+    # mean that the document ever actually refers to the entity.
+    # Strictly speaking, the file is not a dependency unless it is
+    # referenced, but it simplifies parsing here to assume that if the
+    # file entity is defined it will also be referenced.
 
-  #if (exists $self->{deps}->{$file})
-  #{
-  #  warn "Skipping $file, already read\n" if $self->{verbose};
-  #  return;
-  #}
-  die "Circular inclusion reference to $file\n"
-    if grep { $_ eq $file } @{$self->{file_stack}};
-  push (@{$self->{file_stack}}, $file);
-  $fh = IO::File->new ($file, "r");
-  binmode ($fh, ":utf8");
-  while (defined ($_ = <$fh>))
-  {
-    chomp;
-    if (/<!ENTITY\s+\%\s*\S+\s+SYSTEM\s+['"](.*)['"]\s*>/)
+    $fh = IO::File->new ($child, "r");
+    binmode ($fh, ":utf8");
+    while (defined ($_ = <$fh>))
     {
-      my $entfile = $self->relativize ($1);
+      chomp;
+      if (/<!ENTITY\s+\%\s*\S+\s+SYSTEM\s+['"](.*)['"]\s*>/)
+      {
+        my $entfile = $self->relativize ($1);
 
-      warn "Parse nested entity file $entfile\n" if $self->{verbose};
-      if ($self->add_dependency ($entfile))
-      {
-        warn "Skipping $file, already read\n" if $self->{verbose};
-        return;
+        warn "Parse nested entity file $entfile\n" if $self->{verbose};
+        $self->add_dependency ($entfile, "entity");
       }
-      $self->parse_entity_file ($entfile);
     }
+    $fh->close ();
   }
-  $fh->close ();
+  else
+  {
+    die "LOGIC ERROR: Unknown file type: $type\n";
+  }
   pop (@{$self->{file_stack}});
 }
 
+# Try to create a file that is to be read if it is missing.
+# Route stdout to stderr so it doesn't pollute dependency output.
+
+sub create_if_missing
+{
+my ($self, $file) = @_;
+
+  if (! -e $file)
+  {
+    my ($base, $dir) = fileparse ($file);
+    my $cmd = "make -C $dir $base 1>&2";
+    warn "$file does not exist:\n";
+    warn "attempting to create with \"$cmd\"\n";
+    (system ($cmd) == 0) or die "Command failed: $cmd\n";
+  }
+}
+
 1;


Thread
svn commit - mysqldoc@docsrva: r5994 - in trunk: . toolspaul19 Apr