Author: paul
Date: 2007-04-19 01:59:04 +0200 (Thu, 19 Apr 2007)
New Revision: 5994
Log:
r19400@frost: paul | 2007-04-18 18:58:03 -0500
Update dependency generator.
Modified:
trunk/tools/xmldepend.pl
Property changes on: trunk
___________________________________________________________________
Name: svk:merge
- 4767c598-dc10-0410-bea0-d01b485662eb:/mysqldoc-local/mysqldoc/trunk:23662
7d8d2c4e-af1d-0410-ab9f-b038ce55645b:/mysqldoc-local/mysqldoc:19384
b5ec3a16-e900-0410-9ad2-d183a3acac99:/mysqldoc-local/mysqldoc/trunk:14218
bf112a9c-6c03-0410-a055-ad865cd57414:/mysqldoc-local/mysqldoc/trunk:14593
+ 4767c598-dc10-0410-bea0-d01b485662eb:/mysqldoc-local/mysqldoc/trunk:23662
7d8d2c4e-af1d-0410-ab9f-b038ce55645b:/mysqldoc-local/mysqldoc:19400
b5ec3a16-e900-0410-9ad2-d183a3acac99:/mysqldoc-local/mysqldoc/trunk:14218
bf112a9c-6c03-0410-a055-ad865cd57414:/mysqldoc-local/mysqldoc/trunk:14593
Modified: trunk/tools/xmldepend.pl
===================================================================
--- trunk/tools/xmldepend.pl 2007-04-18 22:35:56 UTC (rev 5993)
+++ trunk/tools/xmldepend.pl 2007-04-18 23:59:04 UTC (rev 5994)
Changed blocks: 12, Lines Added: 132, Lines Deleted: 111; 14184 bytes
@@ -11,12 +11,15 @@
# - Files named by the fileref attribute of any other element.
# - Files named via <!ENTITY> declarations.
-# The output consists of line line per file among all those named on
-# the command line or found to be referenced by them. Each line
-# contains a filename followed by the names of all files that it
-# refers to (either directly or indirectly via files that the file
-# refers to) If a file refers to no other files, it is the only name
-# on the line.
+# The output consists of one line per file among all those named on
+# the command line or found to be referenced by them. If no files
+# are named on the command line, no output is produced.
+
+# Each output line contains a filename followed by the names of all
+# files that it refers to (either directly, or indirectly via other
+# files that it refers to). If a file refers to no other files, it is
+# the only name on the line.
+
# Example: Suppose that test1.xml includes image1.png, test2.xml, and
# test3.xml, and that test3.xml includes text4.xml. The output will look
# like this:
@@ -28,24 +31,24 @@
# The referring file is always first on an output line. The order of the
# referenced files that follow it is undefined.
-# If no files are named on the command line, no output is produced.
+# The output is deliberately generic. It is assumed that a post-processor
+# will reformat the output to be useful in a given context such as a
+# Makefile or an Ant build file.
-# The output is deliberately generic and likely must be reformatted to
-# be useful in a given context such as a Makefile or Ant build file.
-
# Assumptions/restrictions:
# - XInclude elements are named xi:include.
# - All referenced files are local, not remote.
-# - Entity declarations that name files file are written a certain way
-# (see parse_entity_file() below).
-# - Files referenced via entity references contain only other entity
-# references. That is, they are included into the DOCTYPE declaration,
-# not into the document body.
+# - Entity declarations that name files are written a certain way
+# (see add_dependency() below).
+# - Files referenced via entities are included into the DOCTYPE declaration,
+# not into the document body. (Files referenced in the body should be
+# referred to via XInclude.)
# To do:
# - Convert the call to XIncluder->new() to take a hash. Add verbose option.
-# - Only referenced files are checked to see whether they exist and created
-# if not. Should also do that for files named on the command line.
+# - Have a --create-if-missing option to control whether to attempt to create
+# non-existent files before reading them.
+# - Use of make to create missing files could be a security risk.
# Who-to-blame:
# Paul DuBois
@@ -62,6 +65,9 @@
# Begin main program
+# This should be settable from command line; for now, do manually
+my $verbose = 0;
+
# Map of dependencies. Pass a reference to this map to every invocation
# of the parser so that any shared dependencies among input files will be
# merged and appear only once in the output.
@@ -78,8 +84,14 @@
foreach my $file (@ARGV)
{
- my $xi_handler = XIncluder->new ($file, \%deps);
+ my $xi_handler = XIncluder->new ($file, \%deps, $verbose);
+ if (!defined ($xi_handler))
+ {
+ warn "Skipping $file, already read\n" if $verbose;
+ next;
+ }
+
# The same handler is used for the document handler and the DTD handler.
# This appears to be allowable because for the XML::Parser::PerlSAX object,
# the standard method names for the two handlers do not overlap. Doing
@@ -144,16 +156,18 @@
# ----------------------------------------------------------------------
# This module provides a handler object for XML::Parser::PerlSAX.
+# Return undef if the file has been read already (as determined by
+# looking at the dependency map).
# The document start_element handler takes these actions:
# - For <xi:include> elements, the href attribute (required) indicates the
# included file, and the parse attribute (optional) indicates whether it
# is an XML or text file (default: XML). Add the file as a dependency
# for the current file. If it is an XML file, recursively process it to
-# find any further dependencies. (Check the file_stack to detect circular
-# inclusion loops.)
+# find any further dependencies.
# - For elements with a fileref attribute, add the attribute value as a
# dependency for the current file.
+
# The DTD entity_decl handler takes these actions:
# - Adds entity files as dependencies for the current file.
# - Reads entity files, recursively if necessary, looking for further
@@ -171,13 +185,15 @@
sub new
{
my $class = shift;
-my $file = shift; # file to be parsed (needed to initialize file_stack)
-my $deps = shift; # current dependency map
+my $file = shift; # file to be parsed (needed to initialize file_stack)
+my $deps = shift; # current dependency map
+my $verbose = shift; # be verbose?
my $self = {};
- # This should be settable from command line; for now, do manually
- $self->{verbose} = 0;
+ return undef if exists $deps->{$file}; # file has already been read
+ $self->{verbose} = $verbose;
+
# List of files currently being processed; used to detect circular
# references. This can be circumvented by use of different paths
# to the same file, but simple checks are better than none.
@@ -189,13 +205,18 @@
# names other files on which the key file depends. (A hash is used for
# the value because it's easier to eliminate duplicates.)
$self->{deps} = $deps;
+
# Start an entry for this file so that it shows up in the output even
# if there are no dependent files.
- $self->{deps}->{$file} = {} unless exists $self->{deps}->{$file};
+ $self->{deps}->{$file} = {};
+
+ # Bless hash into an object of the class
+ bless ($self, $class);
+
+ $self->create_if_missing ($file);
warn "Read input file $file\n" if $self->{verbose};
- # Bless hash into an object of the class
- return bless ($self, $class);
+ return $self;
}
sub start_element
@@ -213,27 +234,12 @@
if ($parse eq "xml")
{
warn "XInclude XML file $file\n" if $self->{verbose};
- if ($self->add_dependency ($file))
- {
- warn "Skipping $file, already read\n" if $self->{verbose};
- return;
- }
- die "Circular inclusion reference to $file\n"
- if grep { $_ eq $file } @{$self->{file_stack}};
- push (@{$self->{file_stack}}, $file);
- XML::Parser::PerlSAX->new->parse (
- # Process a new file, but use myself to do it
- Handler => $self,
- DTDHandler => $self,
- EntityResolver => $self,
- Source => { SystemId => $file }
- );
- pop (@{$self->{file_stack}});
+ $self->add_dependency ($file, $parse);
}
elsif ($parse eq "text")
{
warn "XInclude text file $file\n" if $self->{verbose};
- $self->add_dependency ($file);
+ $self->add_dependency ($file, $parse);
}
else
{
@@ -245,7 +251,7 @@
my $file = $self->relativize ($elt->{Attributes}->{fileref});
warn "File reference to $file\n" if $self->{verbose};
- $self->add_dependency ($file);
+ $self->add_dependency ($file, "fileref");
}
}
@@ -257,12 +263,7 @@
return unless defined $file;
$file = $self->relativize ($file);
warn "Parse entity file $file\n" if $self->{verbose};
- if ($self->add_dependency ($file))
- {
- warn "Skipping $file, already read\n" if $self->{verbose};
- return;
- }
- $self->parse_entity_file ($file)
+ $self->add_dependency ($file, "entity");
}
# Helper methods for handler class.
@@ -291,34 +292,29 @@
return $child;
}
-# Add argument file as dependency for the current file.
+# Add argument file as dependency for the current file and parse it if
+# it's a file that needs to be parsed (XML file or entity file).
# This also starts a deps entry for the argument (with no dependencies),
# which serves to record that the file has been seen. That way, the file
-# can be skipped without reading it if it's seen again later.
+# can be skipped without reading it if it's seen again.
-# Return true if the argument already was listed. This means it has
-# already been read, so the caller need not parse it.
-
-# If the dependency does not exit, try to use make to create it.
-# This handles the case that the document refers to files that might
-# need to be dynamically generated and do not necessarily exist when
-# dependencies need to be generated.
-
sub add_dependency
{
my $self = shift;
my $child = shift;
+my $type = shift;
my $curfile = $self->{file_stack}[-1];
my $already_read;
+my $fh;
#warn "$curfile depends on $child\n" if $self->{verbose};
# Initialize curfile dependencies to empty hash if necessary
$self->{deps}->{$curfile} = {} unless exists $self->{deps}->{$curfile};
# Add child as dependency of curfile (eliminating duplicates)
$self->{deps}->{$curfile}->{$child} = 1;
- # Start empty entry for child if none exists yet and determine
- # whether the file has been read already
+ # Start empty entry for child if none exists yet (this also
+ # determines whether the file has been read already)
if (exists $self->{deps}->{$child})
{
$already_read = 1;
@@ -328,67 +324,92 @@
$self->{deps}->{$child} = {};
$already_read = 0;
}
- # Try to create child file if it does not exist; send stdout to stderr
- # so it doesn't pollute dependency output
- if (! -e $child)
+ # These file types are not read
+ return if $type eq "text" || $type eq "fileref";
+
+ # Other types are read. For each such file:
+ # - Check the file stack to detect circular inclusion loops.
+ # - Don't bother reading the file if it has already been read.
+ # - If the file doesn't exist, try to create it. This handles the case
+ # that the document refers to files that might need to be dynamically
+ # generated and do not necessarily exist when dependencies are generated.
+ die "Circular inclusion reference to $child\n"
+ if grep { $_ eq $child } @{$self->{file_stack}};
+ if ($already_read)
{
- my ($file, $dir) = fileparse ($child);
- my $cmd = "make -C $dir $file 1>&2";
- warn "$file does not exist:\n";
- warn "attempting to create with \"$cmd\"\n";
- (system ($cmd) & 127) == 0 or die "Command failed: $cmd\n";
+ warn "Skipping $child, already read\n" if $self->{verbose};
+ return;
}
- return $already_read;
-}
+ $self->create_if_missing ($child);
-# Parse a file named as an entity, looking for further entity file
-# references. Handle nested files references by recursing.
-# Assumption: Entity file references are written this way:
-# <!ENTITY ... SYSTEM "file_name">
+ # Push file on stack, parse according to type, pop stack
-# I suppose that it would be better to use DTDHandler methods for
-# XML::Parser::PerlSAX, except that I've been able to figure out how
-# to make that work. I suspect the support for DTD parsing to be
-# incomplete.
+ push (@{$self->{file_stack}}, $child);
+ if ($type eq "xml")
+ {
+ XML::Parser::PerlSAX->new->parse (
+ # Process a new file, but use myself to do it
+ Handler => $self,
+ DTDHandler => $self,
+ EntityResolver => $self,
+ Source => { SystemId => $child }
+ );
+ }
+ elsif ($type eq "entity")
+ {
+ # Parse a file named as an entity, looking for further entity file
+ # references. Handle nested file references by recursing.
+ # Assumption: Entity file references are written this way:
+ # <!ENTITY % entity_name SYSTEM "file_name">
-# Note: Just because an entity file reference is defined does not mean
-# that the file contents is used. Strictly speaking, the reference is
-# not a dependency unless it is used, but it simplifies parsing here
-# to assume that if the file reference is defined it will also be used.
+ # I suppose that it would be better to use DTDHandler methods for
+ # XML::Parser::PerlSAX, except that I haven't been able to figure out how
+ # to make that work. I suspect the support for DTD parsing to be
+ # incomplete.
-sub parse_entity_file
-{
-my ($self, $file) = @_;
-my $fh;
+ # Note: Just because an entity file reference is defined does not
+ # mean that the document ever actually refers to the entity.
+ # Strictly speaking, the file is not a dependency unless it is
+ # referenced, but it simplifies parsing here to assume that if the
+ # file entity is defined it will also be referenced.
- #if (exists $self->{deps}->{$file})
- #{
- # warn "Skipping $file, already read\n" if $self->{verbose};
- # return;
- #}
- die "Circular inclusion reference to $file\n"
- if grep { $_ eq $file } @{$self->{file_stack}};
- push (@{$self->{file_stack}}, $file);
- $fh = IO::File->new ($file, "r");
- binmode ($fh, ":utf8");
- while (defined ($_ = <$fh>))
- {
- chomp;
- if (/<!ENTITY\s+\%\s*\S+\s+SYSTEM\s+['"](.*)['"]\s*>/)
+ $fh = IO::File->new ($child, "r");
+ binmode ($fh, ":utf8");
+ while (defined ($_ = <$fh>))
{
- my $entfile = $self->relativize ($1);
+ chomp;
+ if (/<!ENTITY\s+\%\s*\S+\s+SYSTEM\s+['"](.*)['"]\s*>/)
+ {
+ my $entfile = $self->relativize ($1);
- warn "Parse nested entity file $entfile\n" if $self->{verbose};
- if ($self->add_dependency ($entfile))
- {
- warn "Skipping $file, already read\n" if $self->{verbose};
- return;
+ warn "Parse nested entity file $entfile\n" if $self->{verbose};
+ $self->add_dependency ($entfile, "entity");
}
- $self->parse_entity_file ($entfile);
}
+ $fh->close ();
}
- $fh->close ();
+ else
+ {
+ die "LOGIC ERROR: Unknown file type: $type\n";
+ }
pop (@{$self->{file_stack}});
}
+# Try to create a file that is to be read if it is missing.
+# Route stdout to stderr so it doesn't pollute dependency output.
+
+sub create_if_missing
+{
+my ($self, $file) = @_;
+
+ if (! -e $file)
+ {
+ my ($base, $dir) = fileparse ($file);
+ my $cmd = "make -C $dir $base 1>&2";
+ warn "$file does not exist:\n";
+ warn "attempting to create with \"$cmd\"\n";
+ (system ($cmd) == 0) or die "Command failed: $cmd\n";
+ }
+}
+
1;
| Thread |
|---|
| • svn commit - mysqldoc@docsrva: r5994 - in trunk: . tools | paul | 19 Apr |