Author: mcbrown
Date: 2007-04-23 11:35:31 +0200 (Mon, 23 Apr 2007)
New Revision: 6055
Log:
Updated version of ID map that also records ID-tracked dependencies for images, included blocks and entities
Added:
trunk/tools/idmap2.pl
Property changes on: trunk/tools/idmap2.pl
___________________________________________________________________
Name: svn:executable
+ *
Added: trunk/tools/idmap2.pl
===================================================================
--- trunk/tools/idmap2.pl (rev 0)
+++ trunk/tools/idmap2.pl 2007-04-23 09:35:31 UTC (rev 6055)
Changed blocks: 1, Lines Added: 474, Lines Deleted: 0; 13108 bytes
@@ -0,0 +1,474 @@
+#! /usr/bin/perl -w
+# vim:set ts=2 sw=2 expandtab:
+
+# idmap.pl - build a map of section, para and other IDs to map references
+
+# Martin MC Brown
+# mc@stripped
+# 2006-08-03
+
+use strict;
+use File::Basename;
+use File::Path;
+use Getopt::Long;
+use IO::File;
+use Data::Dumper;
+
+my $idmapversion = 1;
+
+my $opt_reconcile = 0;
+
+GetOptions("reconcile" => \$opt_reconcile);
+
+eval "require XML::Parser::PerlSAX;";
+
+if ($@)
+{
+ die <<EOF;
+ERROR: Cannot load the PerlSAX parser.
+
+You need to install the expat library and the XML::Parser::PerlSAX module for perl.
+Either do it by hand:
+ - libexpat is available from http://expat.sourceforge.net
+ - PerlSAX is available from http://search.cpan.org/~kmacleod/libxml-perl-0.08/lib/XML/Parser/PerlSAX.pm
+
+Using CPAN:
+ - Install libexpat
+ - Run:
+
+\$ perl -MCPAN -e 'install XML::Parser::PerlSAX'
+
+Using apt-get:
+
+\$ apt-get install libexpat-dev
+\$ perl -MCPAN -e 'install XML::Parser::PerlSAX'
+
+Using YaST:
+
+\$ yast -i expat
+\$ perl -MCPAN -e 'install XML::Parser::PerlSAX'
+
+Please install and try again.
+
+EOF
+}
+
+my $urlbase = shift;
+
+my ($docbase,$docversion,$doclang) = ('','','');
+my @elems = split(m{/},$urlbase);
+if (scalar @elems == 3)
+{
+ ($docbase,$docversion,$doclang) = ($elems[0],$elems[1],$elems[2]);
+}
+else
+{
+ ($docbase,$doclang) = ($elems[0],$elems[1]);
+}
+
+my $filemap = {};
+
+if ($opt_reconcile)
+{
+ reconcile_maps(@ARGV);
+ exit(0);
+}
+
+for my $file (@ARGV)
+{
+# We skip anything not an XML file
+ next unless ($file =~ m/\.xml$/);
+# We skip anything not in the current directory
+ next if ($file =~ m{/});
+ $filemap->{$file} = 1;
+}
+
+while(scalar keys %{$filemap})
+{
+ foreach my $file (keys %{$filemap})
+ {
+ gen_idmap($file,$filemap);
+ }
+}
+
+sub gen_idmap
+{
+ my ($file,$filemap) = @_;
+
+ my ($name,$path,$suffix) = fileparse($file,qw/xml/);
+ my ($mapfile) = sprintf('%s/%s/%s%s',$path,'metadata',$name,'idmap');
+ my ($maptempfile) = sprintf('%s-tmp',$mapfile);
+ my ($mapdir) = sprintf('%s/%s',$path,'metadata');
+
+ # no update if $mapfile is not out of date (exists and was
+ # modified more recently than $file)
+ if (defined(-M $mapfile) && (-M $file >= -M $mapfile))
+ {
+ delete($filemap->{$file});
+ return;
+ }
+
+ mkpath($mapdir);
+
+ my $my_handler = MySQLDocBook->new();
+ my $my_dtdhandler = MyDTDHandler->new();
+
+ XML::Parser::PerlSAX->new->parse(Source => { SystemId => $file},
+ Handler => $my_handler,
+ DTDHandler => $my_dtdhandler);
+
+ # Write out the ID map
+
+ my $dest = IO::File->new($maptempfile,'w');
+
+ die "Couldn't open map destination file ($maptempfile): $!" unless (defined($dest));
+
+ binmode($dest,':utf8');
+
+ print $dest "!!!mapversion:1!!!\n";
+ printf $dest "!!!INFO!!!%s\n",join(':',$file,$docbase,$docversion,$doclang,$urlbase);
+
+ foreach my $id (sort { $a cmp $b } keys %{$my_handler->{ids}})
+ {
+ if (!defined($id) || $id eq '')
+ {
+ printf STDERR ("WARNING: Can't make a valid ID $dest on %s when the ID for '%s' is not specified\n",
+ $file,
+ $my_handler->{ids}->{$id}->{title});
+ }
+ else
+ {
+ print $dest (join(':',
+ $id,
+ $my_handler->{ids}->{$id}->{type} || '',
+ $my_handler->{ids}->{$id}->{sectionparent} || '',
+ $my_handler->{ids}->{$id}->{title} || $id),
+ "\n");
+ }
+ }
+ close($dest);
+
+ unlink($mapfile);
+ rename($maptempfile,$mapfile);
+ delete($filemap->{$file});
+
+ if (scalar @{$my_handler->{revlink}})
+ {
+ # Write out the reverse ID map
+
+ ($mapfile) = sprintf('%s/%s/%s%s',$path,'metadata',$name,'ridmap');
+ ($maptempfile) = sprintf('%s-tmp',$mapfile);
+
+ $dest = IO::File->new($maptempfile,'w');
+
+ die "Couldn't open map destination file ($maptempfile): $!" unless (defined($dest));
+
+ binmode($dest,':utf8');
+
+ print $dest "!!!mapversion:1!!!\n";
+ printf $dest "!!!INFO!!!%s\n",join(':',$file,$docbase,$docversion,$doclang,$urlbase);
+
+ foreach my $revlink (@{$my_handler->{revlink}})
+ {
+ print $dest (join(':',
+ $revlink->{parent},
+ $revlink->{linkend}),
+ "\n");
+ }
+ close($dest);
+
+ unlink($mapfile);
+ rename($maptempfile,$mapfile);
+ delete($filemap->{$file});
+ }
+
+ if (scalar @{$my_handler->{images}})
+ {
+ # Write out the image map
+
+ ($mapfile) = sprintf('%s/%s/%s%s',$path,'metadata',$name,'images');
+ ($maptempfile) = sprintf('%s-tmp',$mapfile);
+
+ $dest = IO::File->new($maptempfile,'w');
+
+ die "Couldn't open map destination file ($maptempfile): $!" unless (defined($dest));
+
+ binmode($dest,':utf8');
+
+ print $dest "!!!mapversion:1!!!\n";
+ printf $dest "!!!INFO!!!%s\n",join(':',$file,$docbase,$docversion,$doclang,$urlbase);
+
+ foreach my $images (@{$my_handler->{images}})
+ {
+ print $dest (join(':',
+ $images->{parent},
+ $images->{fileref}),
+ "\n");
+ }
+ close($dest);
+
+ unlink($mapfile);
+ rename($maptempfile,$mapfile);
+ delete($filemap->{$file});
+ }
+
+ if (scalar @{$my_handler->{xmlimports}})
+ {
+ # Write out the xmlimports
+
+ ($mapfile) = sprintf('%s/%s/%s%s',$path,'metadata',$name,'xmlimports');
+ ($maptempfile) = sprintf('%s-tmp',$mapfile);
+
+ $dest = IO::File->new($maptempfile,'w');
+
+ die "Couldn't open map destination file ($maptempfile): $!" unless (defined($dest));
+
+ binmode($dest,':utf8');
+
+ print $dest "!!!mapversion:1!!!\n";
+ printf $dest "!!!INFO!!!%s\n",join(':',$file,$docbase,$docversion,$doclang,$urlbase);
+
+ foreach my $xmlimport (@{$my_handler->{xmlimports}})
+ {
+ print $dest (join(':',
+ $xmlimport->{parent},
+ $xmlimport->{href}),
+ "\n");
+ }
+ close($dest);
+
+ unlink($mapfile);
+ rename($maptempfile,$mapfile);
+ delete($filemap->{$file});
+ }
+
+ if (scalar keys %{$my_dtdhandler->{entityfiles}})
+ {
+ # Write out the entity imports
+
+ ($mapfile) = sprintf('%s/%s/%s%s',$path,'metadata',$name,'entities');
+ ($maptempfile) = sprintf('%s-tmp',$mapfile);
+
+ $dest = IO::File->new($maptempfile,'w');
+
+ die "Couldn't open map destination file ($maptempfile): $!" unless (defined($dest));
+
+ binmode($dest,':utf8');
+
+ print $dest "!!!mapversion:1!!!\n";
+ printf $dest "!!!INFO!!!%s\n",join(':',$file,$docbase,$docversion,$doclang,$urlbase);
+
+ foreach my $entityimport (keys %{$my_dtdhandler->{entityfiles}})
+ {
+ print $dest ($entityimport,"\n");
+ }
+ close($dest);
+
+ unlink($mapfile);
+ rename($maptempfile,$mapfile);
+ delete($filemap->{$file});
+ }
+}
+
+sub reconcile_maps
+{
+ my (@files) = @_;
+
+ my $mapfilelookup = {};
+
+ foreach my $file (@files)
+ {
+ next unless $file =~ m/\.xml$/;
+ my ($name,$path,$suffix) = fileparse($file,qw/xml/);
+ my $mapfile = sprintf('%s/%s/%s%s',$path,'metadata',$name,'idmap');
+ $mapfile =~ s{//}{/}g;
+ $mapfilelookup->{$mapfile} = $file;
+ }
+
+ my @idmaps = glob('./metadata/*.idmap');
+
+ foreach my $idmapfile (@idmaps)
+ {
+ if (!exists($mapfilelookup->{$idmapfile}))
+ {
+# print STDERR "WARNING: deleting $idmapfile (no associated XML)\n";
+ unlink($idmapfile);
+ }
+ }
+}
+
+# This is not used, but retained here for reference
+
+package MyDTDHandler;
+use Data::Dumper;
+use XML::Parser::PerlSAX;
+
+sub new
+{
+ my $self = shift;
+ my $class = ref($self) || $self;
+
+ return bless {entities => {},
+ entityfiles => {},}, $class;
+}
+
+sub entity_decl
+{
+ my ($self,$element) = @_;
+
+ if (!defined($element->{Value}))
+ {
+ $self->parseentityfile($element->{SystemId});
+ $self->{entityfiles}->{$element->{SystemId}} = 1;
+ }
+}
+
+sub parseentityfile
+{
+ my ($self,$file) = @_;
+
+ my $entfile = IO::File->new($file,'r');
+
+ if (!defined($entfile))
+ {
+ warn "Couldn't open $file\n";
+ return;
+ }
+ while(my $line = <$entfile>)
+ {
+ if ($line =~ m/<!ENTITY.*?SYSTEM\s+['"](.*?)["']\s*>/)
+ {
+ $self->{entityfiles}->{$1} = 1;
+ $self->parseentityfile($1);
+ }
+ }
+ close($entfile);
+}
+
+1;
+
+package MySQLDocBook;
+
+sub new
+{
+ my $self = shift;
+ my $class = ref($self) || $self;
+ my $tag = shift;
+
+ return bless {'tag' => $tag,
+ 'sectionids' => {},
+ 'currid' => '',
+ 'currsection' => '',
+ 'sectionmap' => [],
+ 'revlink' => [],
+ 'images' => [],
+ 'xmlimports' => [],
+ 'ids' => {},
+ 'captext' => 0,
+ 'currtext' => '',
+ }, $class;
+}
+
+sub start_element
+{
+ my ($self, $element) = @_;
+
+ if (($element->{Name} eq 'link') ||
+ ($element->{Name} eq 'xref'))
+ {
+ push @{$self->{revlink}},{linkend => $element->{Attributes}->{linkend},
+ parent => $self->{currsection}};
+ }
+
+ if (($element->{Name} eq 'xi:include'))
+ {
+ push @{$self->{xmlimports}},{href => $element->{Attributes}->{href},
+ parent => $self->{currsection}};
+ }
+
+ if (($element->{Name} eq 'imagedata'))
+ {
+ push @{$self->{images}},{fileref => $element->{Attributes}->{fileref},
+ parent => $self->{currsection}};
+ }
+
+ if (exists($element->{Attributes}->{id}))
+ {
+ return unless ($element->{Name} =~ m/^(appendix|article|book|part|chapter|example|glossary|para|preface|programlisting|refentry|refsection|section|figure)$/i);
+
+ $self->{ids}->{$element->{Attributes}->{id}} = {'type' => $element->{Name},
+ 'sectionparent' => $self->{currsection} ||
+ $element->{Attributes}->{id} || '--TOP--',
+ 'title' => undef,
+ };
+ if ($element->{Name} eq 'section' ||
+ $element->{Name} eq 'refsection')
+ {
+ push(@{$self->{sectionmap}},$element->{Attributes}->{id});
+ $self->{currsection} = $element->{Attributes}->{id};
+ if (exists($self->{sectionids}->{$element->{Attributes}->{id}}))
+ {
+ print STDERR "Error: Found a duplicate section ID ($element->{Attributes}->{id})\n";
+ }
+ else
+ {
+ $self->{sectionids}->{$element->{Attributes}->{id}} = 0;
+ }
+ $self->{sectionids}->{$element->{Attributes}->{id}}++;
+ }
+ $self->{currid} = $element->{Attributes}->{id};
+ }
+ if ($self->{captext})
+ {
+ $self->{currtext} .= sprintf('<%s>',$element->{Name});
+ }
+ if ($element->{Name} eq 'title')
+ {
+ $self->{captext} = 1;
+ $self->{currtext} = '';
+ }
+}
+
+sub end_element
+{
+ my ($self, $element) = @_;
+
+ if ($element->{Name} eq 'section')
+ {
+ if (exists($element->{Attributes}->{id}))
+ {
+ my $cursection = pop(@{$self->{sectionmap}});
+ $self->{currsection} = $self->{sectionmap}->[-1];
+ }
+ }
+ if ($element->{Name} eq 'title' && (!defined($self->{ids}->{$self->{currid}}->{title})))
+ {
+ $self->{ids}->{$self->{currid}}->{title} = $self->{currtext};
+ $self->{captext} = 0;
+ }
+ if ($self->{captext})
+ {
+ $self->{currtext} .= sprintf('</%s>',$element->{Name});
+ }
+}
+
+sub characters
+{
+ my ($self, $element) = @_;
+ $element->{Data} =~ s/[\r\n]+/ /g;
+ while ($element->{Data} =~ s/[ ][ ]+/ /g) {}
+ $self->{currtext} .= $element->{Data} if ($self->{captext});
+}
+
+sub entity_reference
+{
+ my ($self, $element) = @_;
+
+ if ($self->{captext})
+ {
+ $self->{currtext} .= sprintf('&%s;',$element->{Name});
+ }
+}
+
+
+1;
Property changes on: trunk/tools/idmap2.pl
___________________________________________________________________
Name: svn:executable
+ *
| Thread |
|---|
| • svn commit - mysqldoc@docsrva: r6055 - trunk/tools | mcbrown | 23 Apr |