Author: mcbrown
Date: 2007-03-16 10:16:07 +0100 (Fri, 16 Mar 2007)
New Revision: 5407
Log:
Updating xml-word-count script
Created new make rules for word counts
Modified:
trunk/make.d/vars-docbook
trunk/make.d/xml-format
trunk/refman-5.1/Makefile
trunk/tools/xml-word-count.pl
Modified: trunk/make.d/vars-docbook
===================================================================
--- trunk/make.d/vars-docbook 2007-03-16 07:47:20 UTC (rev 5406)
+++ trunk/make.d/vars-docbook 2007-03-16 09:16:07 UTC (rev 5407)
Changed blocks: 1, Lines Added: 4, Lines Deleted: 0; 483 bytes
@@ -100,6 +100,10 @@
GENARBITRARY = $(TOOLS_DIR)/genarbelements.pl
+# XMLWORDCOUNT counts the words in an XML file, by tag
+
+XMLWORDCOUNT = $(TOOLS_DIR)/xml-word-count.pl
+
# IDMAP builds a map of the IDs in an XML document
IDMAP = $(TOOLS_DIR)/idmap.pl
Modified: trunk/make.d/xml-format
===================================================================
--- trunk/make.d/xml-format 2007-03-16 07:47:20 UTC (rev 5406)
+++ trunk/make.d/xml-format 2007-03-16 09:16:07 UTC (rev 5407)
Changed blocks: 1, Lines Added: 8, Lines Deleted: 0; 599 bytes
@@ -7,5 +7,13 @@
%.format %.reformat:
$(XMLFORMAT) $*.xml
+%.wc:
+ $(XMLWORDCOUNT) $*.xml
+
+%.wcd:
+ $(XMLWORDCOUNT) --detailed $*.xml
+
help::
@echo "make file.format - put file.xml in standard format"
+ @echo "make file.wc - counts the words within XML tags"
+ @echo "make file.wcd - counts the words within XML tags (with tag-by-tag counts"
Modified: trunk/refman-5.1/Makefile
===================================================================
--- trunk/refman-5.1/Makefile 2007-03-16 07:47:20 UTC (rev 5406)
+++ trunk/refman-5.1/Makefile 2007-03-16 09:16:07 UTC (rev 5407)
Changed blocks: 1, Lines Added: 3, Lines Deleted: 0; 531 bytes
@@ -47,6 +47,9 @@
manual-prepped.xml: $(MANUAL_SRCS) $(IDMAP_OBJS)
manual-manprepped.xml: $(MANUAL_SRCS) $(IDMAP_OBJS)
+manual.wc: manual-prepped.xml
+ $(XMLWORDCOUNT) manual-prepped.xml
+
# Make sure we trigger a rebuild of the dynamic tables
# when validating manual.xml or requiring the DBA chapter
Modified: trunk/tools/xml-word-count.pl
===================================================================
--- trunk/tools/xml-word-count.pl 2007-03-16 07:47:20 UTC (rev 5406)
+++ trunk/tools/xml-word-count.pl 2007-03-16 09:16:07 UTC (rev 5407)
Changed blocks: 4, Lines Added: 38, Lines Deleted: 19; 3467 bytes
@@ -39,10 +39,11 @@
/;
-my ($addexctags,$inctags,$help) = ('','',0);
+my ($addexctags,$inctags,$help,$detailed) = ('','',0,0);
GetOptions('exclude=s' => \$addexctags,
'include=s' => \$inctags,
+ 'detailed' => \$detailed,
'help' => \$help);
if ($help)
@@ -121,41 +122,52 @@
{
# We skip anything not an XML file
next unless ($file =~ m/\.xml$/);
- $filemap->{$file} = 1;
+ $filemap->{$file} = {wc => 0,
+ iwc => 0};
}
foreach my $file (keys %{$filemap})
{
- $filemap->{$file} = gen_stats($file,$filemap);
+ ($filemap->{$file}->{wc},
+ $filemap->{$file}->{iwc}) = gen_stats($file,$detailed);
}
-if (scalar keys %{$filemap} > 1)
+my ($total,$itotal) = (0,0);
+print("Summary by filename\n--------------\n");
+foreach my $file (keys %{$filemap})
{
- print("\n\nSummary by filename\n--------------\n\n");
- foreach my $file (keys %{$filemap})
- {
- printf("%-40s %d\n",$file,$filemap->{$file});
- }
+ printf("%-40s %d (%d words ignored)\n",
+ $file,
+ $filemap->{$file}->{wc},
+ $filemap->{$file}->{iwc});
+ $total += $filemap->{$file}->{wc};
+ $itotal += $filemap->{$file}->{iwc};
}
+print "--\n";
+printf("%-40s %d (%d words ignored)\n",'Total',$total,$itotal);
sub gen_stats
{
- my ($file) = @_;
+ my ($file,$detailed) = @_;
my $my_handler = MySQLDocBookStats->new($tagmap);
XML::Parser::PerlSAX->new->parse(Source => { SystemId => $file},
Handler => $my_handler);
- printf("%-40s %9s %9s\n",'Tag','Occurs','Words');
-
- foreach my $tag (sort keys %{$my_handler->{tagcount}})
+ if ($detailed)
{
- printf("%-40s %9d %9d %s\n",$tag, $my_handler->{tagcount}->{$tag} || 0,$my_handler->{tagwordcount}->{$tag} || 0,
- (exists($tagmap->{$tag}) ? '(ignored)' : ''));
+ printf("%-40s %9s %9s\n",'Tag','Occurs','Words');
+
+ foreach my $tag (sort keys %{$my_handler->{tagcount}})
+ {
+ printf("%-40s %9d %9d %s\n",$tag, $my_handler->{tagcount}->{$tag} || 0,$my_handler->{tagwordcount}->{$tag} || 0,
+ (exists($tagmap->{$tag}) ? '(ignored)' : ''));
+ }
+ printf("%-40s %9s %9d\n", 'Total valid Words: ','',$my_handler->{totalwc});
+ printf("%-40s %9s %9d\n", 'Ignored Words: ','',$my_handler->{ignorewc});
}
- printf("%-40s %9s %9d\n", 'Total valid Words: ','',$my_handler->{totalwc});
- return ($my_handler->{totalwc});
+ return ($my_handler->{totalwc},$my_handler->{ignorewc});
}
package MySQLDocBookStats;
@@ -174,6 +186,7 @@
'captext' => 1,
'currtext' => '',
'totalwc' => 0,
+ 'ignorewc' => 0,
'currtextmap' => [],
}, $class;
}
@@ -195,8 +208,14 @@
my $wc = count_words($self->{currtext});
$self->{tagwordcount}->{$element->{Name}} += $wc;
- $self->{totalwc} += $wc
- unless (exists($self->{tagmap}->{$element->{Name}}));
+ if (exists($self->{tagmap}->{$element->{Name}}))
+ {
+ $self->{ignorewc} += $wc;
+ }
+ else
+ {
+ $self->{totalwc} += $wc;
+ }
$self->{currtext} = pop @{$self->{currtextmap}};
}
| Thread | Author | Date |
|---|---|---|
| • svn commit - mysqldoc@docsrva: r5407 - in trunk: make.d refman-5.1 tools | mcbrown | 16 Mar |