Author: mcbrown
Date: 2007-05-26 22:14:31 +0200 (Sat, 26 May 2007)
New Revision: 6587
Log:
Updated the word count tool to support word counts by section (non-ignored words only).
Updated the makefile so that word counts are run on the prepped files.
Added a new rule for producing a word count with results broken down by section:
make file.wcs
Modified:
trunk/make.d/xml-format
trunk/tools/xml-word-count.pl
Modified: trunk/make.d/xml-format
===================================================================
--- trunk/make.d/xml-format 2007-05-26 19:42:22 UTC (rev 6586)
+++ trunk/make.d/xml-format 2007-05-26 20:14:31 UTC (rev 6587)
Changed blocks: 1, Lines Added: 9, Lines Deleted: 5; 939 bytes
@@ -7,13 +7,17 @@
%.format %.reformat:
$(XMLFORMAT) $*.xml
-%.wc:
- $(XMLWORDCOUNT) $*.xml
+%.wc: %-prepped.xml
+ $(XMLWORDCOUNT) $<
-%.wcd:
- $(XMLWORDCOUNT) --detailed $*.xml
+%.wcd: %-prepped.xml
+ $(XMLWORDCOUNT) --detailed $<
+%.wcs: %-prepped.xml
+ $(XMLWORDCOUNT) --sectiondetail $<
+
help::
@echo "make file.format - put file.xml in standard format"
@echo "make file.wc - counts the words within XML tags"
- @echo "make file.wcd - counts the words within XML tags (with tag-by-tag counts"
+ @echo "make file.wcd - counts the words within XML tags (with tag-by-tag counts)"
+ @echo "make file.wcs - counts the words within XML tags (with section counts)"
Modified: trunk/tools/xml-word-count.pl
===================================================================
--- trunk/tools/xml-word-count.pl 2007-05-26 19:42:22 UTC (rev 6586)
+++ trunk/tools/xml-word-count.pl 2007-05-26 20:14:31 UTC (rev 6587)
Changed blocks: 8, Lines Added: 46, Lines Deleted: 3; 3028 bytes
@@ -9,6 +9,7 @@
use strict;
use Getopt::Long;
+use Data::Dumper;
# Standard Tags to be excluded from the word count go here
@@ -39,11 +40,12 @@
/;
-my ($addexctags,$inctags,$help,$detailed) = ('','',0,0);
+my ($addexctags,$inctags,$help,$detailed,$sectiondetail) = ('','',0,0,0);
GetOptions('exclude=s' => \$addexctags,
'include=s' => \$inctags,
'detailed' => \$detailed,
+ 'sectiondetail' => \$sectiondetail,
'help' => \$help);
if ($help)
@@ -134,7 +136,7 @@
foreach my $file (keys %{$filemap})
{
($filemap->{$file}->{wc},
- $filemap->{$file}->{iwc}) = gen_stats($file,$detailed);
+ $filemap->{$file}->{iwc}) = gen_stats($file,$detailed,$sectiondetail);
}
my ($total,$itotal) = (0,0);
@@ -153,7 +155,7 @@
sub gen_stats
{
- my ($file,$detailed) = @_;
+ my ($file,$detailed,$sectiondetail) = @_;
my $my_handler = MySQLDocBookStats->new($tagmap);
@@ -172,9 +174,21 @@
printf("%-40s %9s %9d\n", 'Total valid Words: ','',$my_handler->{totalwc});
printf("%-40s %9s %9d\n", 'Ignored Words: ','',$my_handler->{ignorewc});
}
+
+ if ($sectiondetail)
+ {
+ printf("%-60s %9s\n","Section ID","Count");
+
+ foreach my $id (sort keys %{$my_handler->{idwc}})
+ {
+ printf("%-60s %9d\n",$id,$my_handler->{idwc}->{$id});
+ }
+ }
+
return ($my_handler->{totalwc},$my_handler->{ignorewc});
}
+
package MySQLDocBookStats;
sub new
@@ -192,6 +206,9 @@
'currtext' => '',
'totalwc' => 0,
'ignorewc' => 0,
+ 'idmap' => [],
+ 'idwc' => {},
+ 'idcurr' => '',
'currtextmap' => [],
}, $class;
}
@@ -200,6 +217,20 @@
{
my ($self, $element) = @_;
+ if (exists($element->{Attributes}->{id}) &&
+ ($element->{Name} =~ m/^(section|chapter)$/i))
+ {
+ if (scalar @{$self->{idmap}} == 0)
+ {
+ push @{$self->{idmap}},$element->{Attributes}->{id};
+ }
+ else
+ {
+ push @{$self->{idmap}},$self->{idcurr};
+ }
+ $self->{idcurr} = $element->{Attributes}->{id};
+ }
+
$self->{currtext} .= ' ';
push @{$self->{currtextmap}},$self->{currtext};
$self->{currtext} = '';
@@ -220,8 +251,20 @@
else
{
$self->{totalwc} += $wc;
+ $self->{idwc}->{$self->{idcurr}} += $wc;
}
$self->{currtext} = pop @{$self->{currtextmap}};
+
+ if ($element->{Name} =~ m/^(section|chapter)$/i)
+ {
+ my $old = $self->{idcurr};
+ $self->{idcurr} = pop @{$self->{idmap}} || $self->{idcurr};
+ if ($old ne $self->{idcurr})
+ {
+ $self->{idwc}->{$self->{idcurr}} += $self->{idwc}->{$old};
+ }
+ }
+
}
sub characters
| Thread |
|---|
| • svn commit - mysqldoc@docsrva: r6587 - in trunk: make.d tools | mcbrown | 26 May |