List: Commits | « Previous Message | Next Message »
From: mcbrown  Date: March 16 2007 9:16am
Subject: svn commit - mysqldoc@docsrva: r5407 - in trunk: make.d refman-5.1 tools
View as plain text  
Author: mcbrown
Date: 2007-03-16 10:16:07 +0100 (Fri, 16 Mar 2007)
New Revision: 5407

Log:
Updating xml-word-count script
Created new make rules for word counts



Modified:
   trunk/make.d/vars-docbook
   trunk/make.d/xml-format
   trunk/refman-5.1/Makefile
   trunk/tools/xml-word-count.pl


Modified: trunk/make.d/vars-docbook
===================================================================
--- trunk/make.d/vars-docbook	2007-03-16 07:47:20 UTC (rev 5406)
+++ trunk/make.d/vars-docbook	2007-03-16 09:16:07 UTC (rev 5407)
Changed blocks: 1, Lines Added: 4, Lines Deleted: 0; 483 bytes

@@ -100,6 +100,10 @@
 
 GENARBITRARY = $(TOOLS_DIR)/genarbelements.pl
 
+# XMLWORDCOUNT counts the words in an XML file, by tag
+
+XMLWORDCOUNT = $(TOOLS_DIR)/xml-word-count.pl
+
 # IDMAP builds a map of the IDs in an XML document
 
 IDMAP = $(TOOLS_DIR)/idmap.pl


Modified: trunk/make.d/xml-format
===================================================================
--- trunk/make.d/xml-format	2007-03-16 07:47:20 UTC (rev 5406)
+++ trunk/make.d/xml-format	2007-03-16 09:16:07 UTC (rev 5407)
Changed blocks: 1, Lines Added: 8, Lines Deleted: 0; 599 bytes

@@ -7,5 +7,13 @@
 %.format %.reformat:
 	$(XMLFORMAT) $*.xml
 
+%.wc: 
+	$(XMLWORDCOUNT) $*.xml
+
+%.wcd: 
+	$(XMLWORDCOUNT) --detailed $*.xml
+
 help::
 	@echo "make file.format          - put file.xml in standard format"
+	@echo "make file.wc              - counts the words within XML tags"
+	@echo "make file.wcd             - counts the words within XML tags (with tag-by-tag counts"


Modified: trunk/refman-5.1/Makefile
===================================================================
--- trunk/refman-5.1/Makefile	2007-03-16 07:47:20 UTC (rev 5406)
+++ trunk/refman-5.1/Makefile	2007-03-16 09:16:07 UTC (rev 5407)
Changed blocks: 1, Lines Added: 3, Lines Deleted: 0; 531 bytes

@@ -47,6 +47,9 @@
 manual-prepped.xml: $(MANUAL_SRCS) $(IDMAP_OBJS)
 manual-manprepped.xml: $(MANUAL_SRCS) $(IDMAP_OBJS)
 
+manual.wc: manual-prepped.xml
+	$(XMLWORDCOUNT) manual-prepped.xml
+
 # Make sure we trigger a rebuild of the dynamic tables 
 # when validating manual.xml or requiring the DBA chapter
 


Modified: trunk/tools/xml-word-count.pl
===================================================================
--- trunk/tools/xml-word-count.pl	2007-03-16 07:47:20 UTC (rev 5406)
+++ trunk/tools/xml-word-count.pl	2007-03-16 09:16:07 UTC (rev 5407)
Changed blocks: 4, Lines Added: 38, Lines Deleted: 19; 3467 bytes

@@ -39,10 +39,11 @@
     /;
 
 
-my ($addexctags,$inctags,$help) = ('','',0);
+my ($addexctags,$inctags,$help,$detailed) = ('','',0,0);
 
 GetOptions('exclude=s' => \$addexctags,
            'include=s' => \$inctags,
+           'detailed' => \$detailed,
            'help' => \$help);
 
 if ($help)

@@ -121,41 +122,52 @@
 {
 # We skip anything not an XML file
     next unless ($file =~ m/\.xml$/);
-    $filemap->{$file} = 1;
+    $filemap->{$file} = {wc => 0,
+                         iwc => 0};
 }
 
 foreach my $file (keys %{$filemap})
 {
-    $filemap->{$file} = gen_stats($file,$filemap);
+    ($filemap->{$file}->{wc},
+     $filemap->{$file}->{iwc}) = gen_stats($file,$detailed);
 }
 
-if (scalar keys %{$filemap} > 1)
+my ($total,$itotal) = (0,0);
+print("Summary by filename\n--------------\n");
+foreach my $file (keys %{$filemap})
 {
-    print("\n\nSummary by filename\n--------------\n\n");
-    foreach my $file (keys %{$filemap})
-    {
-        printf("%-40s %d\n",$file,$filemap->{$file});
-    }
+    printf("%-40s %d (%d words ignored)\n",
+           $file,
+           $filemap->{$file}->{wc},
+           $filemap->{$file}->{iwc});
+    $total += $filemap->{$file}->{wc};
+    $itotal += $filemap->{$file}->{iwc};
 }
+print "--\n";
+printf("%-40s %d (%d words ignored)\n",'Total',$total,$itotal);
 
 sub gen_stats
 {
-    my ($file) = @_;
+    my ($file,$detailed) = @_;
 
     my $my_handler = MySQLDocBookStats->new($tagmap);
 
     XML::Parser::PerlSAX->new->parse(Source => { SystemId => $file}, 
                                      Handler => $my_handler);
 
-    printf("%-40s %9s %9s\n",'Tag','Occurs','Words');
-
-    foreach my $tag (sort keys %{$my_handler->{tagcount}})
+    if ($detailed)
     {
-        printf("%-40s %9d %9d %s\n",$tag, $my_handler->{tagcount}->{$tag} || 0,$my_handler->{tagwordcount}->{$tag} || 0,
-               (exists($tagmap->{$tag}) ? '(ignored)' : ''));
+        printf("%-40s %9s %9s\n",'Tag','Occurs','Words');
+        
+        foreach my $tag (sort keys %{$my_handler->{tagcount}})
+        {
+            printf("%-40s %9d %9d %s\n",$tag, $my_handler->{tagcount}->{$tag} || 0,$my_handler->{tagwordcount}->{$tag} || 0,
+                   (exists($tagmap->{$tag}) ? '(ignored)' : ''));
+        }
+        printf("%-40s %9s %9d\n", 'Total valid Words: ','',$my_handler->{totalwc});
+        printf("%-40s %9s %9d\n", 'Ignored Words: ','',$my_handler->{ignorewc});
     }
-    printf("%-40s %9s %9d\n", 'Total valid Words: ','',$my_handler->{totalwc});
-    return ($my_handler->{totalwc});
+    return ($my_handler->{totalwc},$my_handler->{ignorewc});
 }
 
 package MySQLDocBookStats;

@@ -174,6 +186,7 @@
                   'captext' => 1,
                   'currtext' => '',
                   'totalwc' => 0,
+                  'ignorewc' => 0,
                   'currtextmap' => [],
               }, $class;
 }

@@ -195,8 +208,14 @@
     
     my $wc = count_words($self->{currtext});
     $self->{tagwordcount}->{$element->{Name}} += $wc;
-    $self->{totalwc} += $wc
-        unless (exists($self->{tagmap}->{$element->{Name}}));
+    if (exists($self->{tagmap}->{$element->{Name}}))
+    {
+        $self->{ignorewc} += $wc;
+    }
+    else
+    {
+        $self->{totalwc} += $wc;
+    }
     $self->{currtext} = pop @{$self->{currtextmap}};
 }    
 


Thread
svn commit - mysqldoc@docsrva: r5407 - in trunk: make.d refman-5.1 tools | mcbrown | 16 Mar