List:Commits« Previous MessageNext Message »
From:mcbrown Date:May 25 2007 7:58pm
Subject:svn commit - mysqldoc@docsrva: r6578 - in trunk: make.d tools
View as plain text  
Author: mcbrown
Date: 2007-05-25 21:58:20 +0200 (Fri, 25 May 2007)
New Revision: 6578

Log:
Adding spell checking tool



Added:
   trunk/tools/spell-check.pl
Modified:
   trunk/make.d/vars-docbook
   trunk/make.d/xml-valid

Property changes on: trunk/tools/spell-check.pl
___________________________________________________________________
Name: svn:executable
   + *


Modified: trunk/make.d/vars-docbook
===================================================================
--- trunk/make.d/vars-docbook	2007-05-25 19:48:56 UTC (rev 6577)
+++ trunk/make.d/vars-docbook	2007-05-25 19:58:20 UTC (rev 6578)
Changed blocks: 1, Lines Added: 4, Lines Deleted: 0; 531 bytes

@@ -130,6 +130,10 @@
 
 DEEP_CHECK = $(TOOLS_DIR)/deep-check.pl
 
+# SPELL_CHECK performs a spell check of the MySQL DocBook XML files
+
+SPELL_CHECK = $(TOOLS_DIR)/spell-check.pl
+
 # GENARBITRARY takes an aspec file and template and generates and arbitrary doc
 
 GENARBITRARY = $(TOOLS_DIR)/genarbelements.pl


Modified: trunk/make.d/xml-valid
===================================================================
--- trunk/make.d/xml-valid	2007-05-25 19:48:56 UTC (rev 6577)
+++ trunk/make.d/xml-valid	2007-05-25 19:58:20 UTC (rev 6578)
Changed blocks: 1, Lines Added: 3, Lines Deleted: 0; 426 bytes

@@ -29,6 +29,9 @@
 %.ulinkcheck: %-prepped.xml
 	$(DEEP_CHECK) --checkulink $<
 
+%.spell: %-prepped.xml
+	$(SPELL_CHECK) $<
+
 ifdef IDMAP_REFS
 
 # None of the following rules are defined at all unless id-mapping


Added: trunk/tools/spell-check.pl
===================================================================
--- trunk/tools/spell-check.pl	                        (rev 0)
+++ trunk/tools/spell-check.pl	2007-05-25 19:58:20 UTC (rev 6578)
Changed blocks: 1, Lines Added: 201, Lines Deleted: 0; 4752 bytes

@@ -0,0 +1,201 @@
+#! /usr/bin/perl -w
+# vim:set ts=2 sw=2 expandtab:
+
+# spell-check.pl - perform a spell check of the DocBook source
+#
+
+# Martin MC Brown
+# mc@stripped
+# 2006-11-23
+
+use strict;
+use Getopt::Long;
+use Data::Dumper;
+
+# Defaults: English dictionaries, read from ../dicts (a relative path).
+my ($lang,$dictbase,$help) = ('en','../dicts',0);
+
+# --lang and --dictbase carry a string value, hence the "=s" specifiers;
+# without them Getopt::Long treats the options as boolean flags and
+# rejects the --lang=xx form shown in the usage text.
+GetOptions("lang=s" => \$lang,
+           "dictbase=s" => \$dictbase,
+           "help" => \$help,
+           ) or die "Invalid options; try --help for usage\n";
+
+if ($help)
+{
+    print <<EOF;
+spell-check.pl [--lang=language] [--dictbase=dir] file.xml
+
+Where:
+ --lang=language: Selects the dictionary language; used as the prefix
+                  of the dictionary file names (default is en)
+ --dictbase=dir:  Directory holding the dictionary files language.base,
+                  language.additional, language.terms and
+                  language.autoterms (default is ../dicts)
+ --help:          Shows this message
+EOF
+
+    exit 0;
+}
+
+# Load the SAX parser at run time so that a missing module produces the
+# installation instructions below rather than a bare "Can't locate"
+# failure.
+eval { require XML::Parser::PerlSAX; };
+
+die <<EOF if $@;
+ERROR: Cannot load the PerlSAX parser.
+
+You need to install the expat library and the XML::Parser::PerlSAX module for perl. 
+Either do it by hand:
+ - libexpat is available from http://expat.sourceforge.net
+ - PerlSAX is available from http://search.cpan.org/~kmacleod/libxml-perl-0.08/lib/XML/Parser/PerlSAX.pm
+
+Using CPAN: 
+ - Install libexpat
+ - Run: 
+
+\$ perl -MCPAN -e 'install XML::Parser::PerlSAX'
+
+Using apt-get:
+
+\$ apt-get install libexpat-dev
+\$ perl -MCPAN -e 'install XML::Parser::PerlSAX'
+
+Using YaST: 
+
+\$ yast -i expat
+\$ perl -MCPAN -e 'install XML::Parser::PerlSAX'
+
+Please install and try again.
+
+EOF
+
+# The single positional argument is the DocBook XML file to check.
+my $file = shift or die "You must supply the name of the file to process";
+
+# Walk the document; the handler accumulates the words it sees in
+# $my_handler->{wordlist}.
+my $my_handler = MySQLDocBook->new();
+
+XML::Parser::PerlSAX->new->parse(Source => { SystemId => $file}, 
+                                 Handler => $my_handler);
+
+my $words = {};       # dictionary words (also matched lower-cased)
+my $terms = {};       # terms, matched exactly as written
+my $termscheck = {};  # lower-cased term => term as written in the list
+
+loadwords(sprintf('%s/%s.base',$dictbase,$lang),$words);
+loadwords(sprintf('%s/%s.additional',$dictbase,$lang),$words);
+# The term lists must also fill $termscheck: it is the only source for
+# the capitalization report, and it stays empty unless it is passed as
+# the third argument here.
+loadwords(sprintf('%s/%s.terms',$dictbase,$lang),$terms,$termscheck);
+loadwords(sprintf('%s/%s.autoterms',$dictbase,$lang),$terms,$termscheck);
+
+my $unknown = {};     # words found in no list at all
+my $badterm = {};     # mis-capitalized term => correct spelling
+
+foreach my $word (keys %{$my_handler->{wordlist}})
+{
+    # Known dictionary word, either as written or lower-cased.
+    next if (exists($words->{$word}) or exists($words->{lc($word)}));
+    # Known term with exactly this capitalization.
+    next if (exists($terms->{$word}));
+
+    # A term that only matches case-insensitively is a capitalization
+    # error, not an unknown word.
+    if (exists($termscheck->{lc($word)}))
+    {
+        $badterm->{$word} = $termscheck->{lc($word)};
+        next;
+    }
+
+    $unknown->{$word} = 1;
+}
+
+print "The following terms have bad capitalization\n";
+foreach my $word (sort keys %{$badterm})
+{
+    print "\t$word (should be $badterm->{$word})\n";
+}
+print "\n";
+print "The following words couldn't be found\n";
+foreach my $word (sort keys %{$unknown})
+{
+    print "\t$word\n";
+}
+
+# loadwords(FILE, DEST [, LCDEST])
+#
+# Reads FILE, one entry per line, and records each line as a key of the
+# hash referenced by DEST. When LCDEST is supplied it additionally maps
+# the lower-cased entry to the entry as written.
+#
+# A file that cannot be opened is reported with a warning and skipped,
+# so a missing dictionary file does not abort the run.
+sub loadwords
+{
+    my ($file,$dest,$lcdest) = @_;
+
+    # Three-argument open with a lexical handle: the file name is never
+    # interpreted as a mode or a pipe, and the handle is private to
+    # this call.
+    my $fh;
+    unless (open($fh,'<',$file))
+    {
+        warn "Can't open $file: $!\n";
+        # Stop here rather than reading from an unopened handle.
+        return;
+    }
+
+    while (my $line = <$fh>)
+    {
+        chomp $line;
+        $dest->{$line} = 1;
+        $lcdest->{lc($line)} = $line if (defined($lcdest));
+    }
+    close($fh);
+
+    return;
+}
+
+package MySQLDocBook;
+use File::Basename;
+
+# SAX handler that collects the words of a DocBook document.
+#
+# Text inside literal, programlisting, option, userinput, replaceable
+# and remark elements is skipped, including text inside any element
+# nested within them. All other text is split into words, and the
+# number of occurrences of each word is kept in $self->{wordlist}.
+
+# Returns 1 when NAME is an element whose content is not collected,
+# 0 otherwise. This is a sub rather than a file-level variable because
+# the main program runs the parser before any run-time statement in
+# this part of the file would be executed.
+sub _is_unchecked
+{
+    my ($name) = @_;
+    return ($name =~ m/^(?:literal|programlisting|option|userinput|replaceable|remark)$/) ? 1 : 0;
+}
+
+# Constructor. Works as a class or an instance method; any arguments
+# are accepted and ignored.
+sub new
+{
+    my $self = shift;
+    my $class = ref($self) || $self;
+
+    return bless {'wordlist' => {},     # word => number of occurrences
+                  'parsewords' => 1,    # true while text is collected
+                  'wordbuffer' => '',   # text gathered since last flush
+                  'state' => [],        # saved parsewords values
+              }, $class;
+}
+
+# SAX callback for an opening tag. Entering a skipped element saves the
+# current collection flag and switches collection off. Any other
+# element leaves the flag alone, so an element nested inside a skipped
+# one cannot switch collection back on.
+sub start_element
+{
+    my ($self, $element) = @_;
+
+    if (_is_unchecked($element->{Name}))
+    {
+        push @{$self->{state}},$self->{parsewords};
+        $self->{parsewords} = 0;
+    }
+
+    return;
+}
+
+# SAX callback for a closing tag. Leaving a skipped element restores
+# the flag saved by start_element; then any buffered text is split into
+# words and counted.
+sub end_element
+{
+    my ($self, $element) = @_;
+
+    if (_is_unchecked($element->{Name}))
+    {
+        $self->{parsewords} = pop @{$self->{state}};
+    }
+
+    return unless (length($self->{wordbuffer}) > 0);
+
+    foreach my $word (split /[\s(),\.;\?:"\/]+/,$self->{wordbuffer})
+    {
+        # Ignore fragments without any letter (numbers, punctuation).
+        next unless ($word =~ m/[a-z]/i);
+        $self->{wordlist}->{$word}++;
+    }
+    $self->{wordbuffer} = '';
+
+    return;
+}
+
+# SAX callback for character data: appended to the buffer only while
+# collection is switched on.
+sub characters
+{
+    my ($self, $element) = @_;
+
+    $self->{wordbuffer} .= $element->{Data} if ($self->{parsewords});
+
+    return;
+}
+
+1;


Property changes on: trunk/tools/spell-check.pl
___________________________________________________________________
Name: svn:executable
   + *


Thread
svn commit - mysqldoc@docsrva: r6578 - in trunk: make.d toolsmcbrown25 May