Author: mcbrown
Date: 2007-05-25 21:58:20 +0200 (Fri, 25 May 2007)
New Revision: 6578
Log:
Adding spell checking tool
Added:
trunk/tools/spell-check.pl
Modified:
trunk/make.d/vars-docbook
trunk/make.d/xml-valid
Property changes on: trunk/tools/spell-check.pl
___________________________________________________________________
Name: svn:executable
+ *
Modified: trunk/make.d/vars-docbook
===================================================================
--- trunk/make.d/vars-docbook 2007-05-25 19:48:56 UTC (rev 6577)
+++ trunk/make.d/vars-docbook 2007-05-25 19:58:20 UTC (rev 6578)
Changed blocks: 1, Lines Added: 4, Lines Deleted: 0; 531 bytes
@@ -130,6 +130,10 @@
DEEP_CHECK = $(TOOLS_DIR)/deep-check.pl
+# SPELL_CHECK performs a spell check of the MySQL DocBook XML files
+
+SPELL_CHECK = $(TOOLS_DIR)/spell-check.pl
+
# GENARBITRARY takes an aspec file and template and generates and arbitrary doc
GENARBITRARY = $(TOOLS_DIR)/genarbelements.pl
Modified: trunk/make.d/xml-valid
===================================================================
--- trunk/make.d/xml-valid 2007-05-25 19:48:56 UTC (rev 6577)
+++ trunk/make.d/xml-valid 2007-05-25 19:58:20 UTC (rev 6578)
Changed blocks: 1, Lines Added: 3, Lines Deleted: 0; 426 bytes
@@ -29,6 +29,9 @@
%.ulinkcheck: %-prepped.xml
$(DEEP_CHECK) --checkulink $<
+%.spell: %-prepped.xml
+ $(SPELL_CHECK) $<
+
ifdef IDMAP_REFS
# None of the following rules are defined at all unless id-mapping
Added: trunk/tools/spell-check.pl
===================================================================
--- trunk/tools/spell-check.pl (rev 0)
+++ trunk/tools/spell-check.pl 2007-05-25 19:58:20 UTC (rev 6578)
Changed blocks: 1, Lines Added: 201, Lines Deleted: 0; 4752 bytes
@@ -0,0 +1,201 @@
+#! /usr/bin/perl -w
+# vim:set ts=2 sw=2 expandtab:
+
+# spell-check.pl - perform a spell check of the DocBook source
+#
+
+# Martin MC Brown
+# mc@stripped
+# 2006-11-23
+
+use strict;
+use Getopt::Long;
+use Data::Dumper;
+
+# Command-line defaults: the dictionary language and the directory that
+# holds the word-list files for that language.
+my ($lang,$dictbase,$help) = ('en','../dicts',0);
+
+# --lang and --dictbase carry a string value, so they need the "=s"
+# specifier.  Without it Getopt::Long treats them as boolean flags:
+# "--lang=de" is rejected and a bare "--lang" would set $lang to 1.
+GetOptions("lang=s"     => \$lang,
+           "dictbase=s" => \$dictbase,
+           "help"       => \$help,
+          );
+
+# --help: print the usage summary and exit successfully without reading
+# any input file.
+if ($help)
+{
+  print <<EOF;
+spell-check.pl [--lang=language] [--dictbase=dir] file.xml
+
+Where:
+  --lang=language: Selects the dictionary language (default: en)
+  --dictbase=dir:  Directory containing the dictionary files
+                   language.base, language.additional,
+                   language.terms and language.autoterms
+                   (default: ../dicts)
+  --help:          Shows this message
+EOF
+
+  exit 0;
+}
+
+# XML::Parser::PerlSAX is loaded at run time rather than with "use" so that
+# a failed load produces the installation instructions below.
+unless (eval { require XML::Parser::PerlSAX; 1 })
+{
+  die <<EOF;
+ERROR: Cannot load the PerlSAX parser.
+
+You need to install the expat library and the XML::Parser::PerlSAX module for perl.
+Either do it by hand:
+ - libexpat is available from http://expat.sourceforge.net
+ - PerlSAX is available from http://search.cpan.org/~kmacleod/libxml-perl-0.08/lib/XML/Parser/PerlSAX.pm
+
+Using CPAN:
+ - Install libexpat
+ - Run:
+
+\$ perl -MCPAN -e 'install XML::Parser::PerlSAX'
+
+Using apt-get:
+
+\$ apt-get install libexpat-dev
+\$ perl -MCPAN -e 'install XML::Parser::PerlSAX'
+
+Using YaST:
+
+\$ yast -i expat
+\$ perl -MCPAN -e 'install XML::Parser::PerlSAX'
+
+Please install and try again.
+
+EOF
+}
+
+# The single positional argument is the XML file to check.
+my $file = shift or die "You must supply the name of the file to process";
+
+# Run the document through the SAX handler; it collects every candidate
+# word, with an occurrence count, in $my_handler->{wordlist}.
+my $my_handler = MySQLDocBook->new();
+
+XML::Parser::PerlSAX->new->parse(Source => { SystemId => $file},
+                                 Handler => $my_handler);
+
+# $words:      dictionary words, matched as written or lower-cased
+# $terms:      terms, matched only with their exact capitalization
+# $termscheck: lower-cased term => term as written in the term lists
+my $words = {};
+my $terms = {};
+my $termscheck = {};
+
+loadwords(sprintf('%s/%s.base',$dictbase,$lang),$words);
+loadwords(sprintf('%s/%s.additional',$dictbase,$lang),$words);
+# The term lists must fill $termscheck as well.  If it stays empty the
+# capitalization lookup below can never match, and every miscapitalized
+# term is reported as an unknown word instead.
+loadwords(sprintf('%s/%s.terms',$dictbase,$lang),$terms,$termscheck);
+loadwords(sprintf('%s/%s.autoterms',$dictbase,$lang),$terms,$termscheck);
+
+my $unknown = {};
+my $badterm = {};
+
+# Classify every word seen in the document: known words and correctly
+# written terms are skipped, terms with the wrong capitalization are
+# recorded together with the expected form, everything else is unknown.
+foreach my $word (keys %{$my_handler->{wordlist}})
+{
+  my $lcword = lc($word);
+
+  next if (exists($words->{$word}) or exists($words->{$lcword}));
+  next if (exists($terms->{$word}));
+
+  if (exists($termscheck->{$lcword}))
+  {
+    $badterm->{$word} = $termscheck->{$lcword};
+    next;
+  }
+
+  $unknown->{$word} = 1;
+}
+
+# Report both lists in sorted order; the headings are always printed, even
+# when a list is empty.
+print "The following terms have bad capitalization\n";
+foreach my $word (sort keys %{$badterm})
+{
+  print "\t$word (should be $badterm->{$word})\n";
+}
+print "\n";
+print "The following words couldn't be found\n";
+foreach my $word (sort keys %{$unknown})
+{
+  print "\t$word\n";
+}
+
+# loadwords($file, $dest [, $lcdest])
+#
+# Reads $file, one entry per line, and records each entry as a key of the
+# hash ref $dest.  When the optional hash ref $lcdest is supplied, it also
+# maps the lower-cased entry to the entry as written, so a caller can look
+# up the capitalization used in the word list.
+#
+# A file that cannot be opened is reported with a warning and skipped; both
+# hashes are left untouched in that case.
+sub loadwords
+{
+  my ($file,$dest,$lcdest) = @_;
+
+  # Three-argument open with a lexical handle: the file name is never
+  # interpreted as a mode or a pipe, and no global WORDS handle is used.
+  my $fh;
+  unless (open($fh,'<',$file))
+  {
+    warn "Can't open $file: $!\n";
+    # Stop here: reading from the unopened handle would only add further
+    # "closed filehandle" warnings on top of the real error.
+    return;
+  }
+
+  while (my $line = <$fh>)
+  {
+    chomp $line;
+    $dest->{$line} = 1;
+    $lcdest->{lc($line)} = $line if (defined($lcdest));
+  }
+  close($fh);
+
+  return;
+}
+
+package MySQLDocBook;
+use File::Basename;
+
+# SAX handler that collects the words found in the character data of a
+# DocBook document.
+#
+# Object fields:
+#   wordlist   - hash ref: word => number of occurrences
+#   parsewords - true while character data should be collected
+#   wordbuffer - character data collected since the last end tag
+#   state      - stack of saved parsewords values, one per open excluded
+#                element
+
+# _is_skipped($name)
+#
+# Returns 1 when $name is an element whose text must not be spell checked,
+# 0 otherwise.  The list lives in a sub rather than in a file-level
+# variable because the main program runs the parser before execution
+# reaches this package, so a run-time initialisation placed here would not
+# have happened yet.
+sub _is_skipped
+{
+  my ($name) = @_;
+
+  return ($name =~ m/^(literal|programlisting|option|userinput|replaceable|remark)$/) ? 1 : 0;
+}
+
+# new()
+#
+# Creates a handler with an empty word list.  Collection starts switched
+# off and is enabled by the first non-excluded start tag.
+sub new
+{
+  my $self = shift;
+  my $class = ref($self) || $self;
+
+  return bless {'wordlist' => {},
+                'parsewords' => 0,
+                'wordbuffer' => '',
+                'state' => [],
+               }, $class;
+}
+
+# start_element($element)
+#
+# SAX callback; $element->{Name} is the element name.  An excluded element
+# saves the current collection flag and switches collection off.  Any other
+# element switches collection on, but only when no excluded element is
+# open: otherwise markup nested inside e.g. <programlisting> would turn
+# spell checking back on for text that has to be ignored.
+sub start_element
+{
+  my ($self, $element) = @_;
+
+  if (_is_skipped($element->{Name}))
+  {
+    push @{$self->{state}},$self->{parsewords};
+    $self->{parsewords} = 0;
+  }
+  elsif (!@{$self->{state} || []})
+  {
+    $self->{parsewords} = 1;
+  }
+}
+
+# end_element($element)
+#
+# SAX callback.  Restores the collection flag when an excluded element
+# closes, then splits the buffered text into words and counts each one in
+# wordlist.  Fragments without at least one ASCII letter are dropped.
+sub end_element
+{
+  my ($self, $element) = @_;
+
+  if (_is_skipped($element->{Name}))
+  {
+    $self->{parsewords} = pop @{$self->{state}};
+  }
+
+  if (length($self->{wordbuffer}) > 0)
+  {
+    foreach my $word (split /[\s(),\.;\?:"\/]+/,$self->{wordbuffer})
+    {
+      next unless ($word =~ m/[a-z]/i);
+      $self->{wordlist}->{$word}++;
+    }
+    $self->{wordbuffer} = '';
+  }
+}
+
+# characters($element)
+#
+# SAX callback; appends $element->{Data} to the buffer while collection is
+# switched on and ignores it otherwise.
+sub characters
+{
+  my ($self, $element) = @_;
+
+  if ($self->{parsewords})
+  {
+    $self->{wordbuffer} .= $element->{Data};
+  }
+}
+
+1;
Property changes on: trunk/tools/spell-check.pl
___________________________________________________________________
Name: svn:executable
+ *
| Thread |
|---|
| • svn commit - mysqldoc@docsrva: r6578 - in trunk: make.d tools | mcbrown | 25 May |