Author: paul
Date: 2005-11-09 02:07:24 +0100 (Wed, 09 Nov 2005)
New Revision: 298
Log:
r3578@frost: paul | 2005-11-08 19:04:04 -0600
Delete old file.
Update xmlformat.
Removed:
trunk/make.d/shell-vars
Modified:
trunk/
trunk/tools/manual-helper-prettyprint.pl
Property changes on: trunk
___________________________________________________________________
Name: svk:merge
- b5ec3a16-e900-0410-9ad2-d183a3acac99:/mysqldoc-local/mysqldoc/trunk:3477
bf112a9c-6c03-0410-a055-ad865cd57414:/mysqldoc-local/mysqldoc/trunk:340
+ b5ec3a16-e900-0410-9ad2-d183a3acac99:/mysqldoc-local/mysqldoc/trunk:3578
bf112a9c-6c03-0410-a055-ad865cd57414:/mysqldoc-local/mysqldoc/trunk:340
Deleted: trunk/make.d/shell-vars
Modified: trunk/tools/manual-helper-prettyprint.pl
===================================================================
--- trunk/tools/manual-helper-prettyprint.pl 2005-11-08 06:11:26 UTC (rev 297)
+++ trunk/tools/manual-helper-prettyprint.pl 2005-11-09 01:07:24 UTC (rev 298)
@@ -3,7 +3,7 @@
# xmlformat - configurable XML file formatter/pretty-printer
-# Copyright (c) 2004, Kitebird, LLC. All rights reserved.
+# Copyright (c) 2004, 2005 Kitebird, LLC. All rights reserved.
# Some portions are based on the REX shallow XML parser, which
# is Copyright (c) 1998, Robert D. Cameron. These include the
# regular expression parsing variables and the shallow_parse()
@@ -12,12 +12,11 @@
# This software is licensed as described below:
#
=pod
-
xmlformat is distributed under a BSD-style license. This license
applies to the entire xmlformat distribution, with the exception of
the REX parser (described below).
-Copyright (c) 2004, Kitebird, LLC. All rights reserved.
+Copyright (c) 2004, 2005, Kitebird, LLC. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -54,20 +53,9 @@
http://www.cs.sfu.ca/~cameron/REX.html
-The document contains a Perl implementation of REX:
-
---- begin REX code ---
+The document contains a Perl implementation of REX.
=cut
-# REX/Perl 1.0
-# Robert D. Cameron "REX: XML Shallow Parsing with Regular Expressions",
-# Technical Report TR 1998-17, School of Computing Science, Simon Fraser
-# University, November, 1998.
-# Copyright (c) 1998, Robert D. Cameron.
-# The following code may be freely used and distributed provided that
-# this copyright and citation notice remains intact and that modifications
-# or additions are clearly identified.
-
# Syntax: xmlformat [config-file] xml-file
# Default config file is $ENV{XMLFORMAT_CONF} or ./xmlformat.conf, in that
@@ -130,8 +118,9 @@
$Getopt::Long::ignorecase = 0; # options are case sensitive
$Getopt::Long::bundling = 1; # allow short options to be bundled
-my $XMLFORMAT_VERSION = "1.03";
-my $prog_name = "xmlformat";
+my $PROG_NAME = "xmlformat";
+my $PROG_VERSION = "1.04";
+my $PROG_LANG = "Perl";
# ----------------------------------------------------------------------
@@ -310,6 +299,10 @@
$self->{tokens} = [ ];
+ # List of line numbers for each token
+
+ $self->{line_num} = [ ];
+
# Document node tree (constructed from the token list).
$self->{tree} = [ ];
@@ -658,7 +651,7 @@
# Main document processing routine.
# - Argument is a string representing an input document
# - Return value is the reformatted document, or undef. An undef return
-# signifies either than an error occurred, or that some option was
+# signifies either that an error occurred, or that some option was
# given that suppresses document output. In either case, don't write
# any output for the document. Any error messages will already have
# been printed when this returns.
@@ -690,6 +683,9 @@
return undef;
}
+ # Assign input line number to each token
+ $self->assign_line_numbers ();
+
# Look for and report any error tokens returned by parser
warn "Checking document for errors...\n" if $verbose;
if ($self->report_errors () > 0)
@@ -755,9 +751,10 @@
return $str;
}
-
# ----------------------------------------------------------------------
+# Parse XML document into array of tokens and store array
+
sub shallow_parse
{
my ($self, $xml_document) = @_;
@@ -765,6 +762,8 @@
$self->{tokens} = [ $xml_document =~ /$XML_SPE/g ];
}
+# ----------------------------------------------------------------------
+
# Extract a tag name from a tag and return it.
# Dies if the tag cannot be found, because this is supposed to be
@@ -780,6 +779,26 @@
# ----------------------------------------------------------------------
+# Assign an input line number to each token. $self->{line_num}->[$i] is
+# the 1-based line on which $self->{tokens}->[$i] begins.
+
+sub assign_line_numbers
+{
+my $self = shift;
+my $line_num = 1;
+
+ $self->{line_num} = [ ];
+ for (my $i = 0; $i < @{$self->{tokens}}; $i++)
+ {
+ my $token = $self->{tokens}->[$i];
+ push (@{$self->{line_num}}, $line_num);
+ # record first, then advance by the token's newline count (tr returns it)
+ $line_num += ($token =~ tr/\n/\n/);
+ }
+}
+
+# ----------------------------------------------------------------------
+
# Check token list for errors and report any that are found. Error
# tokens are those that begin with "<" but do not end with ">".
@@ -797,7 +816,8 @@
my $token = $self->{tokens}->[$i];
if ($token =~ /^</ && $token !~ />$/)
{
- warn "Error (token " . ($i+1) . "): $token\n";
+ my $line_num = $self->{line_num}->[$i];
+ warn "Malformed token at line $line_num, token " . ($i+1) . ": $token\n";
++$err_count;
}
}
@@ -807,6 +827,25 @@
# ----------------------------------------------------------------------
+# Helper for tokens_to_tree: warn $label, then @stack numbered from 1 ("none" if empty)
+
+sub print_tag_stack
+{
+my ($label, @stack) = @_;
+ if (@stack < 1)
+ {
+ warn " $label: none\n";
+ }
+ else
+ {
+ warn " $label:\n";
+ for (my $i = 0; $i < @stack; $i++)
+ {
+ warn " ", ($i+1), ": ", $stack[$i], "\n";
+ }
+ }
+}
+
# Convert the list of XML document tokens to a tree representation.
# The implementation uses a loop and a stack rather than recursion.
@@ -826,6 +865,8 @@
for (my $i = 0; $i < @{$self->{tokens}}; $i++)
{
my $token = $self->{tokens}->[$i];
+ my $line_num = $self->{line_num}->[$i];
+ my $tok_err = "Error near line $line_num, token " . ($i+1) . " ($token)";
if ($token !~ /^</) # text
{
push (@{$children}, text_node ($token));
@@ -850,13 +891,13 @@
{
if (!@tag_stack)
{
- warn "Empty tag stack; malformed document?\n";
+ warn "$tok_err: Close tag w/o preceding open tag; malformed document?\n";
++$err_count;
next;
}
if (!@children_stack)
{
- warn "Empty children stack; malformed document?\n";
+ warn "$tok_err: Empty children stack; malformed document?\n";
++$err_count;
next;
}
@@ -865,8 +906,10 @@
my $close_tag_name = extract_tag_name ($token);
if ($open_tag_name ne $close_tag_name)
{
- warn "MISMATCH open ($open_tag_name), close ($close_tag_name);",
- " malformed document?\n";
+ warn "$tok_err: Tag mismatch; malformed document?\n";
+ warn " open tag: $tag\n";
+ warn " close tag: $token\n";
+ print_tag_stack ("enclosing tags", @tag_stack);
++$err_count;
next;
}
@@ -899,12 +942,14 @@
if (@tag_stack)
{
- warn "Non-empty tag stack; malformed document?\n";
+ warn "Error at EOF: Unclosed tags; malformed document?\n";
+ print_tag_stack ("unclosed tags", @tag_stack);
++$err_count;
}
if (@children_stack)
{
- warn "Non-empty children stack; malformed document?\n";
+ warn "Error at EOF: Unprocessed child elements; malformed document?\n";
+# TODO: print out info about them
++$err_count;
}
@@ -1540,11 +1585,11 @@
my $usage = <<EOF;
-Usage: $prog_name [options] xml-file
+Usage: $PROG_NAME [options] xml-file
Options:
--help, -h
- Print this message
+ Print this message and exit.
--backup suffix -b suffix
Back up the input document, adding suffix to the input
filename to create the backup filename.
@@ -1615,7 +1660,7 @@
if (defined ($show_version))
{
- print "$prog_name $XMLFORMAT_VERSION\n";
+ print "$PROG_NAME $PROG_VERSION ($PROG_LANG version)\n";
exit (0);
}
| Thread | Author | Date |
|---|---|---|
| • svn commit - mysqldoc@docsrva: r298 - in trunk: . make.d tools | paul | 9 Nov |