Windows-Server-2003/tools/parsetable.pm

225 lines
7.0 KiB
Perl

#---------------------------------------------------------------------
package ParseTable;
#
# Copyright (c) Microsoft Corporation. All rights reserved.
#
# Version: 1.00 (07/12/2000) : (JeremyD) inital version
# 1.01 (08/25/2000) : (JeremyD) allow single heading tables
#---------------------------------------------------------------------
use strict;
use vars qw(@ISA @EXPORT $VERSION);
use IO::File;
use Carp;
use Exporter;
@ISA = qw(Exporter);
@EXPORT = qw(parse_table_lines parse_table_file);
$VERSION = '1.01';
sub parse_table_lines (\@;$) {
my $lines_ref = shift; # the array of lines is modified in place
my $storage = shift; # an array or hash ref to stuff the data in, if
# this is not a ref we quietly discard the data
# this could be useful to skip one table
my @heading; # the current set of headings
LINE:
while (my $line = shift @$lines_ref) {
chomp $line;
next LINE if $line =~ /^\s*$/; # skip empty lines
if ($line =~ /^\s*[#;](.*)/) { # comments may contain headings
my $comment = $1;
if ($comment =~ /^\s*(?:\[\w+\]\s*)+$/) { # bracketed names seperated
# by whitespace
if (@heading) { # already have headings, must be a new table
unshift @$lines_ref, $line; # this line is part of the next
# table, we need to put it back
last LINE; # a new table implies the end of the current one
} else { # found our first set of headings
while ($comment =~ /\[(\w+)\]/g) { # look for headings
push @heading, $1;
}
}
}
next LINE; # done parsing this comment
}
next unless @heading; # no data processing until we have our headings
# fields are seperated by 2 or more white space characters, however
# a single tab will also suffice
my @data = split /(?=\t)\s+|\s{2,}/, $line;
next unless $#heading == $#data; # require 1 data field per heading
# use our current headings as keys and make a hash of the data
my %hash;
for (my $i=0; $i<@heading; $i++) {
$hash{$heading[$i]} = $data[$i];
}
# store our current line's data in the reference passed to us
if (ref $storage eq 'ARRAY') {
push @$storage, \%hash;
} elsif (ref $storage eq 'HASH') {
$storage->{$data[0]} = \%hash;
} else {
# do nothing
# this allows skipping a table by passing in a non-ref storage
}
}
# the data array was modified in place, parsed lines have been removed
# successive calls will parse any remaining tables found in the data array
# return the number of unparsed lines, 0 indicates no remaining tables
return scalar @$lines_ref;
}
sub parse_table_file ($;@) {
my $filename = shift;
my @store_refs = @_;
my $fh = new IO::File $filename, "r";
if (defined $fh) {
my @lines = $fh->getlines;
my $i = 0;
while (@lines) {
parse_table_lines(@lines, $store_refs[$i++]);
}
undef $fh;
} else {
croak "Unable to open file $filename: $!";
}
}
1;
__END__
=head1 NAME
ParseTable - Extract data from a formatted text table
=head1 SYNOPSIS
use ParseTable;
parse_table_file("foobar.txt", \%table_one, \@table_two, ...);
$lines_remaining = parse_table_lines(@data_lines,\%table);
=head1 DESCRIPTION
This module provides an easy way to extract formatted data from text files.
=over 4
=item parse_table_file( $filename, @storage_refs )
parse_table_file takes a filename to parse and a list of storage locations
for the tables found within that file.
=item parse_table_lines( @data_lines, $storage_ref )
parse_table_lines takes an array of data lines and a storage location for
the first table found in the lines. It modifies the array in place and returns
the number of unparsed lines.
=back
The format for a table is:
;comments
; [heading1] [heading2]
item1 item2
item3 with internal space item4
item5 item6
Each line of data in a table is stored as a hash with the heading names as
keys and the data items as values.
If an array reference is specified as the storage location the data hash for
each line will be pushed on to the array.
If the storage location is a hash reference then the data hash for each line
will be stored using the value of the first column as the key. In the case of
duplicate data items the last one appearing in the table takes precedence.
=head1 EXAMPLES
parse_table_file("codetable.txt",\@data)
for $data (@data) {
print "$data->{Lang} is the lang code for $data->{Comments}\n";
}
parse_table_file("codetable.txt",\%data,\%flavors)
print "your site is $data->{$user_lang}{Site}\n";
print "your flavor is $flavors->{$user_lang}{$user_arch}\n";
codetable.txt:
;
; This is just an example of a file with two tables
;
;[Lang] [LCID] [Class] [Site] [Comments]
;-------------------------------------------------------------
;
ARA 0x0401 @CS REDMOND Arabic
CHS 0x0804 @FE REDMOND Chinese Simplified (PR China)
CHT 0x0404 @FE REDMOND Chinese Traditional (Taiwan Region)
CHH 0x0404 @FE REDMOND Chinese Traditional (Hong Kong Region)
FR 0x040C @EU DUBLIN French
GER 0x0407 @EU REDMOND German
;[Lang] [x86] [ia64]
;=============================================
USA per;pro;srv;ads;dtc pro;ads;dtc
GER per;pro;srv;ads pro;ads
CHT per;pro;srv;ads pro;ads
CHH per;pro;srv;ads pro;ads
CHS per;pro;srv;ads pro;ads
ARA per;pro pro
=head1 NOTES
The parser can handle blank lines and comments beginning with either ';' or
'#'.
A heading line must appear before any data lines. A heading line is a special
form of comment consisting of field names enclosed in brackets [].
Data lines must have exactly as many fields as heading lines.
Data fields must be seperated by 2 or more spaces. Single spaces within data
items do not require quoting or escaping.
Quoting and escaping are not supported in any way. This means you may not
have a data field with the value "" (empty string) or more than 1 space in a row.
Storage locations are not before parsing begins.
Heading names must match the regex /\w+/.
Should probably be expanded to handle returning a plain array for single column
tables (lists of filenames, etc).
=head1 SEE ALSO
hashtext.pm
=head1 AUTHOR
Jeremy Devenport <JeremyD>
=head1 COPYRIGHT
Copyright (c) Microsoft Corporation. All rights reserved.
=cut