sakharamg's picture
Uploading all files
158b61b
raw
history blame contribute delete
763 Bytes
#!/usr/bin/env perl
#
# This file is part of moses. Its use is licensed under the GNU Lesser General
# Public License version 2.1 or, at your option, any later version.
use warnings;
use strict;
# Patterns used to recognise segments.  They are kept exactly as the script
# has always used them: the tag name is matched case-insensitively and the
# opening tag must carry at least one character after "seg" (e.g. an id
# attribute).
my $SEG_OPEN_AT_EOL = qr/<seg[^>]+>\s*$/i;
my $SEG_OPEN        = qr/<seg[^>]+>\s*(.*)\s*$/i;
my $SEG_COMPLETE    = qr/<seg[^>]+>\s*(.*)\s*<\/seg>/i;

# _append_next_line($in, \$line)
#
# Reads one more line from the filehandle $in, strips its line terminator and
# appends it to the string referenced by the second argument.
# Returns 1 on success and 0 when $in is exhausted (nothing is appended).
sub _append_next_line {
    my ($in, $line_ref) = @_;

    my $next_line = <$in>;
    return 0 unless defined $next_line;

    chomp($next_line);
    ${$line_ref} .= $next_line;
    return 1;
}

# extract_segments($in, $out)
#
# Reads SGML-style text from filehandle $in and prints the text content of
# each <seg ...>...</seg> element to filehandle $out, one segment per output
# line.  Runs of whitespace inside a segment are collapsed to a single space
# and leading/trailing whitespace is removed.  Lines that contain no <seg ...>
# tag are ignored.
#
# A segment may span several input lines; the physical lines are joined
# without inserting any separator between them (historical behaviour).
# NOTE(review): this means words split across a line break are glued
# together -- confirm whether a space should be inserted instead.
#
# The capture is greedy, so when several segments share one physical line,
# everything between the first opening tag and the last closing tag is
# emitted as a single output line.
#
# If the input ends in the middle of a segment, a warning is written to
# STDERR and processing stops instead of looping forever.
sub extract_segments {
    my ($in, $out) = @_;

    while (my $line = <$in>) {
        # chomp, not chop: a final line without a trailing newline must not
        # lose its last character (that would destroy a closing "</seg>").
        chomp($line);

        my $hit_eof = 0;

        # The opening tag is the last thing on the line: the segment text
        # starts on a following line, so pull it in.
        while ($line =~ $SEG_OPEN_AT_EOL) {
            if (!_append_next_line($in, \$line)) {
                $hit_eof = 1;
                last;
            }
        }

        # A segment has been opened but not closed yet: keep appending lines
        # until the closing tag shows up or the input runs out.
        while (!$hit_eof && $line =~ $SEG_OPEN && $line !~ $SEG_COMPLETE) {
            if (!_append_next_line($in, \$line)) {
                $hit_eof = 1;
            }
        }

        if ($line =~ $SEG_COMPLETE) {
            my $input = $1;
            $input =~ s/\s+/ /g;
            $input =~ s/^ //g;
            $input =~ s/ $//g;
            print {$out} $input."\n";
        }
        elsif ($hit_eof) {
            warn "WARNING: input ended inside an unterminated <seg>\n";
        }
    }

    return;
}

# Run as a filter only when executed as a script; when this file is loaded
# from other code (e.g. a test) just the subroutines above are defined.
unless (caller) {
    die("ERROR syntax: input-from-sgm.perl < in.sgm > in.txt")
        unless scalar @ARGV == 0;
    extract_segments(\*STDIN, \*STDOUT);
}