#!/usr/bin/env perl # # This file is part of moses. Its use is licensed under the GNU Lesser General # Public License version 2.1 or, at your option, any later version. use warnings; use strict; die("ERROR syntax: input-from-sgm.perl < in.sgm > in.txt") unless scalar @ARGV == 0; while(my $line = ) { chop($line); while ($line =~ /]+>\s*$/i) { my $next_line = ; $line .= $next_line; chop($line); } while ($line =~ /]+>\s*(.*)\s*$/i && $line !~ /]+>\s*(.*)\s*<\/seg>/i) { my $next_line = ; $line .= $next_line; chop($line); } if ($line =~ /]+>\s*(.*)\s*<\/seg>/i) { my $input = $1; $input =~ s/\s+/ /g; $input =~ s/^ //g; $input =~ s/ $//g; print $input."\n"; } }