sakharamg's picture
Uploading all files
158b61b
raw
history blame contribute delete
926 Bytes
#!/usr/bin/env python
# Auxiliary script to convert fast_align output to the "bal" input format
# that symal requires.
# Script by Ulrich Germann.
# command line args:
# <L1 plain text> <L2 plain text> <L1-L2 alignments> <L2-L1 alignments>
#
# TO DO: - proper argument parsing with getopt
# - help text
import sys,os
# The script takes exactly four positional file arguments (see header).
# Check the count up front so a wrong invocation yields a usage message
# instead of an opaque tuple-unpacking ValueError.
if len(sys.argv) != 5:
    sys.stderr.write(
        "usage: %s <L1 plain text> <L2 plain text> "
        "<L1-L2 alignments> <L2-L1 alignments>\n" % sys.argv[0]
    )
    sys.exit(1)

# Open all four inputs for reading, in command-line order.
(T1, T2, fwd, bwd) = [open(x) for x in sys.argv[1:]]
def alnvec(slen, alinks, mode):
    """Build a per-position alignment vector from "i-j" link strings.

    Args:
        slen: number of positions in the output vector.
        alinks: iterable of link strings of the form "<int>-<int>".
        mode: 0 or 1; selects which field of each link is used as the
            position key. The other field supplies the value.

    Returns:
        A list of length ``slen``. Entry ``i`` is the other field of the
        link whose key field equals ``i``, plus one, or 0 when no link
        has ``i`` as its key. If several links share a key, the last one
        in ``alinks`` wins. Links whose key is outside ``range(slen)``
        are ignored.
    """
    other = (mode + 1) % 2
    linked = {}
    for link in alinks:
        fields = link.split('-')
        # Store the partner 1-based so that 0 can mean "unaligned".
        linked[int(fields[mode])] = int(fields[other]) + 1
    # range (not xrange) keeps this runnable on both Python 2 and 3.
    return [linked.get(i, 0) for i in range(slen)]
# Walk the L1 text line by line, reading one line from each of the other
# three inputs per iteration, and emit one three-line record per sentence
# pair on stdout:
#   1
#   <len(t2)> <t2 tokens> # <a2 vector>
#   <len(t1)> <t1 tokens> # <a1 vector>
# Output goes through sys.stdout.write rather than the print statement so
# the script parses and runs on both Python 2 and Python 3; the bytes
# written are the same as the print statement produced.
# NOTE: readline() returns "" once a file is exhausted, so if T2, fwd or
# bwd is shorter than T1 the missing side is silently treated as empty.
ctr = 0  # number of sentence pairs written so far
for t1 in T1:
    t1 = t1.strip().split()
    t2 = T2.readline().strip().split()
    # a1 is keyed on the first field of each bwd link, a2 on the second
    # field of each fwd link.
    a1 = alnvec(len(t1), bwd.readline().split(), 0)
    a2 = alnvec(len(t2), fwd.readline().split(), 1)
    sys.stdout.write("1\n")
    sys.stdout.write("%d %s # %s\n" % (
        len(t2), " ".join(t2), " ".join(["%d" % x for x in a2])))
    sys.stdout.write("%d %s # %s\n" % (
        len(t1), " ".join(t1), " ".join(["%d" % x for x in a1])))
    ctr += 1