#!/usr/bin/env python
# Auxiliary script to convert fast_align output to the "bal" input format
# that symal requires.
# Script by Ulrich Germann.
#
# Command line args (all four are required, in this order):
#   <text1> <text2> <fwd-alignment> <bwd-alignment>
#
# The four files are read in lockstep, one line per sentence pair.  Each
# alignment line is a whitespace-separated list of "i-j" links.  From the
# <bwd-alignment> file, i is looked up as a 0-based token position in the
# <text1> line; from the <fwd-alignment> file, j is looked up as a 0-based
# token position in the <text2> line.
#
# For every sentence pair three lines are written to stdout:
#   1
#   <len(text2 line)> <text2 tokens> # <one number per text2 token>
#   <len(text1 line)> <text1 tokens> # <one number per text1 token>
# where each number is the 1-based position of the linked token on the other
# side, or 0 if the token has no link.
#
# TO DO: - proper argument parsing with getopt
#        - fuller help text

import sys
import os


def alnvec(slen, alinks, mode):
    """Return a per-token alignment vector of length ``slen``.

    Args:
        slen: number of tokens on the side the vector is built for.
        alinks: iterable of link strings of the form ``"i-j"``.
        mode: which half of each link indexes this side: 0 keys on ``i``
            (and stores ``j + 1``), 1 keys on ``j`` (and stores ``i + 1``).

    Returns:
        A list with one int per position ``0 .. slen - 1``: the 1-based
        position taken from the other half of the link, or 0 where no link
        mentions that position.  If several links share a key, the last one
        wins.  Links whose key is outside ``range(slen)`` are ignored.

    Raises:
        ValueError: if a link half is not an integer.
        IndexError: if a link string contains no ``-`` separator.
    """
    other = (mode + 1) % 2
    linked = {}
    for link in alinks:
        parts = link.split('-')
        # +1 turns the 0-based fast_align index into a 1-based one so that
        # 0 stays free to mean "unaligned".
        linked[int(parts[mode])] = int(parts[other]) + 1
    return [linked.get(i, 0) for i in range(slen)]


def main(argv=None):
    """Convert the four input files named in ``argv[1:5]`` to stdout.

    Args:
        argv: full argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        2 if the argument count is wrong (after printing a usage line to
        stderr), otherwise None.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) != 5:
        prog = os.path.basename(argv[0]) if argv else "<script>"
        sys.stderr.write(
            "usage: %s <text1> <text2> <fwd-alignment> <bwd-alignment>\n"
            % prog)
        return 2

    # Files are opened with the platform default text encoding and are
    # closed again even if a malformed alignment line raises.
    with open(argv[1]) as T1, open(argv[2]) as T2, \
            open(argv[3]) as fwd, open(argv[4]) as bwd:
        write = sys.stdout.write
        # <text1> drives the loop; if another file is shorter, readline()
        # yields '' and that side is treated as an empty line.
        for line in T1:
            t1 = line.split()
            t2 = T2.readline().split()
            a1 = alnvec(len(t1), bwd.readline().split(), 0)
            a2 = alnvec(len(t2), fwd.readline().split(), 1)
            write("1\n")
            write("%d %s # %s\n"
                  % (len(t2), " ".join(t2), " ".join("%d" % x for x in a2)))
            write("%d %s # %s\n"
                  % (len(t1), " ".join(t1), " ".join("%d" % x for x in a1)))


if __name__ == "__main__":
    sys.exit(main())