|
#include <sys/stat.h>

#include <cstdlib>
#include <functional>
#include <iostream>
#include <iterator>
#include <sstream>
#include <string>
#include <utility>
#include <vector>

#include "moses/TypeDef.h"
#include "moses/ConfusionNet.h"
#include "moses/FactorCollection.h"
#include "moses/Phrase.h"
#include "moses/InputFileStream.h"
#include "moses/Timer.h"
#include "moses/TranslationModel/PhraseDictionaryTree.h"
|
|
|
using namespace std; |
|
using namespace Moses; |
|
|
|
Timer timer; |
|
|
|
/**
 * Writes a vector to a stream in a simple debug format.
 *
 * The element count is written first, followed by a space, then every element
 * in order, each followed by a single space (so the output always ends with a
 * space, e.g. "3 1 2 3 ").
 *
 * @param out  stream to write to
 * @param x    vector whose size and elements are printed
 * @return     the stream that was passed in, to allow chaining
 */
template<typename T>
std::ostream& operator<<(std::ostream& out,const std::vector<T>& x)
{
  out<<x.size()<<" ";
  for(typename std::vector<T>::const_iterator it=x.begin(), last=x.end(); it!=last; ++it)
    out<<*it<<' ';
  return out;
}
|
|
|
/**
 * Tests whether a path can be stat()'ed.
 *
 * @param filename  path to check; a null pointer is treated as "does not exist"
 * @return true when stat() succeeds on the path, false otherwise (including
 *         any stat() failure, not only a missing file)
 */
inline bool existsFile(const char* filename)
{
  // Guard against a null pointer instead of handing it to stat().
  if(filename==0) return false;
  struct stat mystat;
  return (stat(filename,&mystat)==0);
}

/**
 * Convenience overload of existsFile() for std::string paths.
 *
 * @param filename  path to check
 * @return same result as the const char* overload on filename.c_str()
 */
inline bool existsFile(const std::string& filename)
{
  return existsFile(filename.c_str());
}
|
|
|
int main(int argc,char **argv) |
|
{ |
|
std::string fto; |
|
size_t noScoreComponent=5; |
|
int cn=0; |
|
bool aligninfo=true; |
|
std::vector<std::pair<std::string,std::pair<char*,char*> > > ftts; |
|
int verb=0; |
|
for(int i=1; i<argc; ++i) { |
|
std::string s(argv[i]); |
|
if(s=="-ttable") { |
|
std::pair<char*,char*> p; |
|
p.first=argv[++i]; |
|
p.second=argv[++i]; |
|
ftts.push_back(std::make_pair(std::string(argv[++i]),p)); |
|
} else if(s=="-nscores") noScoreComponent=atoi(argv[++i]); |
|
else if(s=="-out") fto=std::string(argv[++i]); |
|
else if(s=="-cn") cn=1; |
|
else if(s=="-irst") cn=2; |
|
else if(s=="-no-alignment-info") aligninfo=false; |
|
else if(s=="-v") verb=atoi(argv[++i]); |
|
else if(s=="-h") { |
|
std::cerr<<"usage "<<argv[0]<<" :\n\n" |
|
"options:\n" |
|
"\t-ttable int int string -- translation table file, use '-' for stdin\n" |
|
"\t-out string -- output file name prefix for binary ttable\n" |
|
"\t-nscores int -- number of scores in ttable\n" |
|
"\t-no-alignment-info -- omit alignment info from the binary ttable \n" |
|
"\nfunctions:\n" |
|
"\t - convert ascii ttable in binary format\n" |
|
"\t - if ttable is not read from stdin:\n" |
|
"\t treat each line as source phrase an print tgt candidates\n" |
|
"\n"; |
|
return 1; |
|
} else { |
|
std::cerr<<"ERROR: unknown option '"<<s<<"'\n"; |
|
return 1; |
|
} |
|
} |
|
|
|
|
|
if(ftts.size()) { |
|
|
|
if(ftts.size()==1) { |
|
std::cerr<<"processing ptree for "; |
|
PhraseDictionaryTree pdt; |
|
|
|
pdt.PrintWordAlignment(aligninfo); |
|
|
|
if (ftts[0].first=="-") { |
|
std::cerr<< "stdin\n"; |
|
pdt.Create(std::cin,fto); |
|
} else { |
|
std::cerr<< ftts[0].first << "\n"; |
|
InputFileStream in(ftts[0].first); |
|
pdt.Create(in,fto); |
|
} |
|
} else { |
|
#if 0 |
|
std::vector<PhraseDictionaryTree const*> pdicts; |
|
std::vector<FactorType> factorOrder; |
|
for(size_t i=0; i<ftts.size(); ++i) { |
|
|
|
PhraseDictionaryTree *pdtptr=new PhraseDictionaryTree(noScoreComponent, |
|
&factorCollection, |
|
getFactorType(atoi(ftts[i].second.first)), |
|
getFactorType(atoi(ftts[i].second.second)) |
|
); |
|
factorOrder.push_back(pdtptr->GetInputFactorType()); |
|
PhraseDictionaryTree &pdt=*pdtptr; |
|
pdicts.push_back(pdtptr); |
|
|
|
std::string facStr="."+std::string(ftts[i].second.first)+"-"+std::string(ftts[i].second.second); |
|
std::string prefix=ftts[i].first+facStr; |
|
if(!existsFile(prefix+".binphr.idx")) { |
|
std::cerr<<"bin ttable does not exist -> create it\n"; |
|
InputFileStream in(prefix); |
|
pdt.Create(in,prefix); |
|
} |
|
std::cerr<<"reading bin ttable\n"; |
|
pdt.Read(prefix); |
|
|
|
} |
|
|
|
std::cerr<<"processing stdin\n"; |
|
if(!cn) { |
|
std::string line; |
|
while(getline(std::cin,line)) { |
|
std::istringstream is(line); |
|
#if 0 |
|
std::vector<std::string> f; |
|
std::copy(std::istream_iterator<std::string>(is), |
|
std::istream_iterator<std::string>(), |
|
std::back_inserter(f)); |
|
#endif |
|
std::cerr<<"got source phrase '"<<line<<"'\n"; |
|
|
|
Phrase F(Input); |
|
F.CreateFromString(factorOrder,line,factorCollection); |
|
|
|
for(size_t k=0; k<pdicts.size(); ++k) { |
|
PhraseDictionaryTree const& pdt=*pdicts[k]; |
|
|
|
std::vector<std::string> f(F.GetSize()); |
|
for(size_t i=0; i<F.GetSize(); ++i) |
|
f[i]=F.GetFactor(i,pdt.GetInputFactorType())->ToString(); |
|
|
|
std::stringstream iostA,iostB; |
|
std::cerr<<"full phrase processing "<<f<<"\n"; |
|
pdt.PrintTargetCandidates(f,iostA); |
|
|
|
std::cerr<<"processing with prefix ptr\n"; |
|
PhraseDictionaryTree::PrefixPtr p(pdt.GetRoot()); |
|
|
|
for(size_t i=0; i<f.size() && p; ++i) { |
|
std::cerr<<"pre "<<i<<" "<<(p?"1":"0")<<"\n"; |
|
p=pdt.Extend(p,f[i]); |
|
std::cerr<<"post "<<i<<" "<<(p?"1":"0")<<"\n"; |
|
} |
|
if(p) { |
|
std::cerr<<"retrieving candidates from prefix ptr\n"; |
|
pdt.PrintTargetCandidates(p,iostB); |
|
} else { |
|
std::cerr<<"final ptr is invalid\n"; |
|
iostB<<"there are 0 target candidates\n"; |
|
} |
|
if(iostA.str() != iostB.str()) |
|
std::cerr<<"ERROR: translation candidates mismatch '"<<iostA.str()<<"' and for prefix pointer: '"<<iostB.str()<<"'\n"; |
|
|
|
std::cerr<<"translation candidates:\n"<<iostA.str()<<"\n"; |
|
pdt.FreeMemory(); |
|
|
|
} |
|
|
|
} |
|
} else { |
|
|
|
ConfusionNet net(&factorCollection); |
|
std::vector<std::vector<float> > weights; |
|
for(size_t i=0; i<pdicts.size(); ++i) |
|
weights.push_back(std::vector<float>(noScoreComponent,1/(1.0*noScoreComponent))); |
|
|
|
while(net.ReadF(std::cin,factorOrder,cn-1)) { |
|
net.Print(std::cerr); |
|
GenerateCandidates(net,pdicts,weights,verb); |
|
} |
|
|
|
} |
|
#else |
|
std::cerr<<"ERROR: these functions are currently broken...\n"; |
|
exit(1); |
|
#endif |
|
} |
|
} |
|
|
|
} |
|
|