// The entire translation unit is compiled out by the #if 0 ... #endif guard.
#if 0
|
|
|
#include "mmsapt.h" |
|
#include "moses/TranslationModel/PhraseDictionaryTreeAdaptor.h" |
|
#include "moses/TranslationModel/UG/generic/program_options/ug_splice_arglist.h" |
|
#include <boost/algorithm/string/predicate.hpp> |
|
#include <boost/foreach.hpp> |
|
#include <boost/format.hpp> |
|
#include <boost/tokenizer.hpp> |
|
#include <boost/shared_ptr.hpp> |
|
#include <algorithm> |
|
#include <iostream> |
|
|
|
using namespace Moses; |
|
using namespace sapt; |
|
using namespace std; |
|
using namespace boost; |
|
using namespace boost::algorithm; |
|
|
|
// File-level factor list holding exactly one entry, FactorType(0).
// NOTE(review): nothing in the visible part of this file reads `fo`
// (SimplePhrase keeps its own m_fo) -- confirm before removing it.
vector<FactorType> fo(1,FactorType(0));
|
|
|
class SimplePhrase : public Moses::Phrase |
|
{ |
|
vector<FactorType> const m_fo; |
|
public: |
|
SimplePhrase(): m_fo(1,FactorType(0)) {} |
|
|
|
void init(string const& s) |
|
{ |
|
istringstream buf(s); string w; |
|
while (buf >> w) |
|
{ |
|
Word wrd; |
|
this->AddWord().CreateFromString(Input,m_fo,StringPiece(w),false,false); |
|
} |
|
} |
|
}; |
|
|
|
class TargetPhraseIndexSorter |
|
{ |
|
TargetPhraseCollection const& my_tpc; |
|
CompareTargetPhrase cmp; |
|
public: |
|
TargetPhraseIndexSorter(TargetPhraseCollection const& tpc) : my_tpc(tpc) {} |
|
bool operator()(size_t a, size_t b) const |
|
{ |
|
|
|
return (my_tpc[a]->GetScoreBreakdown().GetWeightedScore() > |
|
my_tpc[b]->GetScoreBreakdown().GetWeightedScore()); |
|
} |
|
}; |
|
|
|
int main(int argc, char* argv[]) |
|
{ |
|
|
|
string vlevel = "alt"; |
|
vector<pair<string,int> > argfilter(5); |
|
argfilter[0] = std::make_pair(string("--spe-src"),1); |
|
argfilter[1] = std::make_pair(string("--spe-trg"),1); |
|
argfilter[2] = std::make_pair(string("--spe-aln"),1); |
|
argfilter[3] = std::make_pair(string("--spe-show"),1); |
|
|
|
char** my_args; int my_acnt; |
|
char** mo_args; int mo_acnt; |
|
filter_arguments(argc, argv, mo_acnt, &mo_args, my_acnt, &my_args, argfilter); |
|
|
|
ifstream spe_src,spe_trg,spe_aln; |
|
|
|
for (int i = 0; i < my_acnt; i += 2) |
|
{ |
|
if (!strcmp(my_args[i],"--spe-src")) |
|
spe_src.open(my_args[i+1]); |
|
else if (!strcmp(my_args[i],"--spe-trg")) |
|
spe_trg.open(my_args[i+1]); |
|
else if (!strcmp(my_args[i],"--spe-aln")) |
|
spe_aln.open(my_args[i+1]); |
|
else if (!strcmp(my_args[i],"--spe-show")) |
|
vlevel = my_args[i+1]; |
|
} |
|
|
|
Parameter params; |
|
if (!params.LoadParam(mo_acnt,mo_args) || |
|
!StaticData::LoadDataStatic(¶ms, mo_args[0])) |
|
exit(1); |
|
|
|
StaticData const& global = StaticData::Instance(); |
|
global.SetVerboseLevel(0); |
|
vector<FactorType> ifo = global.GetInputFactorOrder(); |
|
|
|
PhraseDictionary* PT = PhraseDictionary::GetColl()[0]; |
|
Mmsapt* mmsapt = dynamic_cast<Mmsapt*>(PT); |
|
if (!mmsapt) |
|
{ |
|
cerr << "Phrase table implementation not supported by this utility." << endl; |
|
exit(1); |
|
} |
|
mmsapt->SetTableLimit(0); |
|
|
|
string srcline,trgline,alnline; |
|
cout.precision(2); |
|
vector<string> fname = mmsapt->GetFeatureNames(); |
|
while (getline(spe_src,srcline)) |
|
{ |
|
UTIL_THROW_IF2(!getline(spe_trg,trgline), HERE |
|
<< ": missing data for online updates."); |
|
UTIL_THROW_IF2(!getline(spe_aln,alnline), HERE |
|
<< ": missing data for online updates."); |
|
cout << string(80,'-') << "\n" << srcline << "\n" << trgline << "\n" << endl; |
|
|
|
|
|
Sentence snt; |
|
istringstream buf(srcline+"\n"); |
|
if (!snt.Read(buf,ifo)) break; |
|
|
|
int dynprovidx = -1; |
|
for (size_t i = 0; i < fname.size(); ++i) |
|
{ |
|
if (starts_with(fname[i], "prov-1.")) |
|
dynprovidx = i; |
|
} |
|
cout << endl; |
|
for (size_t i = 0; i < snt.GetSize(); ++i) |
|
{ |
|
for (size_t k = i; k < snt.GetSize(); ++k) |
|
{ |
|
Phrase p = snt.GetSubString(Range(i,k)); |
|
if (!mmsapt->PrefixExists(p)) break; |
|
TargetPhraseCollection const* trg = PT->GetTargetPhraseCollectionLEGACY(p); |
|
if (!trg || !trg->GetSize()) continue; |
|
|
|
bool header_done = false; |
|
bool has_dynamic_match = vlevel == "all" || vlevel == "ALL"; |
|
vector<size_t> order; order.reserve(trg->GetSize()); |
|
size_t stop = trg->GetSize(); |
|
|
|
vector<size_t> o2(trg->GetSize()); |
|
for (size_t i = 0; i < stop; ++i) o2[i] = i; |
|
sort(o2.begin(),o2.end(),TargetPhraseIndexSorter(*trg)); |
|
|
|
for (size_t r = 0; r < stop; ++r) |
|
{ |
|
if (vlevel != "ALL") |
|
{ |
|
Phrase const& phr = static_cast<Phrase const&>(*(*trg)[o2[r]]); |
|
ostringstream buf; buf << phr; |
|
string tphrase = buf.str(); |
|
tphrase.erase(tphrase.size()-1); |
|
size_t s = trgline.find(tphrase); |
|
if (s == string::npos) continue; |
|
size_t e = s + tphrase.size(); |
|
if ((s && trgline[s-1] != ' ') || (e < trgline.size() && trgline[e] != ' ')) |
|
continue; |
|
} |
|
order.push_back(r); |
|
if (!has_dynamic_match) |
|
{ |
|
ScoreComponentCollection const& scc = (*trg)[o2[r]]->GetScoreBreakdown(); |
|
ScoreComponentCollection::IndexPair idx = scc.GetIndexes(PT); |
|
FVector const& scores = scc.GetScoresVector(); |
|
has_dynamic_match = scores[idx.first + dynprovidx] > 0; |
|
} |
|
} |
|
if ((vlevel == "alt" || vlevel == "new") && !has_dynamic_match) |
|
continue; |
|
|
|
|
|
BOOST_FOREACH(size_t const& r, order) |
|
{ |
|
ScoreComponentCollection const& scc = (*trg)[o2[r]]->GetScoreBreakdown(); |
|
ScoreComponentCollection::IndexPair idx = scc.GetIndexes(PT); |
|
FVector const& scores = scc.GetScoresVector(); |
|
float wscore = scc.GetWeightedScore(); |
|
if (vlevel == "new" && scores[idx.first + dynprovidx] == 0) |
|
continue; |
|
if (!header_done) |
|
{ |
|
cout << endl; |
|
if (trg->GetSize() == 1) |
|
cout << p << " (1 translation option)" << endl; |
|
else |
|
cout << p << " (" << trg->GetSize() << " translation options)" << endl; |
|
header_done = true; |
|
} |
|
Phrase const& phr = static_cast<Phrase const&>(*(*trg)[o2[r]]); |
|
cout << setw(3) << r+1 << " " << phr << endl; |
|
cout << " "; |
|
BOOST_FOREACH(string const& fn, fname) |
|
cout << " " << format("%10.10s") % fn; |
|
cout << endl; |
|
cout << " "; |
|
for (size_t x = idx.first; x < idx.second; ++x) |
|
{ |
|
size_t j = x-idx.first; |
|
float f = (mmsapt && mmsapt->isLogVal(j)) ? exp(scores[x]) : scores[x]; |
|
string fmt = (mmsapt && mmsapt->isInteger(j)) ? "%10d" : "%10.8f"; |
|
if (starts_with(fname[j], "lex")) fmt = "%10.3e"; |
|
else if (starts_with(fname[j], "prov-1.")) |
|
{ |
|
f = round(f/(1-f)); |
|
fmt = "%10d"; |
|
} |
|
cout << " " << format(fmt) % (mmsapt->isInteger(j) ? round(f) : f); |
|
} |
|
cout << " " << format("%10.3e") % exp(wscore) |
|
<< " " << format("%10.3e") % exp((*trg)[o2[r]]->GetFutureScore()) << endl; |
|
} |
|
mmsapt->Release(trg); |
|
continue; |
|
} |
|
} |
|
mmsapt->add(srcline,trgline,alnline); |
|
} |
|
|
|
exit(0); |
|
} |
|
#endif |
|
|
|
|
|
|