|
|
|
#pragma once |
|
#include "ug_bitext.h" |
|
|
|
namespace sapt |
|
{ |
|
template<typename TKN> |
|
class imBitext : public Bitext<TKN> |
|
{ |
|
SPTR<imTtrack<char> > myTx; |
|
SPTR<imTtrack<TKN> > myT1; |
|
SPTR<imTtrack<TKN> > myT2; |
|
SPTR<imTSA<TKN> > myI1; |
|
SPTR<imTSA<TKN> > myI2; |
|
static Moses::ThreadSafeCounter my_revision; |
|
public: |
|
size_t revision() const { return my_revision; } |
|
void open(std::string const base, std::string const L1, std::string L2); |
|
imBitext(SPTR<TokenIndex> const& V1, |
|
SPTR<TokenIndex> const& V2, |
|
size_t max_sample = 5000, size_t num_workers=4); |
|
imBitext(size_t max_sample = 5000, size_t num_workers=4); |
|
imBitext(imBitext const& other); |
|
|
|
|
|
|
|
|
|
SPTR<imBitext<TKN> > |
|
add(std::vector<std::string> const& s1, |
|
std::vector<std::string> const& s2, |
|
std::vector<std::string> const& a) const; |
|
|
|
}; |
|
|
|
template<typename TKN> |
|
Moses::ThreadSafeCounter |
|
imBitext<TKN>::my_revision; |
|
|
|
template<typename TKN> |
|
imBitext<TKN>:: |
|
imBitext(size_t max_sample, size_t num_workers) |
|
: Bitext<TKN>(max_sample, num_workers) |
|
{ |
|
this->m_default_sample_size = max_sample; |
|
this->V1.reset(new TokenIndex()); |
|
this->V2.reset(new TokenIndex()); |
|
this->V1->setDynamic(true); |
|
this->V2->setDynamic(true); |
|
++my_revision; |
|
} |
|
|
|
template<typename TKN> |
|
imBitext<TKN>:: |
|
imBitext(SPTR<TokenIndex> const& v1, |
|
SPTR<TokenIndex> const& v2, |
|
size_t max_sample, size_t num_workers) |
|
: Bitext<TKN>(max_sample, num_workers) |
|
{ |
|
|
|
this->V1 = v1; |
|
this->V2 = v2; |
|
this->V1->setDynamic(true); |
|
this->V2->setDynamic(true); |
|
++my_revision; |
|
} |
|
|
|
|
|
template<typename TKN> |
|
imBitext<TKN>:: |
|
imBitext(imBitext<TKN> const& other) |
|
{ |
|
this->myTx = other.myTx; |
|
this->myT1 = other.myT1; |
|
this->myT2 = other.myT2; |
|
this->myI1 = other.myI1; |
|
this->myI2 = other.myI2; |
|
this->Tx = this->myTx; |
|
this->T1 = this->myT1; |
|
this->T2 = this->myT2; |
|
this->I1 = this->myI1; |
|
this->I2 = this->myI2; |
|
this->V1 = other.V1; |
|
this->V2 = other.V2; |
|
this->m_default_sample_size = other.m_default_sample_size; |
|
this->m_num_workers = other.m_num_workers; |
|
++my_revision; |
|
} |
|
|
|
template<> |
|
SPTR<imBitext<L2R_Token<SimpleWordId> > > |
|
imBitext<L2R_Token<SimpleWordId> >:: |
|
add(std::vector<std::string> const& s1, |
|
std::vector<std::string> const& s2, |
|
std::vector<std::string> const& aln) const; |
|
|
|
template<typename TKN> |
|
SPTR<imBitext<TKN> > |
|
imBitext<TKN>:: |
|
add(std::vector<std::string> const& s1, |
|
std::vector<std::string> const& s2, |
|
std::vector<std::string> const& aln) const |
|
{ |
|
throw "Not yet implemented"; |
|
} |
|
|
|
|
|
template<typename TKN> |
|
void |
|
imBitext<TKN>:: |
|
open(std::string const base, std::string const L1, std::string L2) |
|
{ |
|
mmTtrack<TKN>& t1 = *reinterpret_cast<mmTtrack<TKN>*>(this->T1.get()); |
|
mmTtrack<TKN>& t2 = *reinterpret_cast<mmTtrack<TKN>*>(this->T2.get()); |
|
mmTtrack<char>& tx = *reinterpret_cast<mmTtrack<char>*>(this->Tx.get()); |
|
t1.open(base+L1+".mct"); |
|
t2.open(base+L2+".mct"); |
|
tx.open(base+L1+"-"+L2+".mam"); |
|
this->V1->open(base+L1+".tdx"); this->V1->iniReverseIndex(); |
|
this->V2->open(base+L2+".tdx"); this->V2->iniReverseIndex(); |
|
mmTSA<TKN>& i1 = *reinterpret_cast<mmTSA<TKN>*>(this->I1.get()); |
|
mmTSA<TKN>& i2 = *reinterpret_cast<mmTSA<TKN>*>(this->I2.get()); |
|
i1.open(base+L1+".sfa", this->T1); |
|
i2.open(base+L2+".sfa", this->T2); |
|
assert(this->T1->size() == this->T2->size()); |
|
} |
|
|
|
} |
|
|
|
|