//-*- mode: c++; indent-tabs-mode: nil; tab-width: 2 -*- #include "ug_bitext.h" #include #include namespace sapt { float lbop(size_t const tries, size_t const succ, float const confidence) { return (confidence == 0 ? float(succ)/tries : (boost::math::binomial_distribution<>:: find_lower_bound_on_p(tries, succ, confidence))); } void snt_adder >:: operator()() { typedef L2R_Token tkn; std::vector sids; sids.reserve(snt.size()); BOOST_FOREACH(std::string const& foo, snt) { sids.push_back(track ? track->size() : 0); std::istringstream buf(foo); std::string w; std::vector s; s.reserve(100); while (buf >> w) s.push_back(tkn(V[w])); track = append(track,s); } if (index) index.reset(new imTSA(*index,track,sids,V.tsize())); else index.reset(new imTSA(track,NULL,NULL)); } snt_adder >:: snt_adder(std::vector const& s, TokenIndex& v, SPTR > >& t, SPTR > >& i) : snt(s), V(v), track(t), index(i) { } bool expand_phrase_pair (std::vector >& a1, std::vector >& a2, ushort const s2, // next word on in target side ushort const L1, ushort const R1, // limits of previous phrase ushort & s1, ushort & e1, ushort& e2) // start/end src; end trg { if (a2[s2].size() == 0) { std::cout << __FILE__ << ":" << __LINE__ << std::endl; return false; } bitvector done1(a1.size()); bitvector done2(a2.size()); std::vector > agenda; // x.first: side (1 or 2) // x.second: word position agenda.reserve(a1.size() + a2.size()); agenda.push_back(std::pair(2,s2)); e2 = s2; s1 = e1 = a2[s2].front(); if (s1 >= L1 && s1 < R1) { std::cout << __FILE__ << ":" << __LINE__ << std::endl; return false; } agenda.push_back(std::pair(2,s2)); while (agenda.size()) { ushort side = agenda.back().first; ushort p = agenda.back().second; agenda.pop_back(); if (side == 1) { done1.set(p); BOOST_FOREACH(ushort i, a1[p]) { if (i < s2) { // cout << __FILE__ << ":" << __LINE__ << endl; return false; } if (done2[i]) continue; for (;e2 <= i;++e2) if (!done2[e2]) agenda.push_back(std::pair(2,e2)); } } else { done2.set(p); BOOST_FOREACH(ushort i, a2[p]) { if ((e1 < L1 && i >= L1) || (s1 >= R1 && i < R1) || (i >= L1 && i < R1)) { // cout << __FILE__ << ":" << __LINE__ << " " // << L1 << "-" << R1 << " " << i << " " // << s1 << "-" << e1<< endl; return false; } if (e1 < i) { for (; e1 <= i; ++e1) if (!done1[e1]) agenda.push_back(std::pair(1,e1)); } else if (s1 > i) { for (; i <= s1; ++i) if (!done1[i]) agenda.push_back(std::pair(1,i)); } } } } ++e1; ++e2; return true; } void print_amatrix(std::vector > a1, uint32_t len2, ushort b1, ushort e1, ushort b2, ushort e2) { using namespace std; std::vector M(a1.size(),bitvector(len2)); for (ushort j = 0; j < a1.size(); ++j) { BOOST_FOREACH(ushort k, a1[j]) M[j].set(k); } cout << b1 << "-" << e1 << " " << b2 << "-" << e2 << endl; cout << " "; for (size_t c = 0; c < len2;++c) cout << c%10; cout << endl; for (size_t r = 0; r < M.size(); ++r) { cout << setw(3) << r << " "; for (size_t c = 0; c < M[r].size(); ++c) { if ((b1 <= r) && (r < e1) && b2 <= c && c < e2) cout << (M[r][c] ? 'x' : '-'); else cout << (M[r][c] ? 'o' : '.'); } cout << endl; } cout << std::string(90,'-') << endl; } void write_bitvector(bitvector const& v, std::ostream& out) { for (size_t i = v.find_first(); i < v.size();) { out << i; if ((i = v.find_next(i)) < v.size()) out << ","; } } }