File size: 1,576 Bytes
158b61b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
#include "moses/TranslationModel/UG/mm/ug_mm_ttrack.h"
#include "moses/TranslationModel/UG/mm/ug_mm_tsa.h"
#include "moses/TranslationModel/UG/mm/tpt_tokenindex.h"
#include "moses/TranslationModel/UG/mm/ug_corpus_token.h"
#include "moses/TranslationModel/UG/mm/ug_typedefs.h"
#include "moses/TranslationModel/UG/mm/tpt_pickler.h"
#include "moses/TranslationModel/UG/mm/ug_bitext.h"
#include "moses/TranslationModel/UG/mm/ug_lexical_phrase_scorer2.h"

#include "moses/TranslationModel/UG/generic/file_io/ug_stream.h"

// using namespace Moses;
using namespace ugdiss;
using namespace sapt;
using namespace std;

// Token type used for both the corpus and the query sequences in this file.
typedef L2R_Token<SimpleWordId> Token;
// Vocabulary shared by add_file() and main(); main() switches it to dynamic
// mode before any text is tokenized.
TokenIndex V;
// The corpus: add_file() appends one vector<Token> per input line.
SPTR<vector<vector<Token> > > C(new vector<vector<Token> >());
// Append the contents of file fname to the global corpus C.
//
// The file is opened via open_input_stream() and read line by line; every
// line (including empty ones) becomes one new entry in C, tokenized with
// fill_token_seq() against the global vocabulary V.
void
add_file(string fname)
{
  boost::iostreams::filtering_istream input;
  open_input_stream(fname,input);
  for (string text; getline(input,text); )
    {
      // Create the slot first, then tokenize directly into it.
      C->push_back(vector<Token>());
      vector<Token>& sentence = C->back();
      fill_token_seq(V,text,sentence);
    }
}

int
main(int argc, char* argv[])
{
  V.setDynamic(true);
  add_file(argv[1]);
  SPTR<imTtrack<Token> > T(new imTtrack<Token>(C));
  imTSA<Token> I(T,NULL,NULL);
  string line;
  while (getline(cin,line))
    {
      vector<Token> seq; fill_token_seq<Token>(V,line,seq);
      for (size_t i = 0; i < seq.size(); ++i)
	{
	  TSA<Token>::tree_iterator m(&I);
	  cout << V[seq[i].id()];
	  for (size_t k = i; k < seq.size() && m.extend(seq[k]); ++k)
	    {
	      cout << " ";
	      if (k > i) cout << V[seq[k].id()] << " ";
	      cout << "[" << m.approxOccurrenceCount() << "]";
	    }
	  cout << endl;
	}
    }
}