File size: 2,556 Bytes
158b61b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
#include <stdio.h>
#include <stdlib.h>
#include <cassert>
#include <algorithm>
#include <functional>
#include <boost/filesystem.hpp>
#include "pruneGeneration.h"
#include "moses/InputFileStream.h"
#include "moses/OutputFileStream.h"

using namespace std;

int main(int argc, char **argv)
{
  cerr << "Starting" << endl;
  int limit = atoi(argv[1]);
  string inPathStem = argv[2];
  string outPathStem = argv[3];

  namespace fs = boost::filesystem;

  //cerr << "inPathStem=" << inPathStem << endl;
  fs::path p(inPathStem);
  fs::path dir = p.parent_path();
  //cerr << "dir=" << dir << endl;

  fs::path fileStem = p.filename();
  string fileStemStr = fileStem.native();
  size_t fileStemStrSize = fileStemStr.size();
  //cerr << "fileStem=" << fileStemStr << endl;

  // loop thru each file in directory
  fs::directory_iterator end_iter;
  for( fs::directory_iterator dir_iter(dir) ; dir_iter != end_iter ; ++dir_iter) {
    if (fs::is_regular_file(dir_iter->status())) {
      fs::path currPath = *dir_iter;
      string currPathStr = currPath.native();
      //cerr << "currPathStr=" << currPathStr << endl;

      fs::path currFile = currPath.filename();
      string currFileStr = currFile.native();

      if (currFileStr.find(fileStemStr) == 0) {
        // found gen table we need
        //cerr << "found=" << currPathStr << endl;
        string suffix = currFileStr.substr(fileStemStrSize, currFileStr.size() - fileStemStrSize);
        string outPath = outPathStem + suffix;
        cerr << "PRUNING " << currPathStr << " TO " << outPath << endl;

        Moses::InputFileStream inStrme(currPathStr);
        Moses::OutputFileStream outStrme(outPath);
        Process(limit, inStrme, outStrme);

      }
    }
  }

  cerr << "Finished" << endl;
}

void Process(int limit, istream &inStrme, ostream &outStrme)
{
  vector<Rec> records;
  string prevInWord;
  string line;
  while (getline(inStrme, line)) {
    vector<string> toks;
    Tokenize(toks, line);
    assert(toks.size() == 4);

    if (prevInWord != toks[0]) {
      Output(outStrme, records, limit);
      records.clear();
    }

    // add new record
    float prob = atof(toks[2].c_str());
    records.push_back(Rec(prob, line));

    prevInWord = toks[0];
  }

  // last
  Output(outStrme, records, limit);
  records.clear();

}

void Output(ostream &outStrme, vector<Rec> &records, int limit)
{
  std::sort(records.rbegin(), records.rend());

  for (size_t i = 0; i < limit && i < records.size(); ++i) {
    const Rec &rec = records[i];
    outStrme << rec.line << endl;
  }
}