|
|
|
|
|
#include <boost/program_options.hpp> |
|
#include <iomanip> |
|
|
|
#include "tpt_typedefs.h" |
|
#include "ug_mm_ttrack.h" |
|
#include "tpt_tokenindex.h" |
|
#include "ug_deptree.h" |
|
#include "ug_corpus_token.h" |
|
|
|
using namespace std; |
|
using namespace sapt; |
|
namespace po = boost::program_options; |
|
|
|
string bname,mtt,mct; |
|
vector<string> range; |
|
|
|
typedef L2R_Token<Conll_Sform> Token; |
|
|
|
TokenIndex SF,LM,PS,DT; |
|
mmTtrack<Token> MTT; |
|
mmTtrack<SimpleWordId> MCT; |
|
bool sform; |
|
bool have_mtt, have_mct; |
|
bool with_sids; |
|
bool with_positions; |
|
void |
|
interpret_args(int ac, char* av[]) |
|
{ |
|
po::variables_map vm; |
|
po::options_description o("Options"); |
|
o.add_options() |
|
("help,h", "print this message") |
|
("numbers,n", po::bool_switch(&with_sids), "print sentence ids as first token") |
|
("sform,s", po::bool_switch(&sform), "sform only") |
|
("with-positions,p", po::bool_switch(&with_positions), "show word positions") |
|
; |
|
|
|
po::options_description h("Hidden Options"); |
|
h.add_options() |
|
("bname", po::value<string>(&bname), "base name") |
|
("range", po::value<vector<string> >(&range), "range") |
|
; |
|
po::positional_options_description a; |
|
a.add("bname",1); |
|
a.add("range",-1); |
|
|
|
po::store(po::command_line_parser(ac,av) |
|
.options(h.add(o)) |
|
.positional(a) |
|
.run(),vm); |
|
po::notify(vm); |
|
if (vm.count("help") || bname.empty()) |
|
{ |
|
cout << "usage:\n\t" |
|
<< av[0] << " track name [<range>]\n" |
|
<< endl; |
|
cout << o << endl; |
|
exit(0); |
|
} |
|
mtt = bname+".mtt"; |
|
mct = bname+".mct"; |
|
} |
|
|
|
void |
|
printRangeMTT(size_t start, size_t stop) |
|
{ |
|
for (;start < stop; start++) |
|
{ |
|
size_t i = 0; |
|
Token const* s = MTT.sntStart(start); |
|
Token const* e = MTT.sntEnd(start); |
|
if (with_sids) cout << start << " "; |
|
for (Token const* t = s; t < e; ++t) |
|
{ |
|
#if 0 |
|
uchar const* x = reinterpret_cast<uchar const*>(t); |
|
cout << *reinterpret_cast<id_type const*>(x) << " "; |
|
cout << *reinterpret_cast<id_type const*>(x+4) << " "; |
|
cout << int(*(x+8)) << " "; |
|
cout << int(*(x+9)) << " "; |
|
cout << *reinterpret_cast<short const*>(x+10) << endl; |
|
#endif |
|
if (!sform) |
|
{ |
|
cout << setw(2) << right << ++i << " "; |
|
cout << setw(30) << right << SF[t->sform] << " "; |
|
cout << setw(4) << right << PS[t->majpos] << " "; |
|
cout << setw(4) << right << PS[t->minpos] << " "; |
|
cout << setw(30) << left << LM[t->lemma] << " "; |
|
cout << i+t->parent << " "; |
|
cout << DT[t->dtype] << endl; |
|
} |
|
else |
|
{ |
|
if (with_positions) cout << t-s << ":"; |
|
cout << SF[t->id()] << " "; |
|
} |
|
} |
|
cout << endl; |
|
} |
|
} |
|
|
|
void |
|
printRangeMCT(size_t start, size_t stop) |
|
{ |
|
for (;start < stop; start++) |
|
{ |
|
SimpleWordId const* s = MCT.sntStart(start); |
|
SimpleWordId const* t = s; |
|
SimpleWordId const* e = MCT.sntEnd(start); |
|
if (with_sids) cout << start << " "; |
|
while (t < e) |
|
{ |
|
if (with_positions) cout << t-s << ":"; |
|
cout << SF[(t++)->id()] << " "; |
|
} |
|
cout << endl; |
|
} |
|
} |
|
|
|
int |
|
main(int argc, char*argv[]) |
|
{ |
|
interpret_args(argc,argv); |
|
have_mtt = !access(mtt.c_str(),F_OK); |
|
have_mct = !have_mtt && !access(mct.c_str(),F_OK); |
|
if (!have_mtt && !have_mct) |
|
{ |
|
cerr << "FATAL ERROR: neither " << mtt << " nor " << mct << " exit." << endl; |
|
exit(1); |
|
} |
|
if (have_mtt) |
|
{ |
|
SF.open(bname+".tdx.sfo"); SF.iniReverseIndex(); |
|
LM.open(bname+".tdx.lem"); LM.iniReverseIndex(); |
|
PS.open(bname+".tdx.pos"); PS.iniReverseIndex(); |
|
DT.open(bname+".tdx.drl"); DT.iniReverseIndex(); |
|
MTT.open(mtt); |
|
} |
|
else |
|
{ |
|
sform = true; |
|
SF.open(bname+".tdx"); SF.iniReverseIndex(); |
|
MCT.open(mct); |
|
} |
|
|
|
if (!range.size()) |
|
have_mtt ? printRangeMTT(0, MTT.size()) : printRangeMCT(0, MCT.size()); |
|
else |
|
{ |
|
for (size_t i = 0; i < range.size(); i++) |
|
{ |
|
istringstream buf(range[i]); |
|
size_t first,last; uchar c; |
|
buf>>first; |
|
if (buf.peek() == '-') buf>>c>>last; |
|
else last = first; |
|
if (have_mtt && last < MTT.size()) |
|
printRangeMTT(first,last+1); |
|
else if (last < MCT.size()) |
|
printRangeMCT(first,last+1); |
|
} |
|
} |
|
} |
|
|