|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include "pcfg_extract.h" |
|
|
|
#include <cassert> |
|
#include <cstdlib> |
|
#include <fstream> |
|
#include <iostream> |
|
#include <map> |
|
#include <memory> |
|
#include <set> |
|
#include <string> |
|
#include <vector> |
|
|
|
#include <boost/program_options.hpp> |
|
|
|
#include "syntax-common/exception.h" |
|
#include "syntax-common/pcfg.h" |
|
#include "syntax-common/vocabulary.h" |
|
#include "syntax-common/xml_tree_parser.h" |
|
|
|
#include "SyntaxTree.h" |
|
|
|
#include "options.h" |
|
#include "rule_collection.h" |
|
#include "rule_extractor.h" |
|
|
|
namespace MosesTraining |
|
{ |
|
namespace Syntax |
|
{ |
|
namespace PCFG |
|
{ |
|
|
|
int PcfgExtract::Main(int argc, char *argv[]) |
|
{ |
|
|
|
Options options; |
|
ProcessOptions(argc, argv, options); |
|
|
|
|
|
Vocabulary non_term_vocab; |
|
RuleExtractor rule_extractor(non_term_vocab); |
|
RuleCollection rule_collection; |
|
XmlTreeParser parser; |
|
std::string line; |
|
std::size_t line_num = 0; |
|
std::auto_ptr<MosesTraining::SyntaxTree> tree; |
|
while (std::getline(std::cin, line)) { |
|
++line_num; |
|
try { |
|
tree = parser.Parse(line); |
|
} catch (Exception &e) { |
|
std::ostringstream msg; |
|
msg << "line " << line_num << ": " << e.msg(); |
|
Error(msg.str()); |
|
} |
|
if (!tree.get()) { |
|
std::ostringstream msg; |
|
msg << "no tree at line " << line_num; |
|
Warn(msg.str()); |
|
continue; |
|
} |
|
rule_extractor.Extract(*tree, rule_collection); |
|
} |
|
|
|
|
|
Pcfg pcfg; |
|
rule_collection.CreatePcfg(pcfg); |
|
pcfg.Write(non_term_vocab, std::cout); |
|
|
|
return 0; |
|
} |
|
|
|
void PcfgExtract::ProcessOptions(int argc, char *argv[], |
|
Options &options) const |
|
{ |
|
namespace po = boost::program_options; |
|
|
|
std::ostringstream usage_top; |
|
usage_top << "Usage: " << name() << "\n\n" << "Options"; |
|
|
|
|
|
po::options_description visible(usage_top.str()); |
|
visible.add_options() |
|
("help", "print help message and exit") |
|
; |
|
|
|
|
|
|
|
po::options_description hidden("Hidden options"); |
|
hidden.add_options(); |
|
|
|
|
|
po::options_description cmd_line_options; |
|
cmd_line_options.add(visible).add(hidden); |
|
|
|
|
|
po::positional_options_description p; |
|
|
|
|
|
po::variables_map vm; |
|
try { |
|
po::store(po::command_line_parser(argc, argv).style(MosesOptionStyle()). |
|
options(cmd_line_options).positional(p).run(), vm); |
|
po::notify(vm); |
|
} catch (const std::exception &e) { |
|
std::ostringstream msg; |
|
msg << e.what() << "\n\n" << visible; |
|
Error(msg.str()); |
|
} |
|
|
|
if (vm.count("help")) { |
|
std::cout << visible << std::endl; |
|
std::exit(0); |
|
} |
|
} |
|
|
|
} |
|
} |
|
} |
|
|