/* * Sentence.cpp * * Created on: 14 Dec 2015 * Author: hieu */ #include "Sentence.h" #include "../System.h" using namespace std; namespace Moses2 { namespace SCFG { Sentence *Sentence::CreateFromString(MemPool &pool, FactorCollection &vocab, const System &system, const std::string &str, long translationId) { //cerr << "SCFG Sentence" << endl; Sentence *ret; if (system.options.input.xml_policy) { // xml ret = CreateFromStringXML(pool, vocab, system, str); //cerr << "ret=" << ret->Debug(system) << endl; } else { std::vector toks = Tokenize(str); size_t size = toks.size() + 2; ret = new (pool.Allocate()) Sentence(pool, size); ret->PhraseImplTemplate::CreateFromString(vocab, system, toks, true); } return ret; } Sentence *Sentence::CreateFromStringXML(MemPool &pool, FactorCollection &vocab, const System &system, const std::string &str) { Sentence *ret; vector xmlOptions; pugi::xml_document doc; string str2 = "" + str + ""; pugi::xml_parse_result result = doc.load(str2.c_str(), pugi::parse_cdata | pugi::parse_wconv_attribute | pugi::parse_eol | pugi::parse_comments); pugi::xml_node topNode = doc.child("xml"); std::vector toks; XMLParse(pool, system, 0, topNode, toks, xmlOptions); // debug /* cerr << "xmloptions:" << endl; for (size_t i = 0; i < xmlOptions.size(); ++i) { cerr << xmlOptions[i]->Debug(system) << endl; } */ // create words size_t size = toks.size() + 2; ret = new (pool.Allocate()) Sentence(pool, size); ret->PhraseImplTemplate::CreateFromString(vocab, system, toks, true); // xml for(size_t i=0; iGetNodeName(), "ne") == 0) { FactorType placeholderFactor = system.options.input.placeholder_factor; UTIL_THROW_IF2(placeholderFactor == NOT_FOUND, "Placeholder XML in input. Must have argument -placeholder-factor [NUM]"); UTIL_THROW_IF2(xmlOption->phraseSize != 1, "Placeholder must only cover 1 word"); const Factor *factor = vocab.AddFactor(xmlOption->GetEntity(), system, false); (*ret)[xmlOption->startPos + 1][placeholderFactor] = factor; } else { // default - forced translation. Add to class variable ret->AddXMLOption(system, xmlOption); } } //cerr << "ret=" << ret->Debug(system) << endl; return ret; } void Sentence::XMLParse( MemPool &pool, const System &system, size_t depth, const pugi::xml_node &parentNode, std::vector &toks, vector &xmlOptions) { // pugixml for (pugi::xml_node childNode = parentNode.first_child(); childNode; childNode = childNode.next_sibling()) { string nodeName = childNode.name(); //cerr << depth << " nodeName=" << nodeName << endl; int startPos = toks.size(); string value = childNode.value(); if (!value.empty()) { //cerr << depth << "childNode text=" << value << endl; std::vector subPhraseToks = Tokenize(value); for (size_t i = 0; i < subPhraseToks.size(); ++i) { toks.push_back(subPhraseToks[i]); } } if (!nodeName.empty()) { XMLOption *xmlOption = new (pool.Allocate()) XMLOption(pool, nodeName, startPos); pugi::xml_attribute attr; attr = childNode.attribute("translation"); if (!attr.empty()) { xmlOption->SetTranslation(pool, attr.as_string()); } attr = childNode.attribute("entity"); if (!attr.empty()) { xmlOption->SetEntity(pool, attr.as_string()); } attr = childNode.attribute("prob"); if (!attr.empty()) { xmlOption->prob = attr.as_float(); } xmlOptions.push_back(xmlOption); // recursively call this function. For proper recursive trees XMLParse(pool, system, depth + 1, childNode, toks, xmlOptions); size_t endPos = toks.size(); xmlOption->phraseSize = endPos - startPos; /* cerr << "xmlOptions="; xmlOption->Debug(cerr, system); cerr << endl; */ } } } } } /* namespace Moses2 */