|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#include "SentenceAlignmentWithSyntax.h" |
|
|
|
#include <map> |
|
#include <set> |
|
#include <string> |
|
|
|
#include "tables-core.h" |
|
#include "XmlException.h" |
|
#include "XmlTree.h" |
|
#include "util/tokenize.hh" |
|
|
|
using namespace std; |
|
|
|
namespace MosesTraining |
|
{ |
|
|
|
bool SentenceAlignmentWithSyntax::processTargetSentence(const char * targetString, int sentenceID, bool boundaryRules) |
|
{ |
|
if (!m_targetSyntax) { |
|
return SentenceAlignment::processTargetSentence(targetString, sentenceID, boundaryRules); |
|
} |
|
|
|
string targetStringCPP(targetString); |
|
try { |
|
ProcessAndStripXMLTags(targetStringCPP, targetTree, |
|
m_targetLabelCollection, |
|
m_targetTopLabelCollection, |
|
false); |
|
} catch (const XmlException & e) { |
|
std::cerr << "WARNING: failed to process target sentence at line " |
|
<< sentenceID << ": " << e.getMsg() << std::endl; |
|
return false; |
|
} |
|
target = util::tokenize(targetStringCPP); |
|
return true; |
|
} |
|
|
|
bool SentenceAlignmentWithSyntax::processSourceSentence(const char * sourceString, int sentenceID, bool boundaryRules) |
|
{ |
|
if (!m_sourceSyntax) { |
|
return SentenceAlignment::processSourceSentence(sourceString, sentenceID, boundaryRules); |
|
} |
|
|
|
string sourceStringCPP(sourceString); |
|
try { |
|
ProcessAndStripXMLTags(sourceStringCPP, sourceTree, |
|
m_sourceLabelCollection , |
|
m_sourceTopLabelCollection, |
|
false); |
|
} catch (const XmlException & e) { |
|
std::cerr << "WARNING: failed to process source sentence at line " |
|
<< sentenceID << ": " << e.getMsg() << std::endl; |
|
return false; |
|
} |
|
source = util::tokenize(sourceStringCPP); |
|
return true; |
|
} |
|
|
|
} |
|
|