sakharamg
/

NMTKD

Model card Files Files and versions Community

NMTKD / translation /tools /mosesdecoder /phrase-extract /postprocess-egret-forests /SplitPointFileParser.cpp

sakharamg

Uploading all files

158b61b about 2 years ago

raw

history blame contribute delete

2.04 kB

	#include "SplitPointFileParser.h"

	#include <istream>
	#include <string>

	#include "util/string_piece.hh"
	#include "util/tokenize_piece.hh"

	#include "syntax-common/exception.h"

	namespace MosesTraining
	{
	namespace Syntax
	{
	namespace PostprocessEgretForests
	{

	SplitPointFileParser::SplitPointFileParser()
	: m_input(0)
	{
	}

	SplitPointFileParser::SplitPointFileParser(std::istream &input)
	: m_input(&input)
	{
	++(*this);
	}

	SplitPointFileParser &SplitPointFileParser::operator++()
	{
	if (!m_input) {
	return *this;
	}
	m_entry.splitPoints.clear();
	if (!std::getline(*m_input, m_tmpLine)) {
	m_input = 0;
	return *this;
	}
	ParseLine(m_tmpLine, m_entry.splitPoints);
	return *this;
	}

	void SplitPointFileParser::ParseLine(const std::string &line,
	std::vector<SplitPoint> &splitPoints)
	{
	std::string tmp;
	const util::AnyCharacter delimiter(" \t");
	for (util::TokenIter<util::AnyCharacter, true> p(line, delimiter); p; ++p) {
	splitPoints.resize(splitPoints.size()+1);
	SplitPoint &splitPoint = splitPoints.back();
	std::size_t pos = p->find(',');

	StringPiece sp = p->substr(0, pos);
	sp.CopyToString(&tmp);
	splitPoint.tokenPos = std::atoi(tmp.c_str());
	std::size_t begin = pos+1;
	pos = p->find(',', begin);

	sp = p->substr(begin, pos-begin);
	sp.CopyToString(&tmp);
	splitPoint.charPos = std::atoi(tmp.c_str());

	sp = p->substr(pos+1);
	sp.CopyToString(&splitPoint.connector);
	if (splitPoint.connector.size() > 1) {
	throw Exception("multi-character connectors not currently supported");
	}
	}
	}

	bool operator==(const SplitPointFileParser &lhs,
	const SplitPointFileParser &rhs)
	{
	// TODO Is this right? Compare values of istreams if non-zero?
	return lhs.m_input == rhs.m_input;
	}

	bool operator!=(const SplitPointFileParser &lhs,
	const SplitPointFileParser &rhs)
	{
	return !(lhs == rhs);
	}

	} // namespace PostprocessEgretForests
	} // namespace Syntax
	} // namespace MosesTraining