|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef moses_WordsBitmap_h |
|
#define moses_WordsBitmap_h |
|
|
|
#include <algorithm> |
|
#include <limits> |
|
#include <vector> |
|
#include <iostream> |
|
#include <cstring> |
|
#include <cmath> |
|
#include <cstdlib> |
|
#include "TypeDef.h" |
|
#include "Range.h" |
|
|
|
namespace Moses |
|
{ |
|
typedef unsigned long WordsBitmapID; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class Bitmap |
|
{ |
|
friend std::ostream& operator<<(std::ostream& out, const Bitmap& bitmap); |
|
private: |
|
std::vector<char> m_bitmap; |
|
size_t m_firstGap; |
|
size_t m_numWordsCovered; |
|
|
|
Bitmap(); |
|
Bitmap& operator= (const Bitmap& other); |
|
|
|
|
|
void UpdateFirstGap(size_t startPos, size_t endPos, bool value) { |
|
if (value) { |
|
|
|
if (startPos <= m_firstGap && m_firstGap <= endPos) { |
|
m_firstGap = NOT_FOUND; |
|
for (size_t i = endPos + 1 ; i < m_bitmap.size(); ++i) { |
|
if (!m_bitmap[i]) { |
|
m_firstGap = i; |
|
break; |
|
} |
|
} |
|
} |
|
|
|
} else { |
|
|
|
if (startPos < m_firstGap) { |
|
m_firstGap = startPos; |
|
} |
|
} |
|
} |
|
|
|
|
|
void |
|
SetValueNonOverlap(Range const& range) { |
|
size_t startPos = range.GetStartPos(); |
|
size_t endPos = range.GetEndPos(); |
|
|
|
for(size_t pos = startPos ; pos <= endPos ; pos++) { |
|
m_bitmap[pos] = true; |
|
} |
|
|
|
m_numWordsCovered += range.GetNumWordsCovered(); |
|
UpdateFirstGap(startPos, endPos, true); |
|
} |
|
|
|
public: |
|
|
|
explicit Bitmap(size_t size, const std::vector<bool>& initializer); |
|
|
|
|
|
explicit Bitmap(size_t size); |
|
|
|
|
|
explicit Bitmap(const Bitmap ©); |
|
|
|
explicit Bitmap(const Bitmap ©, const Range &range); |
|
|
|
|
|
size_t GetNumWordsCovered() const { |
|
return m_numWordsCovered; |
|
} |
|
|
|
|
|
size_t GetFirstGapPos() const { |
|
return m_firstGap; |
|
} |
|
|
|
|
|
|
|
size_t GetLastGapPos() const { |
|
for (int pos = int(m_bitmap.size()) - 1 ; pos >= 0 ; pos--) { |
|
if (!m_bitmap[pos]) { |
|
return pos; |
|
} |
|
} |
|
|
|
return NOT_FOUND; |
|
} |
|
|
|
|
|
|
|
size_t GetLastPos() const { |
|
for (int pos = int(m_bitmap.size()) - 1 ; pos >= 0 ; pos--) { |
|
if (m_bitmap[pos]) { |
|
return pos; |
|
} |
|
} |
|
|
|
return NOT_FOUND; |
|
} |
|
|
|
|
|
bool GetValue(size_t pos) const { |
|
return bool(m_bitmap[pos]); |
|
} |
|
|
|
void SetValue( size_t pos, bool value ) { |
|
bool origValue = m_bitmap[pos]; |
|
if (origValue == value) { |
|
|
|
} else { |
|
m_bitmap[pos] = value; |
|
UpdateFirstGap(pos, pos, value); |
|
if (value) { |
|
++m_numWordsCovered; |
|
} else { |
|
--m_numWordsCovered; |
|
} |
|
} |
|
} |
|
|
|
|
|
bool IsComplete() const { |
|
return GetSize() == GetNumWordsCovered(); |
|
} |
|
|
|
bool Overlap(const Range &compare) const { |
|
for (size_t pos = compare.GetStartPos() ; pos <= compare.GetEndPos() ; pos++) { |
|
if (m_bitmap[pos]) |
|
return true; |
|
} |
|
return false; |
|
} |
|
|
|
size_t GetSize() const { |
|
return m_bitmap.size(); |
|
} |
|
|
|
inline size_t GetEdgeToTheLeftOf(size_t l) const { |
|
if (l == 0) return l; |
|
while (l && !m_bitmap[l-1]) { |
|
--l; |
|
} |
|
return l; |
|
} |
|
|
|
inline size_t GetEdgeToTheRightOf(size_t r) const { |
|
if (r+1 == m_bitmap.size()) return r; |
|
return ( |
|
std::find(m_bitmap.begin() + r + 1, m_bitmap.end(), true) - |
|
m_bitmap.begin() |
|
) - 1; |
|
} |
|
|
|
|
|
|
|
WordsBitmapID GetID() const { |
|
assert(m_bitmap.size() < (1<<16)); |
|
|
|
size_t start = GetFirstGapPos(); |
|
if (start == NOT_FOUND) start = m_bitmap.size(); |
|
|
|
size_t end = GetLastPos(); |
|
if (end == NOT_FOUND) end = 0; |
|
|
|
assert(end < start || end-start <= 16); |
|
WordsBitmapID id = 0; |
|
for(size_t pos = end; pos > start; pos--) { |
|
id = id*2 + (int) GetValue(pos); |
|
} |
|
return id + (1<<16) * start; |
|
} |
|
|
|
|
|
WordsBitmapID GetIDPlus( size_t startPos, size_t endPos ) const { |
|
assert(m_bitmap.size() < (1<<16)); |
|
|
|
size_t start = GetFirstGapPos(); |
|
if (start == NOT_FOUND) start = m_bitmap.size(); |
|
|
|
size_t end = GetLastPos(); |
|
if (end == NOT_FOUND) end = 0; |
|
|
|
if (start == startPos) start = endPos+1; |
|
if (end < endPos) end = endPos; |
|
|
|
assert(end < start || end-start <= 16); |
|
WordsBitmapID id = 0; |
|
for(size_t pos = end; pos > start; pos--) { |
|
id = id*2; |
|
if (GetValue(pos) || (startPos<=pos && pos<=endPos)) |
|
id++; |
|
} |
|
return id + (1<<16) * start; |
|
} |
|
|
|
|
|
size_t hash() const; |
|
bool operator==(const Bitmap& other) const; |
|
bool operator!=(const Bitmap& other) const { |
|
return !(*this == other); |
|
} |
|
|
|
TO_STRING(); |
|
}; |
|
|
|
} |
|
#endif |
|
|