|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef moses_ListCoders_h |
|
#define moses_ListCoders_h |
|
|
|
#include <cmath> |
|
#include <cassert> |
|
|
|
namespace Moses |
|
{ |
|
|
|
template <typename T = unsigned int> |
|
class VarIntType |
|
{ |
|
private: |
|
template <typename IntType, typename OutIt> |
|
static void EncodeSymbol(IntType input, OutIt output) { |
|
if(input == 0) { |
|
*output = 0; |
|
output++; |
|
return; |
|
} |
|
|
|
T msb = 1 << (sizeof(T)*8-1); |
|
IntType mask = ~msb; |
|
IntType shift = (sizeof(T)*8-1); |
|
|
|
while(input) { |
|
T res = input & mask; |
|
input >>= shift; |
|
if(input) |
|
res |= msb; |
|
*output = res; |
|
output++; |
|
} |
|
}; |
|
|
|
template <typename InIt, typename IntType> |
|
static void DecodeSymbol(InIt &it, InIt end, IntType &output) { |
|
T msb = 1 << (sizeof(T)*8-1); |
|
IntType shift = (sizeof(T)*8-1); |
|
|
|
output = 0; |
|
size_t i = 0; |
|
while(it != end && *it & msb) { |
|
IntType temp = *it & ~msb; |
|
temp <<= shift*i; |
|
output |= temp; |
|
it++; |
|
i++; |
|
} |
|
assert(it != end); |
|
|
|
IntType temp = *it; |
|
temp <<= shift*i; |
|
output |= temp; |
|
it++; |
|
} |
|
|
|
public: |
|
|
|
template <typename InIt, typename OutIt> |
|
static void Encode(InIt it, InIt end, OutIt outIt) { |
|
while(it != end) { |
|
EncodeSymbol(*it, outIt); |
|
it++; |
|
} |
|
} |
|
|
|
template <typename InIt, typename OutIt> |
|
static void Decode(InIt &it, InIt end, OutIt outIt) { |
|
while(it != end) { |
|
size_t output; |
|
DecodeSymbol(it, end, output); |
|
*outIt = output; |
|
outIt++; |
|
} |
|
} |
|
|
|
template <typename InIt> |
|
static size_t DecodeAndSum(InIt &it, InIt end, size_t num) { |
|
size_t sum = 0; |
|
size_t curr = 0; |
|
|
|
while(it != end && curr < num) { |
|
size_t output; |
|
DecodeSymbol(it, end, output); |
|
sum += output; |
|
curr++; |
|
} |
|
|
|
return sum; |
|
} |
|
|
|
}; |
|
|
|
typedef VarIntType<unsigned char> VarByte; |
|
|
|
typedef VarByte VarInt8; |
|
typedef VarIntType<unsigned short> VarInt16; |
|
typedef VarIntType<unsigned int> VarInt32; |
|
|
|
class Simple9 |
|
{ |
|
private: |
|
typedef unsigned int uint; |
|
|
|
template <typename InIt> |
|
inline static void EncodeSymbol(uint &output, InIt it, InIt end) { |
|
uint length = end - it; |
|
|
|
uint type = 0; |
|
uint bitlength = 0; |
|
|
|
switch(length) { |
|
case 1: |
|
type = 1; |
|
bitlength = 28; |
|
break; |
|
case 2: |
|
type = 2; |
|
bitlength = 14; |
|
break; |
|
case 3: |
|
type = 3; |
|
bitlength = 9; |
|
break; |
|
case 4: |
|
type = 4; |
|
bitlength = 7; |
|
break; |
|
case 5: |
|
type = 5; |
|
bitlength = 5; |
|
break; |
|
case 7: |
|
type = 6; |
|
bitlength = 4; |
|
break; |
|
case 9: |
|
type = 7; |
|
bitlength = 3; |
|
break; |
|
case 14: |
|
type = 8; |
|
bitlength = 2; |
|
break; |
|
case 28: |
|
type = 9; |
|
bitlength = 1; |
|
break; |
|
} |
|
|
|
output = 0; |
|
output |= (type << 28); |
|
|
|
uint i = 0; |
|
while(it != end) { |
|
UTIL_THROW_IF2(*it > 268435455, "You are trying to encode " << *it |
|
<< " with Simple9. Cannot encode numbers larger than 268435455 (2^28-1)"); |
|
|
|
uint l = bitlength * (length-i-1); |
|
output |= *it << l; |
|
it++; |
|
i++; |
|
} |
|
} |
|
|
|
template <typename OutIt> |
|
static inline void DecodeSymbol(uint input, OutIt outIt) { |
|
uint type = (input >> 28); |
|
|
|
uint bitlen = 0; |
|
uint shift = 0; |
|
uint mask = 0; |
|
|
|
switch(type) { |
|
case 1: |
|
bitlen = 28; |
|
shift = 0; |
|
mask = 268435455; |
|
break; |
|
case 2: |
|
bitlen = 14; |
|
shift = 14; |
|
mask = 16383; |
|
break; |
|
case 3: |
|
bitlen = 9; |
|
shift = 18; |
|
mask = 511; |
|
break; |
|
case 4: |
|
bitlen = 7; |
|
shift = 21; |
|
mask = 127; |
|
break; |
|
case 5: |
|
bitlen = 5; |
|
shift = 20; |
|
mask = 31; |
|
break; |
|
case 6: |
|
bitlen = 4; |
|
shift = 24; |
|
mask = 15; |
|
break; |
|
case 7: |
|
bitlen = 3; |
|
shift = 24; |
|
mask = 7; |
|
break; |
|
case 8: |
|
bitlen = 2; |
|
shift = 26; |
|
mask = 3; |
|
break; |
|
case 9: |
|
bitlen = 1; |
|
shift = 27; |
|
mask = 1; |
|
break; |
|
} |
|
|
|
while(shift > 0) { |
|
*outIt = (input >> shift) & mask; |
|
shift -= bitlen; |
|
outIt++; |
|
} |
|
*outIt = input & mask; |
|
outIt++; |
|
} |
|
|
|
static inline size_t DecodeAndSumSymbol(uint input, size_t num, size_t &curr) { |
|
uint type = (input >> 28); |
|
|
|
uint bitlen = 0; |
|
uint shift = 0; |
|
uint mask = 0; |
|
|
|
switch(type) { |
|
case 1: |
|
bitlen = 28; |
|
shift = 0; |
|
mask = 268435455; |
|
break; |
|
case 2: |
|
bitlen = 14; |
|
shift = 14; |
|
mask = 16383; |
|
break; |
|
case 3: |
|
bitlen = 9; |
|
shift = 18; |
|
mask = 511; |
|
break; |
|
case 4: |
|
bitlen = 7; |
|
shift = 21; |
|
mask = 127; |
|
break; |
|
case 5: |
|
bitlen = 5; |
|
shift = 20; |
|
mask = 31; |
|
break; |
|
case 6: |
|
bitlen = 4; |
|
shift = 24; |
|
mask = 15; |
|
break; |
|
case 7: |
|
bitlen = 3; |
|
shift = 24; |
|
mask = 7; |
|
break; |
|
case 8: |
|
bitlen = 2; |
|
shift = 26; |
|
mask = 3; |
|
break; |
|
case 9: |
|
bitlen = 1; |
|
shift = 27; |
|
mask = 1; |
|
break; |
|
} |
|
|
|
size_t sum = 0; |
|
while(shift > 0) { |
|
sum += (input >> shift) & mask; |
|
shift -= bitlen; |
|
if(++curr == num) |
|
return sum; |
|
} |
|
sum += input & mask; |
|
curr++; |
|
return sum; |
|
} |
|
|
|
public: |
|
template <typename InIt, typename OutIt> |
|
static void Encode(InIt it, InIt end, OutIt outIt) { |
|
uint parts[] = { 1, 2, 3, 4, 5, 7, 9, 14, 28 }; |
|
|
|
uint buffer[28]; |
|
for(InIt i = it; i < end; i++) { |
|
uint lastbit = 1; |
|
uint lastpos = 0; |
|
uint lastyes = 0; |
|
uint j = 0; |
|
|
|
double log2 = log(2); |
|
while(j < 9 && lastpos < 28 && (i+lastpos) < end) { |
|
if(lastpos >= parts[j]) |
|
j++; |
|
|
|
buffer[lastpos] = *(i + lastpos); |
|
|
|
uint reqbit = ceil(log(buffer[lastpos]+1)/log2); |
|
assert(reqbit <= 28); |
|
|
|
uint bit = 28/floor(28/reqbit); |
|
if(lastbit < bit) |
|
lastbit = bit; |
|
|
|
if(parts[j] > 28/lastbit) |
|
break; |
|
else if(lastpos == parts[j]-1) |
|
lastyes = lastpos; |
|
|
|
lastpos++; |
|
} |
|
i += lastyes; |
|
|
|
uint length = lastyes + 1; |
|
uint output; |
|
EncodeSymbol(output, buffer, buffer + length); |
|
|
|
*outIt = output; |
|
outIt++; |
|
} |
|
} |
|
|
|
template <typename InIt, typename OutIt> |
|
static void Decode(InIt &it, InIt end, OutIt outIt) { |
|
while(it != end) { |
|
DecodeSymbol(*it, outIt); |
|
it++; |
|
} |
|
} |
|
|
|
template <typename InIt> |
|
static size_t DecodeAndSum(InIt &it, InIt end, size_t num) { |
|
size_t sum = 0; |
|
size_t curr = 0; |
|
while(it != end && curr < num) { |
|
sum += DecodeAndSumSymbol(*it, num, curr); |
|
it++; |
|
} |
|
assert(curr == num); |
|
return sum; |
|
} |
|
}; |
|
|
|
} |
|
|
|
#endif |
|
|