AshanGimhana's picture
Upload folder using huggingface_hub
9375c9a verified
// Copyright (C) 2011 Davis E. King ([email protected])
// License: Boost Software License See LICENSE.txt for the full license.
#ifndef DLIB_MURMUR_HAsH_3_Hh_
#define DLIB_MURMUR_HAsH_3_Hh_
#include "murmur_hash3_abstract.h"
#include "../uintn.h"
#include <utility>
#include <string.h>
namespace dlib
{
//-----------------------------------------------------------------------------
// The original MurmurHash3 code was written by Austin Appleby, and is placed
// in the public domain. The author hereby disclaims copyright to this source code.
// The code in this particular file was modified by Davis E. King. In
// particular, endian-swapping was added along with some other minor code
// changes like avoiding strict aliasing violations.
//-----------------------------------------------------------------------------
// Platform-specific functions and macros
// Microsoft Visual Studio
#if ((defined(__GNUC__) && __GNUC__ >= 7) || defined(__clang__))
#define DLIB_FALLTHROUGH [[fallthrough]]
#else
#define DLIB_FALLTHROUGH
#endif
#if defined(_MSC_VER)
#define DLIB_FORCE_INLINE __forceinline
#include <stdlib.h>
#define DLIB_ROTL32(x,y) _rotl(x,y)
#define DLIB_ROTL64(x,y) _rotl64(x,y)
#define DLIB_BIG_CONSTANT(x) (x)
// Other compilers
#else // defined(_MSC_VER)
#define DLIB_FORCE_INLINE __attribute__((always_inline)) inline
inline uint32 murmur_rotl32 ( uint32 x, int8 r )
{
return (x << r) | (x >> (32 - r));
}
inline uint64 murmur_rotl64 ( uint64 x, int8 r )
{
return (x << r) | (x >> (64 - r));
}
#define DLIB_ROTL32(x,y) dlib::murmur_rotl32(x,y)
#define DLIB_ROTL64(x,y) dlib::murmur_rotl64(x,y)
#define DLIB_BIG_CONSTANT(x) (x##LLU)
#endif // !defined(_MSC_VER)
// ----------------------------------------------------------------------------------------
// Block read - if your platform needs to do endian-swapping or can only
// handle aligned reads, do the conversion here
DLIB_FORCE_INLINE uint32 murmur_getblock ( const uint32 * p, int i )
{
// The reason we do a memcpy() here instead of simply returning p[i] is because
// doing it this way avoids violations of the strict aliasing rule when all these
// functions are inlined into the user's code.
uint32 temp;
memcpy(&temp, p+i, 4);
return temp;
}
DLIB_FORCE_INLINE uint32 murmur_getblock_byte_swap ( const uint32 * p, int i )
{
union
{
uint8 bytes[4];
uint32 val;
} temp;
const uint8* pp = reinterpret_cast<const uint8*>(p + i);
temp.bytes[0] = pp[3];
temp.bytes[1] = pp[2];
temp.bytes[2] = pp[1];
temp.bytes[3] = pp[0];
return temp.val;
}
DLIB_FORCE_INLINE uint64 murmur_getblock ( const uint64 * p, int i )
{
// The reason we do a memcpy() here instead of simply returning p[i] is because
// doing it this way avoids violations of the strict aliasing rule when all these
// functions are inlined into the user's code.
uint64 temp;
memcpy(&temp, p+i, 8);
return temp;
}
DLIB_FORCE_INLINE uint64 murmur_getblock_byte_swap ( const uint64 * p, int i )
{
union
{
uint8 bytes[8];
uint64 val;
} temp;
const uint8* pp = reinterpret_cast<const uint8*>(p + i);
temp.bytes[0] = pp[7];
temp.bytes[1] = pp[6];
temp.bytes[2] = pp[5];
temp.bytes[3] = pp[4];
temp.bytes[4] = pp[3];
temp.bytes[5] = pp[2];
temp.bytes[6] = pp[1];
temp.bytes[7] = pp[0];
return temp.val;
}
// ----------------------------------------------------------------------------------------
// Finalization mix - force all bits of a hash block to avalanche
DLIB_FORCE_INLINE uint32 murmur_fmix ( uint32 h )
{
h ^= h >> 16;
h *= 0x85ebca6b;
h ^= h >> 13;
h *= 0xc2b2ae35;
h ^= h >> 16;
return h;
}
// ----------------------------------------------------------------------------------------
DLIB_FORCE_INLINE uint64 murmur_fmix ( uint64 k )
{
k ^= k >> 33;
k *= DLIB_BIG_CONSTANT(0xff51afd7ed558ccd);
k ^= k >> 33;
k *= DLIB_BIG_CONSTANT(0xc4ceb9fe1a85ec53);
k ^= k >> 33;
return k;
}
// ----------------------------------------------------------------------------------------
inline uint32 murmur_hash3 (
const void * key,
const int len,
const uint32 seed = 0
)
{
const uint8 * data = (const uint8*)key;
const int nblocks = len / 4;
uint32 h1 = seed;
uint32 c1 = 0xcc9e2d51;
uint32 c2 = 0x1b873593;
//----------
// body
const uint32 * blocks = (const uint32 *)(data + nblocks*4);
bool is_little_endian = true;
uint32 endian_test = 1;
if (*reinterpret_cast<unsigned char*>(&endian_test) != 1)
is_little_endian = false;
if (is_little_endian)
{
for(int i = -nblocks; i; i++)
{
uint32 k1 = murmur_getblock(blocks,i);
k1 *= c1;
k1 = DLIB_ROTL32(k1,15);
k1 *= c2;
h1 ^= k1;
h1 = DLIB_ROTL32(h1,13);
h1 = h1*5+0xe6546b64;
}
}
else
{
for(int i = -nblocks; i; i++)
{
uint32 k1 = murmur_getblock_byte_swap(blocks,i);
k1 *= c1;
k1 = DLIB_ROTL32(k1,15);
k1 *= c2;
h1 ^= k1;
h1 = DLIB_ROTL32(h1,13);
h1 = h1*5+0xe6546b64;
}
}
//----------
// tail
const uint8 * tail = (const uint8*)(data + nblocks*4);
uint32 k1 = 0;
switch(len & 3)
{
case 3: k1 ^= tail[2] << 16;
DLIB_FALLTHROUGH; // fall through
case 2: k1 ^= tail[1] << 8;
DLIB_FALLTHROUGH; // fall through
case 1: k1 ^= tail[0];
k1 *= c1; k1 = DLIB_ROTL32(k1,15); k1 *= c2; h1 ^= k1;
};
//----------
// finalization
h1 ^= len;
h1 = murmur_fmix(h1);
return h1;
}
// ----------------------------------------------------------------------------------------
inline uint32 murmur_hash3_2 (
const uint32 v1,
const uint32 v2
)
{
uint32 h1 = v2;
uint32 c1 = 0xcc9e2d51;
uint32 c2 = 0x1b873593;
//----------
// body
uint32 k1 = v1;
k1 *= c1;
k1 = DLIB_ROTL32(k1,15);
k1 *= c2;
h1 ^= k1;
h1 = DLIB_ROTL32(h1,13);
h1 = h1*5+0xe6546b64;
//----------
// finalization
h1 ^= 4; // =^ by length in bytes
h1 = murmur_fmix(h1);
return h1;
}
// ----------------------------------------------------------------------------------------
inline uint32 murmur_hash3_3 (
const uint32 v1,
const uint32 v2,
const uint32 v3
)
{
uint32 h1 = v3;
uint32 c1 = 0xcc9e2d51;
uint32 c2 = 0x1b873593;
//----------
// body
uint32 k1 = v1;
k1 *= c1;
k1 = DLIB_ROTL32(k1,15);
k1 *= c2;
h1 ^= k1;
h1 = DLIB_ROTL32(h1,13);
h1 = h1*5+0xe6546b64;
k1 = v2;
k1 *= c1;
k1 = DLIB_ROTL32(k1,15);
k1 *= c2;
h1 ^= k1;
h1 = DLIB_ROTL32(h1,13);
h1 = h1*5+0xe6546b64;
//----------
// finalization
h1 ^= 8; // =^ by length in bytes
h1 = murmur_fmix(h1);
return h1;
}
// ----------------------------------------------------------------------------------------
inline std::pair<uint64,uint64> murmur_hash3_128bit (
const void* key,
const int len,
const uint64 seed = 0
)
{
const uint8 * data = (const uint8*)key;
const int nblocks = len / 16;
uint64 h1 = seed;
uint64 h2 = seed;
uint64 c1 = DLIB_BIG_CONSTANT(0x87c37b91114253d5);
uint64 c2 = DLIB_BIG_CONSTANT(0x4cf5ad432745937f);
//----------
// body
const uint64 * blocks = (const uint64 *)(data);
bool is_little_endian = true;
uint32 endian_test = 1;
if (*reinterpret_cast<unsigned char*>(&endian_test) != 1)
is_little_endian = false;
if (is_little_endian)
{
for(int i = 0; i < nblocks; i++)
{
uint64 k1 = murmur_getblock(blocks,i*2+0);
uint64 k2 = murmur_getblock(blocks,i*2+1);
k1 *= c1; k1 = DLIB_ROTL64(k1,31); k1 *= c2; h1 ^= k1;
h1 = DLIB_ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
k2 *= c2; k2 = DLIB_ROTL64(k2,33); k2 *= c1; h2 ^= k2;
h2 = DLIB_ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
}
}
else
{
for(int i = 0; i < nblocks; i++)
{
uint64 k1 = murmur_getblock_byte_swap(blocks,i*2+0);
uint64 k2 = murmur_getblock_byte_swap(blocks,i*2+1);
k1 *= c1; k1 = DLIB_ROTL64(k1,31); k1 *= c2; h1 ^= k1;
h1 = DLIB_ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
k2 *= c2; k2 = DLIB_ROTL64(k2,33); k2 *= c1; h2 ^= k2;
h2 = DLIB_ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
}
}
//----------
// tail
const uint8 * tail = (const uint8*)(data + nblocks*16);
uint64 k1 = 0;
uint64 k2 = 0;
switch(len & 15)
{
case 15: k2 ^= uint64(tail[14]) << 48; DLIB_FALLTHROUGH; // fall through
case 14: k2 ^= uint64(tail[13]) << 40; DLIB_FALLTHROUGH; // fall through
case 13: k2 ^= uint64(tail[12]) << 32; DLIB_FALLTHROUGH; // fall through
case 12: k2 ^= uint64(tail[11]) << 24; DLIB_FALLTHROUGH; // fall through
case 11: k2 ^= uint64(tail[10]) << 16; DLIB_FALLTHROUGH; // fall through
case 10: k2 ^= uint64(tail[ 9]) << 8; DLIB_FALLTHROUGH; // fall through
case 9: k2 ^= uint64(tail[ 8]) << 0;
k2 *= c2; k2 = DLIB_ROTL64(k2,33); k2 *= c1; h2 ^= k2; DLIB_FALLTHROUGH; // fall through
case 8: k1 ^= uint64(tail[ 7]) << 56; DLIB_FALLTHROUGH; // fall through
case 7: k1 ^= uint64(tail[ 6]) << 48; DLIB_FALLTHROUGH; // fall through
case 6: k1 ^= uint64(tail[ 5]) << 40; DLIB_FALLTHROUGH; // fall through
case 5: k1 ^= uint64(tail[ 4]) << 32; DLIB_FALLTHROUGH; // fall through
case 4: k1 ^= uint64(tail[ 3]) << 24; DLIB_FALLTHROUGH; // fall through
case 3: k1 ^= uint64(tail[ 2]) << 16; DLIB_FALLTHROUGH; // fall through
case 2: k1 ^= uint64(tail[ 1]) << 8; DLIB_FALLTHROUGH; // fall through
case 1: k1 ^= uint64(tail[ 0]) << 0;
k1 *= c1; k1 = DLIB_ROTL64(k1,31); k1 *= c2; h1 ^= k1;
};
//----------
// finalization
h1 ^= len; h2 ^= len;
h1 += h2;
h2 += h1;
h1 = murmur_fmix(h1);
h2 = murmur_fmix(h2);
h1 += h2;
h2 += h1;
return std::make_pair(h1,h2);
}
// ----------------------------------------------------------------------------------------
inline std::pair<uint64,uint64> murmur_hash3_128bit (
const uint32& v1,
const uint32& v2,
const uint32& v3,
const uint32& v4
)
{
uint64 h1 = 0;
uint64 h2 = 0;
const uint64 c1 = DLIB_BIG_CONSTANT(0x87c37b91114253d5);
const uint64 c2 = DLIB_BIG_CONSTANT(0x4cf5ad432745937f);
//----------
// body
uint64 k1 = (static_cast<uint64>(v2)<<32)|v1;
uint64 k2 = (static_cast<uint64>(v4)<<32)|v3;
k1 *= c1; k1 = DLIB_ROTL64(k1,31); k1 *= c2;
h1 = DLIB_ROTL64(k1,27); h1 = h1*5+0x52dce729;
k2 *= c2; k2 = DLIB_ROTL64(k2,33); k2 *= c1;
h2 = DLIB_ROTL64(k2,31); h2 += h1; h2 = h2*5+0x38495ab5;
//----------
// finalization
h1 ^= 16; h2 ^= 16;
h1 += h2;
h2 += h1;
h1 = murmur_fmix(h1);
h2 = murmur_fmix(h2);
h1 += h2;
h2 += h1;
return std::make_pair(h1,h2);
}
// ----------------------------------------------------------------------------------------
inline std::pair<uint64,uint64> murmur_hash3_128bit_3 (
uint64 k1,
uint64 k2,
uint64 k3
)
{
uint64 h1 = k3;
uint64 h2 = k3;
const uint64 c1 = DLIB_BIG_CONSTANT(0x87c37b91114253d5);
const uint64 c2 = DLIB_BIG_CONSTANT(0x4cf5ad432745937f);
//----------
// body
k1 *= c1; k1 = DLIB_ROTL64(k1,31); k1 *= c2; h1 ^= k1;
h1 = DLIB_ROTL64(h1,27); h1 += h2; h1 = h1*5+0x52dce729;
k2 *= c2; k2 = DLIB_ROTL64(k2,33); k2 *= c1; h2 ^= k2;
h2 = DLIB_ROTL64(h2,31); h2 += h1; h2 = h2*5+0x38495ab5;
//----------
// finalization
h1 ^= 16; h2 ^= 16;
h1 += h2;
h2 += h1;
h1 = murmur_fmix(h1);
h2 = murmur_fmix(h2);
h1 += h2;
h2 += h1;
return std::make_pair(h1,h2);
}
// ----------------------------------------------------------------------------------------
}
#endif // DLIB_MURMUR_HAsH_3_Hh_