// Copyright (C) 2013 Davis E. King (davis@dlib.net) // License: Boost Software License See LICENSE.txt for the full license. #ifndef DLIB_sIMD8I_Hh_ #define DLIB_sIMD8I_Hh_ #include "simd_check.h" #include "../uintn.h" namespace dlib { #ifdef DLIB_HAVE_AVX class simd8i { public: typedef int32 type; inline simd8i() {} inline simd8i(int32 f) { x = _mm256_set1_epi32(f); } inline simd8i(int32 r0, int32 r1, int32 r2, int32 r3, int32 r4, int32 r5, int32 r6, int32 r7 ) { x = _mm256_setr_epi32(r0,r1,r2,r3,r4,r5,r6,r7); } inline simd8i(const __m256i& val):x(val) {} inline simd8i(const simd4i& low, const simd4i& high) { x = _mm256_insertf128_si256(_mm256_castsi128_si256(low),high,1); } inline simd8i& operator=(const __m256i& val) { x = val; return *this; } inline operator __m256i() const { return x; } inline void load_aligned(const type* ptr) { x = _mm256_load_si256((const __m256i*)ptr); } inline void store_aligned(type* ptr) const { _mm256_store_si256((__m256i*)ptr, x); } inline void load(const type* ptr) { x = _mm256_loadu_si256((const __m256i*)ptr); } inline void store(type* ptr) const { _mm256_storeu_si256((__m256i*)ptr, x); } inline simd4i low() const { return _mm256_castsi256_si128(x); } inline simd4i high() const { return _mm256_extractf128_si256(x,1); } inline unsigned int size() const { return 8; } inline int32 operator[](unsigned int idx) const { int32 temp[8]; store(temp); return temp[idx]; } private: __m256i x; }; #else class simd8i { public: typedef int32 type; inline simd8i() {} inline simd8i(const simd4i& low_, const simd4i& high_): _low(low_),_high(high_){} inline simd8i(int32 f) :_low(f),_high(f) {} inline simd8i(int32 r0, int32 r1, int32 r2, int32 r3, int32 r4, int32 r5, int32 r6, int32 r7) : _low(r0,r1,r2,r3), _high(r4,r5,r6,r7) {} struct rawarray { simd4i low, high; }; inline simd8i(const rawarray& a) { _low = a.low; _high = a.high; } inline void load_aligned(const type* ptr) { _low.load_aligned(ptr); _high.load_aligned(ptr+4); } inline void store_aligned(type* ptr) const { _low.store_aligned(ptr); _high.store_aligned(ptr+4); } inline void load(const type* ptr) { _low.load(ptr); _high.load(ptr+4); } inline void store(type* ptr) const { _low.store(ptr); _high.store(ptr+4); } inline unsigned int size() const { return 8; } inline int32 operator[](unsigned int idx) const { if (idx < 4) return _low[idx]; else return _high[idx-4]; } inline const simd4i& low() const { return _low; } inline const simd4i& high() const { return _high; } private: simd4i _low, _high; }; #endif // ---------------------------------------------------------------------------------------- inline std::ostream& operator<<(std::ostream& out, const simd8i& item) { int32 temp[8]; item.store(temp); out << "(" << temp[0] << ", " << temp[1] << ", " << temp[2] << ", " << temp[3] << ", " << temp[4] << ", " << temp[5] << ", " << temp[6] << ", " << temp[7] << ")"; return out; } // ---------------------------------------------------------------------------------------- inline simd8i operator+ (const simd8i& lhs, const simd8i& rhs) { #ifdef DLIB_HAVE_AVX2 return _mm256_add_epi32(lhs, rhs); #else return simd8i(lhs.low()+rhs.low(), lhs.high()+rhs.high()); #endif } inline simd8i& operator+= (simd8i& lhs, const simd8i& rhs) { return lhs = lhs + rhs; return lhs;} // ---------------------------------------------------------------------------------------- inline simd8i operator- (const simd8i& lhs, const simd8i& rhs) { #ifdef DLIB_HAVE_AVX2 return _mm256_sub_epi32(lhs, rhs); #else return simd8i(lhs.low()-rhs.low(), lhs.high()-rhs.high()); #endif } inline simd8i& operator-= (simd8i& lhs, const simd8i& rhs) { return lhs = lhs - rhs; return lhs;} // ---------------------------------------------------------------------------------------- inline simd8i operator* (const simd8i& lhs, const simd8i& rhs) { #ifdef DLIB_HAVE_AVX2 return _mm256_mullo_epi32(lhs, rhs); #else return simd8i(lhs.low()*rhs.low(), lhs.high()*rhs.high()); #endif } inline simd8i& operator*= (simd8i& lhs, const simd8i& rhs) { return lhs = lhs * rhs; return lhs;} // ---------------------------------------------------------------------------------------- inline simd8i operator& (const simd8i& lhs, const simd8i& rhs) { #ifdef DLIB_HAVE_AVX2 return _mm256_and_si256(lhs, rhs); #else return simd8i(lhs.low()&rhs.low(), lhs.high()&rhs.high()); #endif } inline simd8i& operator&= (simd8i& lhs, const simd8i& rhs) { return lhs = lhs & rhs; return lhs;} // ---------------------------------------------------------------------------------------- inline simd8i operator| (const simd8i& lhs, const simd8i& rhs) { #ifdef DLIB_HAVE_AVX2 return _mm256_or_si256(lhs, rhs); #else return simd8i(lhs.low()|rhs.low(), lhs.high()|rhs.high()); #endif } inline simd8i& operator|= (simd8i& lhs, const simd8i& rhs) { return lhs = lhs | rhs; return lhs;} // ---------------------------------------------------------------------------------------- inline simd8i operator^ (const simd8i& lhs, const simd8i& rhs) { #ifdef DLIB_HAVE_AVX2 return _mm256_xor_si256(lhs, rhs); #else return simd8i(lhs.low()^rhs.low(), lhs.high()^rhs.high()); #endif } inline simd8i& operator^= (simd8i& lhs, const simd8i& rhs) { return lhs = lhs ^ rhs; return lhs;} // ---------------------------------------------------------------------------------------- inline simd8i operator~ (const simd8i& lhs) { #ifdef DLIB_HAVE_AVX2 return _mm256_xor_si256(lhs, _mm256_set1_epi32(0xFFFFFFFF)); #else return simd8i(~lhs.low(), ~lhs.high()); #endif } // ---------------------------------------------------------------------------------------- inline simd8i operator<< (const simd8i& lhs, const int& rhs) { #ifdef DLIB_HAVE_AVX2 return _mm256_sll_epi32(lhs,_mm_cvtsi32_si128(rhs)); #else return simd8i(lhs.low()<<rhs, lhs.high()<<rhs); #endif } inline simd8i& operator<<= (simd8i& lhs, const int& rhs) { return lhs = lhs << rhs; return lhs;} // ---------------------------------------------------------------------------------------- inline simd8i operator>> (const simd8i& lhs, const int& rhs) { #ifdef DLIB_HAVE_AVX2 return _mm256_sra_epi32(lhs,_mm_cvtsi32_si128(rhs)); #else return simd8i(lhs.low()>>rhs, lhs.high()>>rhs); #endif } inline simd8i& operator>>= (simd8i& lhs, const int& rhs) { return lhs = lhs >> rhs; return lhs;} // ---------------------------------------------------------------------------------------- inline simd8i operator== (const simd8i& lhs, const simd8i& rhs) { #ifdef DLIB_HAVE_AVX2 return _mm256_cmpeq_epi32(lhs, rhs); #else return simd8i(lhs.low()==rhs.low(), lhs.high()==rhs.high()); #endif } // ---------------------------------------------------------------------------------------- inline simd8i operator!= (const simd8i& lhs, const simd8i& rhs) { return ~(lhs==rhs); } // ---------------------------------------------------------------------------------------- inline simd8i operator> (const simd8i& lhs, const simd8i& rhs) { #ifdef DLIB_HAVE_AVX2 return _mm256_cmpgt_epi32(lhs, rhs); #else return simd8i(lhs.low()>rhs.low(), lhs.high()>rhs.high()); #endif } // ---------------------------------------------------------------------------------------- inline simd8i operator< (const simd8i& lhs, const simd8i& rhs) { return rhs > lhs; } // ---------------------------------------------------------------------------------------- inline simd8i operator<= (const simd8i& lhs, const simd8i& rhs) { return ~(lhs > rhs); } // ---------------------------------------------------------------------------------------- inline simd8i operator>= (const simd8i& lhs, const simd8i& rhs) { return rhs <= lhs; } // ---------------------------------------------------------------------------------------- inline simd8i min (const simd8i& lhs, const simd8i& rhs) { #ifdef DLIB_HAVE_AVX2 return _mm256_min_epi32(lhs, rhs); #else return simd8i(min(lhs.low(),rhs.low()), min(lhs.high(),rhs.high())); #endif } // ---------------------------------------------------------------------------------------- inline simd8i max (const simd8i& lhs, const simd8i& rhs) { #ifdef DLIB_HAVE_AVX2 return _mm256_max_epi32(lhs, rhs); #else return simd8i(max(lhs.low(),rhs.low()), max(lhs.high(),rhs.high())); #endif } // ---------------------------------------------------------------------------------------- inline int32 sum(const simd8i& item) { return sum(item.low()+item.high()); } // ---------------------------------------------------------------------------------------- // perform cmp ? a : b inline simd8i select(const simd8i& cmp, const simd8i& a, const simd8i& b) { #ifdef DLIB_HAVE_AVX2 return _mm256_blendv_epi8(b,a,cmp); #else return simd8i(select(cmp.low(), a.low(), b.low()), select(cmp.high(), a.high(), b.high())); #endif } // ---------------------------------------------------------------------------------------- } #endif // DLIB_sIMD8I_Hh_