|
#ifndef C10_MACROS_MACROS_H_ |
|
#define C10_MACROS_MACROS_H_ |
|
#include <cassert> |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#ifndef C10_USING_CUSTOM_GENERATED_MACROS |
|
#include <c10/macros/cmake_macros.h> |
|
#endif |
|
|
|
#include <c10/macros/Export.h> |
|
|
|
#if defined(__clang__) |
|
#define __ubsan_ignore_float_divide_by_zero__ \ |
|
__attribute__((no_sanitize("float-divide-by-zero"))) |
|
#define __ubsan_ignore_undefined__ __attribute__((no_sanitize("undefined"))) |
|
#define __ubsan_ignore_signed_int_overflow__ \ |
|
__attribute__((no_sanitize("signed-integer-overflow"))) |
|
#define __ubsan_ignore_pointer_overflow__ \ |
|
__attribute__((no_sanitize("pointer-overflow"))) |
|
#define __ubsan_ignore_function__ __attribute__((no_sanitize("function"))) |
|
#else |
|
#define __ubsan_ignore_float_divide_by_zero__ |
|
#define __ubsan_ignore_undefined__ |
|
#define __ubsan_ignore_signed_int_overflow__ |
|
#define __ubsan_ignore_pointer_overflow__ |
|
#define __ubsan_ignore_function__ |
|
#endif |
|
|
|
|
|
#undef C10_ASAN_ENABLED |
|
|
|
|
|
#if defined(__has_feature) |
|
#if ((__has_feature(address_sanitizer))) |
|
#define C10_ASAN_ENABLED 1 |
|
#endif |
|
#endif |
|
|
|
|
|
#if defined(__SANITIZE_ADDRESS__) |
|
#if __SANITIZE_ADDRESS__ |
|
#if !defined(C10_ASAN_ENABLED) |
|
#define C10_ASAN_ENABLED 1 |
|
#endif |
|
#endif |
|
#endif |
|
|
|
#if !defined(C10_ASAN_ENABLED) |
|
#define C10_ASAN_ENABLED 0 |
|
#endif |
|
|
|
|
|
|
|
#define C10_DISABLE_COPY_AND_ASSIGN(classname) \ |
|
classname(const classname&) = delete; \ |
|
classname& operator=(const classname&) = delete |
|
|
|
#define C10_CONCATENATE_IMPL(s1, s2) s1##s2 |
|
#define C10_CONCATENATE(s1, s2) C10_CONCATENATE_IMPL(s1, s2) |
|
|
|
#define C10_MACRO_EXPAND(args) args |
|
|
|
#define C10_STRINGIZE_IMPL(x) #x |
|
#define C10_STRINGIZE(x) C10_STRINGIZE_IMPL(x) |
|
|
|
|
|
|
|
|
|
|
|
#ifdef __COUNTER__ |
|
#define C10_UID __COUNTER__ |
|
#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __COUNTER__) |
|
#else |
|
#define C10_UID __LINE__ |
|
#define C10_ANONYMOUS_VARIABLE(str) C10_CONCATENATE(str, __LINE__) |
|
#endif |
|
|
|
#ifdef __has_cpp_attribute |
|
#define C10_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x) |
|
#else |
|
#define C10_HAS_CPP_ATTRIBUTE(x) (0) |
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#if C10_HAS_CPP_ATTRIBUTE(nodiscard) |
|
#define C10_NODISCARD [[nodiscard]] |
|
|
|
|
|
#elif __cplusplus && C10_HAS_CPP_ATTRIBUTE(clang::warn_unused_result) |
|
|
|
|
|
|
|
#define C10_NODISCARD [[clang::warn_unused_result]] |
|
#else |
|
#define C10_NODISCARD |
|
#endif |
|
|
|
|
|
#if defined(_MSC_VER) && !defined(__clang__) |
|
#define C10_UNUSED __pragma(warning(suppress : 4100 4101)) |
|
#else |
|
#define C10_UNUSED __attribute__((__unused__)) |
|
#endif |
|
|
|
#if !defined(__has_attribute) |
|
#define __has_attribute(x) 0 |
|
#endif |
|
|
|
|
|
#if __has_attribute(used) |
|
#define C10_USED __attribute__((__used__)) |
|
#else |
|
#define C10_USED |
|
#endif |
|
|
|
#define C10_RESTRICT __restrict |
|
|
|
|
|
|
|
namespace c10 {} |
|
namespace c10::cuda {} |
|
namespace c10::hip {} |
|
namespace c10::xpu {} |
|
|
|
|
|
|
|
|
|
|
|
namespace caffe2 { |
|
using namespace c10; |
|
} |
|
namespace at { |
|
using namespace c10; |
|
} |
|
namespace at::cuda { |
|
using namespace c10::cuda; |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
namespace at::cuda { |
|
using namespace c10::hip; |
|
} |
|
|
|
namespace at::xpu { |
|
using namespace c10::xpu; |
|
} |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#if defined(__GNUC__) || defined(__ICL) || defined(__clang__) |
|
#define C10_LIKELY(expr) (__builtin_expect(static_cast<bool>(expr), 1)) |
|
#define C10_UNLIKELY(expr) (__builtin_expect(static_cast<bool>(expr), 0)) |
|
#else |
|
#define C10_LIKELY(expr) (expr) |
|
#define C10_UNLIKELY(expr) (expr) |
|
#endif |
|
|
|
|
|
|
|
#ifdef __GNUC__ |
|
#define C10_NOINLINE __attribute__((noinline)) |
|
#elif _MSC_VER |
|
#define C10_NOINLINE __declspec(noinline) |
|
#else |
|
#define C10_NOINLINE |
|
#endif |
|
|
|
#if defined(_MSC_VER) |
|
#define C10_ALWAYS_INLINE __forceinline |
|
#elif __has_attribute(always_inline) || defined(__GNUC__) |
|
#define C10_ALWAYS_INLINE __attribute__((__always_inline__)) inline |
|
#else |
|
#define C10_ALWAYS_INLINE inline |
|
#endif |
|
|
|
#if defined(_MSC_VER) |
|
#define C10_ATTR_VISIBILITY_HIDDEN |
|
#elif defined(__GNUC__) |
|
#define C10_ATTR_VISIBILITY_HIDDEN __attribute__((__visibility__("hidden"))) |
|
#else |
|
#define C10_ATTR_VISIBILITY_HIDDEN |
|
#endif |
|
|
|
#define C10_ERASE C10_ALWAYS_INLINE C10_ATTR_VISIBILITY_HIDDEN |
|
|
|
#include <cstdint> |
|
|
|
#ifdef __HIPCC__ |
|
|
|
|
|
|
|
#include <hip/hip_runtime.h> |
|
#endif |
|
|
|
#if defined(__CUDACC__) || defined(__HIPCC__) |
|
|
|
#define C10_HOST_DEVICE __host__ __device__ |
|
#define C10_DEVICE __device__ |
|
#define C10_HOST __host__ |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#if __CUDA_ARCH__ == 750 |
|
constexpr uint32_t CUDA_MAX_THREADS_PER_SM = 1024; |
|
#elif __CUDA_ARCH__ == 860 || __CUDA_ARCH__ == 870 || __CUDA_ARCH__ == 890 |
|
constexpr uint32_t CUDA_MAX_THREADS_PER_SM = 1536; |
|
#else |
|
constexpr uint32_t CUDA_MAX_THREADS_PER_SM = 2048; |
|
#endif |
|
|
|
constexpr uint32_t CUDA_MAX_THREADS_PER_BLOCK = 1024; |
|
|
|
|
|
|
|
constexpr uint32_t CUDA_THREADS_PER_BLOCK_FALLBACK = 256; |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#define C10_MAX_THREADS_PER_BLOCK(val) \ |
|
(((val) <= CUDA_MAX_THREADS_PER_BLOCK) ? (val) \ |
|
: CUDA_THREADS_PER_BLOCK_FALLBACK) |
|
#define C10_MIN_BLOCKS_PER_SM(threads_per_block, blocks_per_sm) \ |
|
((((threads_per_block) * (blocks_per_sm) <= CUDA_MAX_THREADS_PER_SM) \ |
|
? (blocks_per_sm) \ |
|
: ((CUDA_MAX_THREADS_PER_SM + (threads_per_block)-1) / \ |
|
(threads_per_block)))) |
|
|
|
#define C10_LAUNCH_BOUNDS_0 \ |
|
__launch_bounds__( \ |
|
256, 4) |
|
|
|
#define C10_LAUNCH_BOUNDS_1(max_threads_per_block) \ |
|
__launch_bounds__((C10_MAX_THREADS_PER_BLOCK((max_threads_per_block)))) |
|
#define C10_LAUNCH_BOUNDS_2(max_threads_per_block, min_blocks_per_sm) \ |
|
__launch_bounds__( \ |
|
(C10_MAX_THREADS_PER_BLOCK((max_threads_per_block))), \ |
|
(C10_MIN_BLOCKS_PER_SM((max_threads_per_block), (min_blocks_per_sm)))) |
|
#else |
|
#define C10_HOST_DEVICE |
|
#define C10_HOST |
|
#define C10_DEVICE |
|
#endif |
|
|
|
#if defined(USE_ROCM) |
|
#define C10_HIP_HOST_DEVICE __host__ __device__ |
|
#else |
|
#define C10_HIP_HOST_DEVICE |
|
#endif |
|
|
|
#if defined(USE_ROCM) |
|
#define C10_WARP_SIZE warpSize |
|
#else |
|
#define C10_WARP_SIZE 32 |
|
#endif |
|
|
|
#if defined(_MSC_VER) && _MSC_VER <= 1900 |
|
#define __func__ __FUNCTION__ |
|
#endif |
|
|
|
|
|
|
|
|
|
#if defined(__ANDROID__) || defined(__APPLE__) || defined(__FreeBSD__) |
|
|
|
#define CUDA_KERNEL_ASSERT(cond) |
|
#define SYCL_KERNEL_ASSERT(cond) |
|
#elif defined(_MSC_VER) |
|
#if defined(NDEBUG) |
|
extern "C" { |
|
C10_IMPORT |
|
#if defined(__SYCL_DEVICE_ONLY__) |
|
extern SYCL_EXTERNAL void _wassert( |
|
const wchar_t* wexpr, |
|
const wchar_t* wfile, |
|
unsigned line); |
|
#else |
|
#if defined(__CUDA_ARCH__) |
|
__host__ __device__ |
|
#endif |
|
void |
|
_wassert(wchar_t const* _Message, wchar_t const* _File, unsigned _Line); |
|
#endif |
|
} |
|
#endif |
|
#define CUDA_KERNEL_ASSERT(cond) \ |
|
if (C10_UNLIKELY(!(cond))) { \ |
|
(void)(_wassert( \ |
|
_CRT_WIDE(#cond), \ |
|
_CRT_WIDE(__FILE__), \ |
|
static_cast<unsigned>(__LINE__)), \ |
|
0); \ |
|
} |
|
#define SYCL_KERNEL_ASSERT(cond) \ |
|
if (C10_UNLIKELY(!(cond))) { \ |
|
(void)(_wassert( \ |
|
_CRT_WIDE(#cond), \ |
|
_CRT_WIDE(__FILE__), \ |
|
static_cast<unsigned>(__LINE__)), \ |
|
0); \ |
|
} |
|
#else |
|
#if defined(NDEBUG) |
|
extern "C" { |
|
#if defined(__SYCL_DEVICE_ONLY__) |
|
extern SYCL_EXTERNAL void __assert_fail( |
|
const char* expr, |
|
const char* file, |
|
unsigned int line, |
|
const char* func); |
|
#else |
|
#if (defined(__CUDA_ARCH__) && !(defined(__clang__) && defined(__CUDA__))) |
|
|
|
|
|
__host__ __device__ |
|
#endif |
|
|
|
|
|
|
|
|
|
|
|
void |
|
__assert_fail( |
|
const char* assertion, |
|
const char* file, |
|
unsigned int line, |
|
const char* function) noexcept __attribute__((__noreturn__)); |
|
|
|
#endif |
|
} |
|
#endif |
|
|
|
#if !defined(C10_USE_ROCM_KERNEL_ASSERT) and defined(USE_ROCM) |
|
#define CUDA_KERNEL_ASSERT(cond) |
|
#define SYCL_KERNEL_ASSERT(cond) |
|
#else |
|
#define CUDA_KERNEL_ASSERT(cond) \ |
|
if (C10_UNLIKELY(!(cond))) { \ |
|
__assert_fail( \ |
|
#cond, __FILE__, static_cast<unsigned int>(__LINE__), __func__); \ |
|
} |
|
#define SYCL_KERNEL_ASSERT(cond) \ |
|
if (C10_UNLIKELY(!(cond))) { \ |
|
__assert_fail( \ |
|
#cond, __FILE__, static_cast<unsigned int>(__LINE__), __func__); \ |
|
} |
|
#endif |
|
#endif |
|
|
|
#ifdef __APPLE__ |
|
#include <TargetConditionals.h> |
|
#endif |
|
|
|
#if defined(__ANDROID__) |
|
#define C10_ANDROID 1 |
|
#define C10_MOBILE 1 |
|
#elif ( \ |
|
defined(__APPLE__) && \ |
|
(TARGET_IPHONE_SIMULATOR || TARGET_OS_SIMULATOR || TARGET_OS_IPHONE)) |
|
#define C10_IOS 1 |
|
#define C10_MOBILE 1 |
|
#endif |
|
|
|
#if defined(C10_MOBILE) && C10_MOBILE |
|
#define C10_ALWAYS_INLINE_UNLESS_MOBILE inline |
|
#else |
|
#define C10_ALWAYS_INLINE_UNLESS_MOBILE C10_ALWAYS_INLINE |
|
#endif |
|
|
|
#if defined(__CUDA_ARCH__) |
|
#if defined(_MSC_VER) && defined(__CUDACC__) |
|
#define CONSTEXPR_EXCEPT_WIN_CUDA const |
|
#define C10_HOST_CONSTEXPR_EXCEPT_WIN_CUDA __host__ |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#define STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(field, val) \ |
|
static const char* field; |
|
#define STATIC_CONST_STR_OUT_OF_LINE_FOR_WIN_CUDA(cls, field, val) \ |
|
const char* cls::field = val; |
|
#else |
|
#define CONSTEXPR_EXCEPT_WIN_CUDA constexpr |
|
#define C10_HOST_CONSTEXPR_EXCEPT_WIN_CUDA __host__ |
|
|
|
#define STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(field, val) \ |
|
static constexpr const char* field = val; |
|
#define STATIC_CONST_STR_OUT_OF_LINE_FOR_WIN_CUDA(cls, field, val) |
|
#endif |
|
#else |
|
#if defined(_MSC_VER) && defined(__CUDACC__) |
|
#define CONSTEXPR_EXCEPT_WIN_CUDA const |
|
#define C10_HOST_CONSTEXPR_EXCEPT_WIN_CUDA |
|
|
|
#define STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(field, val) \ |
|
static const char* field; |
|
#define STATIC_CONST_STR_OUT_OF_LINE_FOR_WIN_CUDA(cls, field, val) \ |
|
const char* cls::field = val; |
|
#else |
|
#define CONSTEXPR_EXCEPT_WIN_CUDA constexpr |
|
#define C10_HOST_CONSTEXPR_EXCEPT_WIN_CUDA constexpr |
|
|
|
#define STATIC_CONSTEXPR_STR_INL_EXCEPT_WIN_CUDA(field, val) \ |
|
static constexpr const char* field = val; |
|
#define STATIC_CONST_STR_OUT_OF_LINE_FOR_WIN_CUDA(cls, field, val) |
|
#endif |
|
#endif |
|
|
|
#ifndef HAS_DEMANGLE |
|
#if defined(__ANDROID__) || defined(_WIN32) || defined(__EMSCRIPTEN__) |
|
#define HAS_DEMANGLE 0 |
|
#elif defined(__APPLE__) && \ |
|
(TARGET_IPHONE_SIMULATOR || TARGET_OS_SIMULATOR || TARGET_OS_IPHONE) |
|
#define HAS_DEMANGLE 0 |
|
#else |
|
#define HAS_DEMANGLE 1 |
|
#endif |
|
#endif |
|
|
|
#define _C10_PRAGMA__(string) _Pragma(#string) |
|
#define _C10_PRAGMA_(string) _C10_PRAGMA__(string) |
|
|
|
#ifdef __clang__ |
|
#define C10_CLANG_DIAGNOSTIC_PUSH() _Pragma("clang diagnostic push") |
|
#define C10_CLANG_DIAGNOSTIC_POP() _Pragma("clang diagnostic pop") |
|
#define C10_CLANG_DIAGNOSTIC_IGNORE(flag) \ |
|
_C10_PRAGMA_(clang diagnostic ignored flag) |
|
#define C10_CLANG_HAS_WARNING(flag) __has_warning(flag) |
|
#else |
|
#define C10_CLANG_DIAGNOSTIC_PUSH() |
|
#define C10_CLANG_DIAGNOSTIC_POP() |
|
#define C10_CLANG_DIAGNOSTIC_IGNORE(flag) |
|
#define C10_CLANG_HAS_WARNING(flag) 0 |
|
#endif |
|
|
|
#ifdef __clang__ |
|
|
|
#define C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED(warning) \ |
|
_C10_PRAGMA_(clang diagnostic push) \ |
|
_C10_PRAGMA_(clang diagnostic ignored "-Wunknown-warning-option") \ |
|
_C10_PRAGMA_(clang diagnostic ignored warning) |
|
|
|
#define C10_DIAGNOSTIC_POP() _C10_PRAGMA_(clang diagnostic pop) |
|
|
|
#elif __GNUC__ |
|
|
|
#define C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED(warning) \ |
|
_C10_PRAGMA_(GCC diagnostic push) \ |
|
_C10_PRAGMA_(GCC diagnostic ignored "-Wpragmas") \ |
|
_C10_PRAGMA_(GCC diagnostic ignored warning) |
|
|
|
#define C10_DIAGNOSTIC_POP() _C10_PRAGMA_(GCC diagnostic pop) |
|
|
|
#else |
|
|
|
#define C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED(warning) |
|
#define C10_DIAGNOSTIC_POP() |
|
|
|
#endif |
|
|
|
#endif |
|
|