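# NOTE: "Spaces: Sleeping Sleeping" was web-page residue captured above the
# license header; it is not part of the CMake source.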
# Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# ---- CUDA flow selection ----------------------------------------------------
# The "native" flow (enable_language(CUDA)) is the default. The only case that
# defaults to the find_package(CUDA) flow is a non-clang CUDA compiler combined
# with a CMake older than 3.12.4.
if((NOT CUDA_COMPILER MATCHES "[Cc]lang") AND (CMAKE_VERSION VERSION_LESS 3.12.4))
  set(CUTLASS_NATIVE_CUDA_INIT OFF)
else()
  set(CUTLASS_NATIVE_CUDA_INIT ON)
endif()

set(
  CUTLASS_NATIVE_CUDA
  ${CUTLASS_NATIVE_CUDA_INIT}
  CACHE BOOL
  "Utilize the CMake native CUDA flow"
)

# For backward compatibility, allow use of CUDA_PATH: it seeds CUDACXX, but
# only when neither CUDACXX nor CUDA_BIN_PATH is already in the environment.
if(DEFINED ENV{CUDA_PATH} AND NOT DEFINED ENV{CUDACXX} AND NOT DEFINED ENV{CUDA_BIN_PATH})
  set(ENV{CUDACXX} $ENV{CUDA_PATH}/bin/nvcc)
endif()

if(NOT CUTLASS_NATIVE_CUDA)

  # find_package(CUDA) flow: the toolkit is located the old way, and its
  # version is mirrored into CMAKE_CUDA_COMPILER_VERSION when that is unset.
  find_package(CUDA REQUIRED)

  if(NOT CMAKE_CUDA_COMPILER_VERSION)
    set(CMAKE_CUDA_COMPILER_VERSION ${CUDA_VERSION})
  endif()

else()

  # Native flow: fill in CUDA_VERSION and CUDA_TOOLKIT_ROOT_DIR, which the rest
  # of this file reads, from what enable_language(CUDA) discovered.
  enable_language(CUDA)

  if(NOT CUDA_VERSION)
    set(CUDA_VERSION ${CMAKE_CUDA_COMPILER_VERSION})
  endif()

  if(NOT CUDA_TOOLKIT_ROOT_DIR)
    # The toolkit root is taken to be two levels above the compiler binary.
    get_filename_component(CUDA_TOOLKIT_ROOT_DIR "${CMAKE_CUDA_COMPILER}/../.." ABSOLUTE)
  endif()

endif()

if(CUDA_VERSION VERSION_LESS 9.2)
  message(FATAL_ERROR "CUDA 9.2+ Required, Found ${CUDA_VERSION}.")
endif()

# With the find_package(CUDA) flow, or whenever the CUDA compiler is clang,
# CMAKE_CUDA_COMPILER is pointed at the toolkit's nvcc.
if(CUDA_COMPILER MATCHES "[Cc]lang" OR NOT CUTLASS_NATIVE_CUDA)
  set(CMAKE_CUDA_COMPILER ${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc)
  message(STATUS "CUDA Compiler: ${CMAKE_CUDA_COMPILER}")
endif()
# ---- CUDA runtime (cudart) ---------------------------------------------------
# We aren't going to search any system paths (NO_DEFAULT_PATH). We want to find
# the runtime in the CUDA toolkit we're building against.
find_library(
  CUDART_LIBRARY
  NAMES cudart
  PATHS ${CUDA_TOOLKIT_ROOT_DIR}
  PATH_SUFFIXES lib/x86_64-linux-gnu lib/x64 lib64 lib
  NO_DEFAULT_PATH
)

if(TARGET cudart)

  message(STATUS "CUDART: Already Found")

elseif(CUDART_LIBRARY)

  message(STATUS "CUDART: ${CUDART_LIBRARY}")

  if(NOT WIN32)
    add_library(cudart SHARED IMPORTED GLOBAL)
  else()
    # Even though we're linking against a .dll, in Windows you statically link
    # against the .lib file found under lib/x64. The .dll will be loaded at
    # runtime automatically from the PATH search.
    add_library(cudart STATIC IMPORTED GLOBAL)
  endif()

  add_library(nvidia::cudart ALIAS cudart)

  set_target_properties(cudart PROPERTIES IMPORTED_LOCATION "${CUDART_LIBRARY}")

else()

  message(STATUS "CUDART: Not Found")

endif()
# ---- CUDA driver library (cuda) ----------------------------------------------
# We aren't going to search any system paths (NO_DEFAULT_PATH). We want to find
# the library in the CUDA toolkit we're building against; the toolkit's stubs
# directories are searched last.
find_library(
  CUDA_DRIVER_LIBRARY
  NAMES cuda
  PATHS ${CUDA_TOOLKIT_ROOT_DIR}
  PATH_SUFFIXES lib/x86_64-linux-gnu lib/x64 lib64 lib lib64/stubs lib/stubs
  NO_DEFAULT_PATH
)

if(TARGET cuda_driver)

  message(STATUS "CUDA Driver: Already Found")

elseif(CUDA_DRIVER_LIBRARY)

  message(STATUS "CUDA Driver: ${CUDA_DRIVER_LIBRARY}")

  if(NOT WIN32)
    add_library(cuda_driver SHARED IMPORTED GLOBAL)
  else()
    # Even though we're linking against a .dll, in Windows you statically link
    # against the .lib file found under lib/x64. The .dll will be loaded at
    # runtime automatically from the PATH search.
    add_library(cuda_driver STATIC IMPORTED GLOBAL)
  endif()

  add_library(nvidia::cuda_driver ALIAS cuda_driver)

  set_target_properties(cuda_driver PROPERTIES IMPORTED_LOCATION "${CUDA_DRIVER_LIBRARY}")

else()

  message(STATUS "CUDA Driver: Not Found")

endif()
# ---- NVRTC ---------------------------------------------------------------------
# We aren't going to search any system paths (NO_DEFAULT_PATH). We want to find
# the library in the CUDA toolkit we're building against.
find_library(
  NVRTC_LIBRARY
  NAMES nvrtc
  PATHS ${CUDA_TOOLKIT_ROOT_DIR}
  PATH_SUFFIXES lib/x64 lib64 lib
  NO_DEFAULT_PATH
)

if(TARGET nvrtc)

  message(STATUS "NVRTC: Already Found")

elseif(NVRTC_LIBRARY)

  message(STATUS "NVRTC: ${NVRTC_LIBRARY}")

  if(NOT WIN32)
    add_library(nvrtc SHARED IMPORTED GLOBAL)
  else()
    # Even though we're linking against a .dll, in Windows you statically link
    # against the .lib file found under lib/x64. The .dll will be loaded at
    # runtime automatically from the PATH search.
    add_library(nvrtc STATIC IMPORTED GLOBAL)
  endif()

  add_library(nvidia::nvrtc ALIAS nvrtc)

  set_target_properties(nvrtc PROPERTIES IMPORTED_LOCATION "${NVRTC_LIBRARY}")

else()

  message(STATUS "NVRTC: Not Found")

endif()
# Some platforms (e.g. Visual Studio) don't add the CUDA include directories to
# the system include paths by default, so we add them explicitly here.
include_directories(
  SYSTEM
  ${CUDA_INCLUDE_DIRS}
)
# cutlass_correct_source_file_language_property(<file>...)
#
# When the CUDA compiler is clang (CUDA_COMPILER matches "[Cc]lang"), sets the
# source-file property LANGUAGE to CXX on every argument whose name ends in
# ".cu". With any other CUDA compiler the function does nothing.
#
# Arguments:
#   <file>...  Source arguments to inspect; entries not ending in ".cu" are
#              left untouched.
function(cutlass_correct_source_file_language_property)

  if(NOT CUDA_COMPILER MATCHES "[Cc]lang")
    return()
  endif()

  foreach(File ${ARGN})
    # "\\." reaches the regex engine as "\.", i.e. a literal dot. A single
    # backslash would be consumed by CMake's own string escaping, leaving a
    # bare "." that matches any character (so e.g. "notcu" would qualify).
    if(File MATCHES "\\.cu$")
      set_source_files_properties(${File} PROPERTIES LANGUAGE CXX)
    endif()
  endforeach()

endfunction()
# ---- Unity build options -------------------------------------------------------
# Combined-source compilation defaults to ON for MSVC and when
# CUTLASS_LIBRARY_KERNELS matches "all"; otherwise it defaults to OFF.
set(CUTLASS_UNITY_BUILD_ENABLED_INIT OFF)
if(MSVC OR CUTLASS_LIBRARY_KERNELS MATCHES "all")
  set(CUTLASS_UNITY_BUILD_ENABLED_INIT ON)
endif()

set(
  CUTLASS_UNITY_BUILD_ENABLED
  ${CUTLASS_UNITY_BUILD_ENABLED_INIT}
  CACHE BOOL
  "Enable combined source compilation"
)

# Default number of sources per unity file: 8 with MSVC, 16 everywhere else.
set(CUTLASS_UNITY_BUILD_BATCH_SIZE_INIT 16)
if(MSVC)
  set(CUTLASS_UNITY_BUILD_BATCH_SIZE_INIT 8)
endif()

set(
  CUTLASS_UNITY_BUILD_BATCH_SIZE
  ${CUTLASS_UNITY_BUILD_BATCH_SIZE_INIT}
  CACHE STRING
  "Batch size for unified source files"
)
# cutlass_unify_source_files(<out-var> [BATCH_SOURCES <bool>] [BATCH_SIZE <n>] <arg>...)
#
# Rewrites a list of target source arguments so that ".cu" files are compiled
# through generated "unity" files that #include several sources each.
#
# Arguments:
#   <out-var>       Name of the variable, set in the caller's scope, that
#                   receives the resulting argument list.
#   BATCH_SOURCES   Whether to batch at all. Defaults to ON.
#   BATCH_SIZE      Number of .cu files per unity file. Defaults to
#                   CUTLASS_UNITY_BUILD_BATCH_SIZE.
#   <arg>...        Source arguments (may include keywords such as PRIVATE).
#
# Batching happens only when CUTLASS_UNITY_BUILD_ENABLED is true, BATCH_SOURCES
# is true and BATCH_SIZE is greater than 1. In that case the result is every
# non-.cu argument in its original order followed by one generated file per
# batch. Otherwise the arguments are returned unchanged.
#
# Generated files are named
#   ${CMAKE_CURRENT_BINARY_DIR}/${NAME}.unity.<hash>.cu
# where <hash> is the first 12 hex digits of the SHA256 of the batch's file
# list. NAME is not a parameter of this function: it is read from the calling
# function's scope.
function(cutlass_unify_source_files TARGET_ARGS_VAR)

  set(options)
  set(oneValueArgs BATCH_SOURCES BATCH_SIZE)
  set(multiValueArgs)
  cmake_parse_arguments(_ "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

  if (NOT DEFINED TARGET_ARGS_VAR)
    message(FATAL_ERROR "TARGET_ARGS_VAR parameter is required")
  endif()

  if (NOT DEFINED __BATCH_SOURCES)
    set(__BATCH_SOURCES ON)
  endif()

  if (__BATCH_SOURCES AND NOT DEFINED __BATCH_SIZE)
    set(__BATCH_SIZE ${CUTLASS_UNITY_BUILD_BATCH_SIZE})
  endif()

  if (CUTLASS_UNITY_BUILD_ENABLED AND __BATCH_SOURCES AND __BATCH_SIZE GREATER 1)

    # Split the arguments into .cu files (to be batched) and everything else.
    set(CUDA_FILE_ARGS)
    set(TARGET_SOURCE_ARGS)

    foreach(ARG ${__UNPARSED_ARGUMENTS})
      # Test the loop variable by name: expanding it as ${ARG} would let if()
      # dereference the value a second time when it happens to name a variable.
      # "\\." is a literal dot; a single backslash is eaten by CMake's string
      # escaping and would leave a "." that matches any character.
      if(ARG MATCHES "\\.cu$")
        list(APPEND CUDA_FILE_ARGS ${ARG})
      else()
        list(APPEND TARGET_SOURCE_ARGS ${ARG})
      endif()
    endforeach()

    list(LENGTH CUDA_FILE_ARGS NUM_CUDA_FILE_ARGS)
    while(NUM_CUDA_FILE_ARGS GREATER 0)

      # Take the next batch from the front of the remaining list.
      list(SUBLIST CUDA_FILE_ARGS 0 ${__BATCH_SIZE} CUDA_FILE_BATCH)
      string(SHA256 CUDA_FILE_BATCH_HASH "${CUDA_FILE_BATCH}")
      string(SUBSTRING ${CUDA_FILE_BATCH_HASH} 0 12 CUDA_FILE_BATCH_HASH)

      set(BATCH_FILE ${CMAKE_CURRENT_BINARY_DIR}/${NAME}.unity.${CUDA_FILE_BATCH_HASH}.cu)
      message(STATUS "Generating ${BATCH_FILE}")

      set(BATCH_CONTENT "// Unity File - Auto Generated!\n")
      foreach(CUDA_FILE ${CUDA_FILE_BATCH})
        get_filename_component(CUDA_FILE_ABS_PATH "${CUDA_FILE}" ABSOLUTE)
        string(APPEND BATCH_CONTENT "#include \"${CUDA_FILE_ABS_PATH}\"\n")
      endforeach()

      # Only touch the file when its content changes, so that re-running CMake
      # does not bump the timestamp and force the unity file to be recompiled.
      set(WRITE_BATCH_FILE ON)
      if(EXISTS "${BATCH_FILE}")
        file(READ "${BATCH_FILE}" EXISTING_BATCH_CONTENT)
        if("${EXISTING_BATCH_CONTENT}" STREQUAL "${BATCH_CONTENT}")
          set(WRITE_BATCH_FILE OFF)
        endif()
      endif()
      if(WRITE_BATCH_FILE)
        file(WRITE "${BATCH_FILE}" "${BATCH_CONTENT}")
      endif()

      list(APPEND TARGET_SOURCE_ARGS ${BATCH_FILE})

      # Stop before asking SUBLIST for a start index past the end of the list.
      if (NUM_CUDA_FILE_ARGS LESS_EQUAL __BATCH_SIZE)
        break()
      endif()

      list(SUBLIST CUDA_FILE_ARGS ${__BATCH_SIZE} -1 CUDA_FILE_ARGS)
      list(LENGTH CUDA_FILE_ARGS NUM_CUDA_FILE_ARGS)

    endwhile()

  else()

    set(TARGET_SOURCE_ARGS ${__UNPARSED_ARGUMENTS})

  endif()

  set(${TARGET_ARGS_VAR} ${TARGET_SOURCE_ARGS} PARENT_SCOPE)

endfunction()
# cutlass_add_library(<name> [SKIP_GENCODE_FLAGS] [EXPORT_NAME <export-name>] <arg>...)
#
# Creates library target <name> from the given source arguments, after passing
# them through cutlass_unify_source_files().
#
# Arguments:
#   <name>               Target name.
#   SKIP_GENCODE_FLAGS   When given, cutlass_apply_cuda_gencode_flags() is not
#                        called on the target.
#   EXPORT_NAME          When given, also creates the alias
#                        nvidia::cutlass::<export-name> and sets the target's
#                        EXPORT_NAME property.
#   <arg>...             Remaining arguments, forwarded to add_library() /
#                        cuda_add_library().
#
# The target's CUDA_RUNTIME_LIBRARY property is set to Shared for shared
# libraries and Static for static libraries.
# cutlass_apply_standard_compile_options() and
# cutlass_apply_cuda_gencode_flags() are defined outside this section.
function(cutlass_add_library NAME)

  set(options SKIP_GENCODE_FLAGS)
  set(oneValueArgs EXPORT_NAME)
  set(multiValueArgs)
  cmake_parse_arguments(_ "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

  cutlass_unify_source_files(TARGET_SOURCE_ARGS ${__UNPARSED_ARGUMENTS})

  # "[Cc]lang" is the same compiler test the rest of this file uses, so that
  # CUDA_COMPILER=Clang and CUDA_COMPILER=clang take the same branch.
  if(CUTLASS_NATIVE_CUDA OR CUDA_COMPILER MATCHES "[Cc]lang")
    cutlass_correct_source_file_language_property(${TARGET_SOURCE_ARGS})
    add_library(${NAME} ${TARGET_SOURCE_ARGS} "")
  else()
    # NOTE(review): presumably read by FindCUDA's cuda_add_library() to pick
    # the target_link_libraries keyword; confirm against the FindCUDA docs.
    set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE)
    cuda_add_library(${NAME} ${TARGET_SOURCE_ARGS} "")
  endif()

  cutlass_apply_standard_compile_options(${NAME})

  if (NOT __SKIP_GENCODE_FLAGS)
    cutlass_apply_cuda_gencode_flags(${NAME})
  endif()

  target_compile_features(
    ${NAME}
    INTERFACE
    cxx_std_11
  )

  # Match the CUDA runtime linkage to the kind of library that was created.
  get_target_property(TARGET_TYPE ${NAME} TYPE)

  if (TARGET_TYPE MATCHES "SHARED")
    set_target_properties(${NAME} PROPERTIES CUDA_RUNTIME_LIBRARY Shared)
  elseif(TARGET_TYPE MATCHES "STATIC")
    set_target_properties(${NAME} PROPERTIES CUDA_RUNTIME_LIBRARY Static)
  endif()

  if(__EXPORT_NAME)
    add_library(nvidia::cutlass::${__EXPORT_NAME} ALIAS ${NAME})
    set_target_properties(${NAME} PROPERTIES EXPORT_NAME ${__EXPORT_NAME})
  endif()

endfunction()
# cutlass_add_executable(<name> [CUDA_RUNTIME_LIBRARY <None|Shared|Static>] <arg>...)
#
# Creates executable target <name> from the given source arguments, after
# passing them through cutlass_unify_source_files().
#
# Arguments:
#   <name>                 Target name.
#   CUDA_RUNTIME_LIBRARY   Value for the target's CUDA_RUNTIME_LIBRARY property.
#                          Must be one of None, Shared or Static; defaults to
#                          Shared. Any other value is a fatal error.
#   <arg>...               Remaining arguments, forwarded to add_executable() /
#                          cuda_add_executable().
#
# cutlass_apply_standard_compile_options() and
# cutlass_apply_cuda_gencode_flags() are defined outside this section.
function(cutlass_add_executable NAME)

  set(options)
  set(oneValueArgs CUDA_RUNTIME_LIBRARY)
  set(multiValueArgs)
  cmake_parse_arguments(_ "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

  if (NOT DEFINED __CUDA_RUNTIME_LIBRARY)
    set(__CUDA_RUNTIME_LIBRARY Shared)
  endif()

  set(__CUDA_RUNTIME_LIBRARY_ALLOWED None Shared Static)
  if (NOT __CUDA_RUNTIME_LIBRARY IN_LIST __CUDA_RUNTIME_LIBRARY_ALLOWED)
    message(FATAL_ERROR "CUDA_RUNTIME_LIBRARY value '${__CUDA_RUNTIME_LIBRARY}' is not in allowed list of '${__CUDA_RUNTIME_LIBRARY_ALLOWED}'")
  endif()

  cutlass_unify_source_files(TARGET_SOURCE_ARGS ${__UNPARSED_ARGUMENTS})

  # "[Cc]lang" is the same compiler test the rest of this file uses, so that
  # CUDA_COMPILER=Clang and CUDA_COMPILER=clang take the same branch.
  if(CUTLASS_NATIVE_CUDA OR CUDA_COMPILER MATCHES "[Cc]lang")
    cutlass_correct_source_file_language_property(${TARGET_SOURCE_ARGS})
    add_executable(${NAME} ${TARGET_SOURCE_ARGS})
  else()
    # NOTE(review): presumably read by FindCUDA's cuda_add_executable() to pick
    # the target_link_libraries keyword; confirm against the FindCUDA docs.
    set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE)
    cuda_add_executable(${NAME} ${TARGET_SOURCE_ARGS})
  endif()

  cutlass_apply_standard_compile_options(${NAME})
  cutlass_apply_cuda_gencode_flags(${NAME})

  target_compile_features(
    ${NAME}
    INTERFACE
    cxx_std_11
  )

  set_target_properties(${NAME} PROPERTIES CUDA_RUNTIME_LIBRARY ${__CUDA_RUNTIME_LIBRARY})

endfunction()
# cutlass_target_sources(<name> <arg>...)
#
# Adds sources to the existing target <name>. The arguments are first passed
# through cutlass_unify_source_files(), then through
# cutlass_correct_source_file_language_property(), and the result is handed to
# target_sources().
function(cutlass_target_sources NAME)

  # No keywords are declared, so every argument after NAME comes back in
  # __UNPARSED_ARGUMENTS.
  cmake_parse_arguments(_ "" "" "" ${ARGN})

  cutlass_unify_source_files(TARGET_SOURCE_ARGS ${__UNPARSED_ARGUMENTS})
  cutlass_correct_source_file_language_property(${TARGET_SOURCE_ARGS})

  target_sources(
    ${NAME}
    ${TARGET_SOURCE_ARGS}
  )

endfunction()