# Do0rMaMu's picture
# Upload folder using huggingface_hub
# e45d058 verified
# Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: BSD-3-Clause
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# 3. Neither the name of the copyright holder nor the names of its
# contributors may be used to endorse or promote products derived from
# this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
# CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
# OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
# Choose the default for CUTLASS_NATIVE_CUDA: the native flow is on unless the
# CUDA compiler is not clang and CMake is older than 3.12.4.
set(CUTLASS_NATIVE_CUDA_INIT ON)
if(NOT CUDA_COMPILER MATCHES "[Cc]lang" AND CMAKE_VERSION VERSION_LESS 3.12.4)
  set(CUTLASS_NATIVE_CUDA_INIT OFF)
endif()

# Expose the switch as a cache entry so it can be overridden by the user.
set(CUTLASS_NATIVE_CUDA
  ${CUTLASS_NATIVE_CUDA_INIT}
  CACHE BOOL "Utilize the CMake native CUDA flow"
)
# Backward compatibility: if CUDA_PATH is present in the environment and neither
# CUDACXX nor CUDA_BIN_PATH is, point CUDACXX at the nvcc under CUDA_PATH.
if(DEFINED ENV{CUDA_PATH})
  if(NOT DEFINED ENV{CUDACXX} AND NOT DEFINED ENV{CUDA_BIN_PATH})
    set(ENV{CUDACXX} $ENV{CUDA_PATH}/bin/nvcc)
  endif()
endif()
if(NOT CUTLASS_NATIVE_CUDA)

  # Legacy flow: locate the toolkit through the FindCUDA module.
  find_package(CUDA REQUIRED)

  # Mirror CUDA_VERSION into CMAKE_CUDA_COMPILER_VERSION when the latter is
  # unset, so both variables are available regardless of the flow in use.
  if(NOT CMAKE_CUDA_COMPILER_VERSION)
    set(CMAKE_CUDA_COMPILER_VERSION ${CUDA_VERSION})
  endif()

else()

  # Native flow: let CMake set up CUDA as a first-class language.
  enable_language(CUDA)

  # Backfill the legacy-style variables the rest of this file reads.
  if(NOT CUDA_VERSION)
    set(CUDA_VERSION ${CMAKE_CUDA_COMPILER_VERSION})
  endif()

  # The toolkit root is taken to be two directory levels above the compiler.
  if(NOT CUDA_TOOLKIT_ROOT_DIR)
    get_filename_component(CUDA_TOOLKIT_ROOT_DIR "${CMAKE_CUDA_COMPILER}/../.." ABSOLUTE)
  endif()

endif()
# Stop configuration when the detected toolkit is older than CUDA 9.2.
if(CUDA_VERSION VERSION_LESS 9.2)
  message(FATAL_ERROR "CUDA 9.2+ Required, Found ${CUDA_VERSION}.")
endif()

# With a clang CUDA compiler, or whenever the native flow is disabled, point
# CMAKE_CUDA_COMPILER at the nvcc inside the toolkit root and report it.
if(CUDA_COMPILER MATCHES "[Cc]lang" OR NOT CUTLASS_NATIVE_CUDA)
  set(CMAKE_CUDA_COMPILER ${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc)
  message(STATUS "CUDA Compiler: ${CMAKE_CUDA_COMPILER}")
endif()
# Locate the CUDA runtime library inside the toolkit we are building against.
# NO_DEFAULT_PATH restricts the search to CUDA_TOOLKIT_ROOT_DIR (plus the
# suffixes below); system paths are deliberately not searched.
find_library(
  CUDART_LIBRARY
  NAMES cudart
  PATHS ${CUDA_TOOLKIT_ROOT_DIR}
  PATH_SUFFIXES
    lib/x86_64-linux-gnu
    lib/x64
    lib64
    lib
  NO_DEFAULT_PATH
)

# Publish the runtime as the imported target `cudart` (alias nvidia::cudart)
# unless a target with that name already exists.
if(TARGET cudart)

  message(STATUS "CUDART: Already Found")

elseif(CUDART_LIBRARY)

  message(STATUS "CUDART: ${CUDART_LIBRARY}")

  if(NOT WIN32)
    add_library(cudart SHARED IMPORTED GLOBAL)
  else()
    # Even though the code ultimately lives in a .dll, on Windows you link
    # statically against the .lib file found under lib/x64. The .dll is loaded
    # automatically at runtime via the PATH search.
    add_library(cudart STATIC IMPORTED GLOBAL)
  endif()

  add_library(nvidia::cudart ALIAS cudart)

  set_target_properties(cudart PROPERTIES IMPORTED_LOCATION "${CUDART_LIBRARY}")

else()

  message(STATUS "CUDART: Not Found")

endif()
# Locate the CUDA driver library inside the toolkit we are building against,
# including the toolkit's stubs directories. NO_DEFAULT_PATH restricts the
# search to CUDA_TOOLKIT_ROOT_DIR; system paths are deliberately not searched.
find_library(
  CUDA_DRIVER_LIBRARY
  NAMES cuda
  PATHS ${CUDA_TOOLKIT_ROOT_DIR}
  PATH_SUFFIXES
    lib/x86_64-linux-gnu
    lib/x64
    lib64
    lib
    lib64/stubs
    lib/stubs
  NO_DEFAULT_PATH
)

# Publish the driver as the imported target `cuda_driver` (alias
# nvidia::cuda_driver) unless a target with that name already exists.
if(TARGET cuda_driver)

  message(STATUS "CUDA Driver: Already Found")

elseif(CUDA_DRIVER_LIBRARY)

  message(STATUS "CUDA Driver: ${CUDA_DRIVER_LIBRARY}")

  if(NOT WIN32)
    add_library(cuda_driver SHARED IMPORTED GLOBAL)
  else()
    # Even though the code ultimately lives in a .dll, on Windows you link
    # statically against the .lib file found under lib/x64. The .dll is loaded
    # automatically at runtime via the PATH search.
    add_library(cuda_driver STATIC IMPORTED GLOBAL)
  endif()

  add_library(nvidia::cuda_driver ALIAS cuda_driver)

  set_target_properties(cuda_driver PROPERTIES IMPORTED_LOCATION "${CUDA_DRIVER_LIBRARY}")

else()

  message(STATUS "CUDA Driver: Not Found")

endif()
# Locate the NVRTC library inside the toolkit we are building against.
# NO_DEFAULT_PATH restricts the search to CUDA_TOOLKIT_ROOT_DIR (plus the
# suffixes below); system paths are deliberately not searched.
find_library(
  NVRTC_LIBRARY
  NAMES nvrtc
  PATHS ${CUDA_TOOLKIT_ROOT_DIR}
  PATH_SUFFIXES
    lib/x64
    lib64
    lib
  NO_DEFAULT_PATH
)

# Publish NVRTC as the imported target `nvrtc` (alias nvidia::nvrtc) unless a
# target with that name already exists.
if(TARGET nvrtc)

  message(STATUS "NVRTC: Already Found")

elseif(NVRTC_LIBRARY)

  message(STATUS "NVRTC: ${NVRTC_LIBRARY}")

  if(NOT WIN32)
    add_library(nvrtc SHARED IMPORTED GLOBAL)
  else()
    # Even though the code ultimately lives in a .dll, on Windows you link
    # statically against the .lib file found under lib/x64. The .dll is loaded
    # automatically at runtime via the PATH search.
    add_library(nvrtc STATIC IMPORTED GLOBAL)
  endif()

  add_library(nvidia::nvrtc ALIAS nvrtc)

  set_target_properties(nvrtc PROPERTIES IMPORTED_LOCATION "${NVRTC_LIBRARY}")

else()

  message(STATUS "NVRTC: Not Found")

endif()
# Some platforms (e.g. Visual Studio) don't add the CUDA include directories to
# the system include paths by default, so we add them explicitly here. This is
# directory-scoped: it applies to targets defined in this directory and below.
# NOTE(review): the native-CUDA branch visible in this file does not set
# CUDA_INCLUDE_DIRS; confirm it is populated elsewhere in that flow, or that an
# empty expansion is acceptable there.
include_directories(SYSTEM ${CUDA_INCLUDE_DIRS})
# cutlass_correct_source_file_language_property(<file>...)
#
# When CUDA_COMPILER matches "[Cc]lang", sets the LANGUAGE source property to
# CXX on every listed file whose name ends in ".cu". For any other compiler the
# function does nothing.
#
# Arguments:
#   <file>...  Source file names to inspect (entries that do not end in ".cu"
#              are left untouched).
function(cutlass_correct_source_file_language_property)
  if(NOT CUDA_COMPILER MATCHES "[Cc]lang")
    return()
  endif()

  foreach(File ${ARGN})
    # The backslash must be doubled: inside a quoted CMake argument "\." is an
    # identity escape that collapses to ".", which the regex engine would then
    # treat as "any character" (so e.g. "foocu" would match). "\\." reaches the
    # regex engine as "\.", i.e. a literal dot.
    if(File MATCHES "\\.cu$")
      set_source_files_properties(${File} PROPERTIES LANGUAGE CXX)
    endif()
  endforeach()
endfunction()
# Unity builds default to ON for MSVC or when CUTLASS_LIBRARY_KERNELS matches
# "all", and to OFF otherwise.
set(CUTLASS_UNITY_BUILD_ENABLED_INIT OFF)
if(MSVC OR CUTLASS_LIBRARY_KERNELS MATCHES "all")
  set(CUTLASS_UNITY_BUILD_ENABLED_INIT ON)
endif()

set(CUTLASS_UNITY_BUILD_ENABLED
  ${CUTLASS_UNITY_BUILD_ENABLED_INIT}
  CACHE BOOL "Enable combined source compilation"
)

# Default number of sources per unity file: 8 for MSVC, 16 everywhere else.
set(CUTLASS_UNITY_BUILD_BATCH_SIZE_INIT 16)
if(MSVC)
  set(CUTLASS_UNITY_BUILD_BATCH_SIZE_INIT 8)
endif()

set(CUTLASS_UNITY_BUILD_BATCH_SIZE
  ${CUTLASS_UNITY_BUILD_BATCH_SIZE_INIT}
  CACHE STRING "Batch size for unified source files"
)
# cutlass_unify_source_files(<out-var> [BATCH_SOURCES <bool>] [BATCH_SIZE <n>] <arg>...)
#
# Groups the ".cu" files among <arg>... into generated "unity" sources, each of
# which #includes up to BATCH_SIZE of them by absolute path, and stores the
# resulting argument list in <out-var> in the caller's scope. The result holds
# all non-".cu" arguments in their original order, followed by the generated
# unity files.
#
# Batching only happens when CUTLASS_UNITY_BUILD_ENABLED is true, BATCH_SOURCES
# is true (default: ON) and the batch size (default:
# CUTLASS_UNITY_BUILD_BATCH_SIZE) is greater than 1. Otherwise <arg>... is
# returned unchanged.
#
# Generated files are written to CMAKE_CURRENT_BINARY_DIR and are named
# "${NAME}.unity.<hash>.cu", where <hash> is the first 12 hex digits of the
# SHA256 of the batch's file list. NAME is not a parameter of this function; it
# is read from the calling scope (the callers in this file each have a NAME
# parameter).
function(cutlass_unify_source_files TARGET_ARGS_VAR)

  set(options)
  set(oneValueArgs BATCH_SOURCES BATCH_SIZE)
  set(multiValueArgs)
  cmake_parse_arguments(_ "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

  # Defensive guard on the output variable name.
  if(NOT DEFINED TARGET_ARGS_VAR)
    message(FATAL_ERROR "TARGET_ARGS_VAR parameter is required")
  endif()

  if(NOT DEFINED __BATCH_SOURCES)
    set(__BATCH_SOURCES ON)
  endif()

  if(__BATCH_SOURCES AND NOT DEFINED __BATCH_SIZE)
    set(__BATCH_SIZE ${CUTLASS_UNITY_BUILD_BATCH_SIZE})
  endif()

  if(CUTLASS_UNITY_BUILD_ENABLED AND __BATCH_SOURCES AND __BATCH_SIZE GREATER 1)

    set(CUDA_FILE_ARGS)
    set(TARGET_SOURCE_ARGS)

    # Split the arguments into ".cu" files (to be batched) and everything else
    # (passed through untouched).
    foreach(ARG ${__UNPARSED_ARGUMENTS})
      # Test the variable by name rather than "${ARG}" unquoted: an unquoted
      # expansion would be dereferenced a second time if the argument's text
      # happened to name a variable. The dot is escaped as "\\." so that it is
      # a literal dot for the regex engine ("\." alone collapses to ".").
      if(ARG MATCHES "\\.cu$")
        list(APPEND CUDA_FILE_ARGS ${ARG})
      else()
        list(APPEND TARGET_SOURCE_ARGS ${ARG})
      endif()
    endforeach()

    list(LENGTH CUDA_FILE_ARGS NUM_CUDA_FILE_ARGS)
    while(NUM_CUDA_FILE_ARGS GREATER 0)

      # Take the next (up to) __BATCH_SIZE files and derive a stable file name
      # from the batch contents.
      list(SUBLIST CUDA_FILE_ARGS 0 ${__BATCH_SIZE} CUDA_FILE_BATCH)
      string(SHA256 CUDA_FILE_BATCH_HASH "${CUDA_FILE_BATCH}")
      string(SUBSTRING ${CUDA_FILE_BATCH_HASH} 0 12 CUDA_FILE_BATCH_HASH)
      set(BATCH_FILE ${CMAKE_CURRENT_BINARY_DIR}/${NAME}.unity.${CUDA_FILE_BATCH_HASH}.cu)
      message(STATUS "Generating ${BATCH_FILE}")

      # Assemble the unity file in memory first.
      set(BATCH_FILE_CONTENT "// Unity File - Auto Generated!\n")
      foreach(CUDA_FILE ${CUDA_FILE_BATCH})
        get_filename_component(CUDA_FILE_ABS_PATH ${CUDA_FILE} ABSOLUTE)
        string(APPEND BATCH_FILE_CONTENT "#include \"${CUDA_FILE_ABS_PATH}\"\n")
      endforeach()

      # Only touch the file on disk when its content actually changes. The
      # unity file is a build input, so rewriting it on every configure would
      # bump its timestamp and force a needless recompile of the whole batch.
      set(EXISTING_BATCH_FILE_CONTENT "")
      if(EXISTS "${BATCH_FILE}")
        file(READ "${BATCH_FILE}" EXISTING_BATCH_FILE_CONTENT)
      endif()
      if(NOT EXISTING_BATCH_FILE_CONTENT STREQUAL BATCH_FILE_CONTENT)
        file(WRITE "${BATCH_FILE}" "${BATCH_FILE_CONTENT}")
      endif()

      list(APPEND TARGET_SOURCE_ARGS ${BATCH_FILE})

      # Stop before slicing when this was the last batch: list(SUBLIST) with a
      # start index equal to the list length is an error.
      if(NUM_CUDA_FILE_ARGS LESS_EQUAL __BATCH_SIZE)
        break()
      endif()

      list(SUBLIST CUDA_FILE_ARGS ${__BATCH_SIZE} -1 CUDA_FILE_ARGS)
      list(LENGTH CUDA_FILE_ARGS NUM_CUDA_FILE_ARGS)

    endwhile()

  else()

    set(TARGET_SOURCE_ARGS ${__UNPARSED_ARGUMENTS})

  endif()

  set(${TARGET_ARGS_VAR} ${TARGET_SOURCE_ARGS} PARENT_SCOPE)

endfunction()
# cutlass_add_library(<name> [SKIP_GENCODE_FLAGS] [EXPORT_NAME <export>] <arg>...)
#
# Creates library target <name> from <arg>... after passing them through
# cutlass_unify_source_files, then applies the project's standard compile
# options and (unless SKIP_GENCODE_FLAGS is given) its CUDA gencode flags.
#
# Arguments:
#   <name>               Name of the library target to create.
#   SKIP_GENCODE_FLAGS   Do not call cutlass_apply_cuda_gencode_flags.
#   EXPORT_NAME <export> Also create the alias nvidia::cutlass::<export> and set
#                        the target's EXPORT_NAME property to <export>.
#   <arg>...             Remaining arguments, forwarded to add_library /
#                        cuda_add_library.
function(cutlass_add_library NAME)

  set(options SKIP_GENCODE_FLAGS)
  set(oneValueArgs EXPORT_NAME)
  set(multiValueArgs)
  cmake_parse_arguments(_ "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

  cutlass_unify_source_files(TARGET_SOURCE_ARGS ${__UNPARSED_ARGUMENTS})

  # Match clang the same way as the rest of this file ("[Cc]lang"), so that a
  # compiler spelled "Clang" takes the same path here as it does in
  # cutlass_correct_source_file_language_property.
  if(CUTLASS_NATIVE_CUDA OR CUDA_COMPILER MATCHES "[Cc]lang")
    cutlass_correct_source_file_language_property(${TARGET_SOURCE_ARGS})
    add_library(${NAME} ${TARGET_SOURCE_ARGS} "")
  else()
    # Legacy FindCUDA path; CUDA_LINK_LIBRARIES_KEYWORD is read by
    # cuda_add_library for its internal target_link_libraries calls.
    set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE)
    cuda_add_library(${NAME} ${TARGET_SOURCE_ARGS} "")
  endif()

  cutlass_apply_standard_compile_options(${NAME})

  if(NOT __SKIP_GENCODE_FLAGS)
    cutlass_apply_cuda_gencode_flags(${NAME})
  endif()

  target_compile_features(
    ${NAME}
    INTERFACE
    cxx_std_11
  )

  # Pick the CUDA runtime flavour that corresponds to the kind of library that
  # was created; other target types are left at their default.
  get_target_property(TARGET_TYPE ${NAME} TYPE)

  if(TARGET_TYPE MATCHES "SHARED")
    set_target_properties(${NAME} PROPERTIES CUDA_RUNTIME_LIBRARY Shared)
  elseif(TARGET_TYPE MATCHES "STATIC")
    set_target_properties(${NAME} PROPERTIES CUDA_RUNTIME_LIBRARY Static)
  endif()

  if(__EXPORT_NAME)
    add_library(nvidia::cutlass::${__EXPORT_NAME} ALIAS ${NAME})
    set_target_properties(${NAME} PROPERTIES EXPORT_NAME ${__EXPORT_NAME})
  endif()

endfunction()
# cutlass_add_executable(<name> [CUDA_RUNTIME_LIBRARY <None|Shared|Static>] <arg>...)
#
# Creates executable target <name> from <arg>... after passing them through
# cutlass_unify_source_files, applies the project's standard compile options
# and CUDA gencode flags, and sets the target's CUDA_RUNTIME_LIBRARY property.
#
# Arguments:
#   <name>                     Name of the executable target to create.
#   CUDA_RUNTIME_LIBRARY <v>   One of None, Shared or Static (default: Shared).
#                              Any other value is a fatal error.
#   <arg>...                   Remaining arguments, forwarded to add_executable /
#                              cuda_add_executable.
function(cutlass_add_executable NAME)

  set(options)
  set(oneValueArgs CUDA_RUNTIME_LIBRARY)
  set(multiValueArgs)
  cmake_parse_arguments(_ "${options}" "${oneValueArgs}" "${multiValueArgs}" ${ARGN})

  if(NOT DEFINED __CUDA_RUNTIME_LIBRARY)
    set(__CUDA_RUNTIME_LIBRARY Shared)
  endif()

  set(__CUDA_RUNTIME_LIBRARY_ALLOWED None Shared Static)
  if(NOT __CUDA_RUNTIME_LIBRARY IN_LIST __CUDA_RUNTIME_LIBRARY_ALLOWED)
    message(FATAL_ERROR "CUDA_RUNTIME_LIBRARY value '${__CUDA_RUNTIME_LIBRARY}' is not in allowed list of '${__CUDA_RUNTIME_LIBRARY_ALLOWED}'")
  endif()

  cutlass_unify_source_files(TARGET_SOURCE_ARGS ${__UNPARSED_ARGUMENTS})

  # Match clang the same way as the rest of this file ("[Cc]lang"), so that a
  # compiler spelled "Clang" takes the same path here as it does in
  # cutlass_correct_source_file_language_property.
  if(CUTLASS_NATIVE_CUDA OR CUDA_COMPILER MATCHES "[Cc]lang")
    cutlass_correct_source_file_language_property(${TARGET_SOURCE_ARGS})
    add_executable(${NAME} ${TARGET_SOURCE_ARGS})
  else()
    # Legacy FindCUDA path; CUDA_LINK_LIBRARIES_KEYWORD is read by
    # cuda_add_executable for its internal target_link_libraries calls.
    set(CUDA_LINK_LIBRARIES_KEYWORD PRIVATE)
    cuda_add_executable(${NAME} ${TARGET_SOURCE_ARGS})
  endif()

  cutlass_apply_standard_compile_options(${NAME})
  cutlass_apply_cuda_gencode_flags(${NAME})

  target_compile_features(
    ${NAME}
    INTERFACE
    cxx_std_11
  )

  set_target_properties(${NAME} PROPERTIES CUDA_RUNTIME_LIBRARY ${__CUDA_RUNTIME_LIBRARY})

endfunction()
# cutlass_target_sources(<name> <arg>...)
#
# Runs <arg>... through cutlass_unify_source_files and
# cutlass_correct_source_file_language_property, then forwards the resulting
# list to target_sources(<name> ...).
function(cutlass_target_sources NAME)

  # No keywords are declared, so every extra argument ends up in
  # __UNPARSED_ARGUMENTS.
  cmake_parse_arguments(_ "" "" "" ${ARGN})

  cutlass_unify_source_files(UNIFIED_SOURCE_ARGS ${__UNPARSED_ARGUMENTS})
  cutlass_correct_source_file_language_property(${UNIFIED_SOURCE_ARGS})

  target_sources(${NAME} ${UNIFIED_SOURCE_ARGS})

endfunction()