Spaces:
Sleeping
Sleeping
/*************************************************************************************************** | |
* Copyright (c) 2017 - 2024 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | |
* SPDX-License-Identifier: BSD-3-Clause | |
* | |
* Redistribution and use in source and binary forms, with or without | |
* modification, are permitted provided that the following conditions are met: | |
* | |
* 1. Redistributions of source code must retain the above copyright notice, this | |
* list of conditions and the following disclaimer. | |
* | |
* 2. Redistributions in binary form must reproduce the above copyright notice, | |
* this list of conditions and the following disclaimer in the documentation | |
* and/or other materials provided with the distribution. | |
* | |
* 3. Neither the name of the copyright holder nor the names of its | |
* contributors may be used to endorse or promote products derived from | |
* this software without specific prior written permission. | |
* | |
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" | |
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | |
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE | |
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE | |
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL | |
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER | |
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, | |
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | |
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | |
* | |
**************************************************************************************************/ | |
/*! \file | |
\brief Unit tests for thread-level GEMM | |
*/ | |
namespace test { | |
namespace gemm { | |
namespace thread { | |
///////////////////////////////////////////////////////////////////////////////////////////////// | |
/// Thread-level matrix multiply-accumulate | |
template <typename Mma> | |
__global__ void kernel( | |
typename Mma::ElementC *D, | |
typename Mma::ElementA const *A, | |
typename Mma::ElementB const *B, | |
typename Mma::ElementC const *C) { | |
auto ptr_D = reinterpret_cast<cutlass::Array<typename Mma::ElementC, Mma::Shape::kMN> *>(D); | |
auto ptr_A = reinterpret_cast<cutlass::Array<typename Mma::ElementA, Mma::Shape::kMK> const *>(A); | |
auto ptr_B = reinterpret_cast<cutlass::Array<typename Mma::ElementB, Mma::Shape::kKN> const *>(B); | |
auto ptr_C = reinterpret_cast<cutlass::Array<typename Mma::ElementC, Mma::Shape::kMN> const *>(C); | |
Mma mma; | |
auto a = *ptr_A; | |
auto b = *ptr_B; | |
auto c = *ptr_C; | |
cutlass::Array<typename Mma::ElementC, Mma::Shape::kMN> d; | |
mma(d, a, b, c); | |
*ptr_D = d; | |
} | |
///////////////////////////////////////////////////////////////////////////////////////////////// | |
/// Structure to compute the matrix product | |
template < | |
/// Size of the Gemm problem - concept: gemm::GemmShape<> | |
typename Shape, | |
/// Data type of A elements | |
typename ElementA, | |
/// Layout of A matrix (concept: MatrixLayout) | |
typename LayoutA, | |
/// Data type of B elements | |
typename ElementB, | |
/// Layout of B matrix (concept: MatrixLayout) | |
typename LayoutB, | |
/// Element type of C matrix | |
typename ElementC, | |
/// Layout of C matrix (concept: MatrixLayout) | |
typename LayoutC | |
> | |
struct Testbed { | |
/// Thread-level matrix multiply-accumulate operator | |
using Mma = cutlass::gemm::thread::Mma< | |
Shape, | |
ElementA, | |
LayoutA, | |
ElementB, | |
LayoutB, | |
ElementC, | |
LayoutC | |
>; | |
// | |
// Data members | |
// | |
cutlass::HostTensor<ElementA, LayoutA> tensor_A; | |
cutlass::HostTensor<ElementB, LayoutB> tensor_B; | |
cutlass::HostTensor<ElementC, LayoutC> tensor_C; | |
cutlass::HostTensor<ElementC, LayoutC> tensor_D_computed; | |
cutlass::HostTensor<ElementC, LayoutC> tensor_D_reference; | |
// | |
// Methods | |
// | |
/// Allocates workspace in device memory | |
Testbed() { | |
tensor_A.reset(cutlass::make_Coord(Shape::kM, Shape::kK)); | |
tensor_B.reset(cutlass::make_Coord(Shape::kK, Shape::kN)); | |
tensor_C.reset(cutlass::make_Coord(Shape::kM, Shape::kN)); | |
tensor_D_computed.reset(cutlass::make_Coord(Shape::kM, Shape::kN)); | |
tensor_D_reference.reset(cutlass::make_Coord(Shape::kM, Shape::kN), false); | |
} | |
/// Runs the test | |
bool run() { | |
// | |
// initialize device memory | |
// | |
cutlass::reference::host::BlockFillSequential( | |
tensor_A.host_data(), | |
tensor_A.capacity() | |
); | |
cutlass::reference::host::BlockFillSequential( | |
tensor_B.host_data(), | |
tensor_B.capacity(), | |
ElementB(1), | |
ElementB(2) | |
); | |
cutlass::reference::host::TensorFill( | |
tensor_C.host_view(), | |
ElementC(0) | |
); | |
cutlass::reference::host::TensorFill( | |
tensor_D_computed.host_view(), | |
ElementC(0) | |
); | |
cutlass::reference::host::TensorFill( | |
tensor_D_reference.host_view(), | |
ElementC(0) | |
); | |
tensor_A.sync_device(); | |
tensor_B.sync_device(); | |
tensor_C.sync_device(); | |
tensor_D_computed.sync_device(); | |
// launch kernel | |
kernel<Mma><<< dim3(1, 1), dim3(1, 1, 1) >>>( | |
tensor_D_computed.device_data(), | |
tensor_A.device_data(), | |
tensor_B.device_data(), | |
tensor_C.device_data()); | |
// verify no errors | |
cudaError_t result = cudaDeviceSynchronize(); | |
EXPECT_EQ(result, cudaSuccess) << "CUDA ERROR: " << cudaGetErrorString(result); | |
if (result != cudaSuccess) { | |
return false; | |
} | |
tensor_D_computed.sync_host(); | |
// | |
// Reference implementation | |
// | |
//tensor_D_reference.fill(tensor_C.host_view()); | |
cutlass::reference::host::Gemm<ElementA, LayoutA, ElementB, LayoutB, | |
ElementC, LayoutC, ElementC, ElementC> | |
reference_gemm; | |
reference_gemm( | |
{Shape::kM, Shape::kN, Shape::kK}, | |
ElementC(1), | |
tensor_A.host_ref(), | |
tensor_B.host_ref(), | |
ElementC(0), | |
tensor_D_reference.host_ref() | |
); | |
// | |
// Verify equivalence | |
// | |
// compare | |
bool passed = cutlass::reference::host::TensorEquals( | |
tensor_D_computed.host_view(), | |
tensor_D_reference.host_view() | |
); | |
EXPECT_TRUE(passed) | |
<< "A:\n" << tensor_A.host_view() << "\n\n" | |
<< "B:\n" << tensor_B.host_view() << "\n\n" | |
<< "C:\n" << tensor_C.host_view() << "\n\n" | |
<< "Reference:\n" << tensor_D_reference.host_view() << "\n\n" | |
<< "Computed:\n" << tensor_D_computed.host_view() << std::endl; | |
return passed; | |
} | |
}; | |
///////////////////////////////////////////////////////////////////////////////////////////////// | |
} // namespace thread | |
} // namespace gemm | |
} // namespace test | |