// Llama-3.1-8B-DALv0.1/venv/lib/python3.12/site-packages/transformers/kernels/mra/cuda_launch.h
// Host-callable entry point `index_max_kernel` (declaration only; the
// definition is not visible in this header).
//
// Parameters:
//   index_vals   tensor passed by value (at::Tensor handle).
//   indices      tensor passed by value (at::Tensor handle).
//   A_num_block  integer count.
//   B_num_block  integer count.
//
// Returns:
//   A std::vector of at::Tensor; the number and meaning of the returned
//   tensors is determined by the definition.
//
// NOTE(review): A_num_block / B_num_block are presumably the block counts of
// the two block-partitioned dimensions, and `indices` presumably selects the
// blocks to visit -- confirm against the definition. Shapes, dtypes and the
// device of the tensor arguments are not established here.
std::vector<at::Tensor> index_max_kernel(
  at::Tensor index_vals,
  at::Tensor indices,
  int A_num_block,
  int B_num_block
);
// Host-callable entry point `mm_to_sparse_kernel` (declaration only; the
// definition is not visible in this header).
//
// Parameters:
//   dense_A  tensor passed by value (at::Tensor handle).
//   dense_B  tensor passed by value (at::Tensor handle).
//   indices  tensor passed by value (at::Tensor handle).
//
// Returns:
//   A single at::Tensor.
//
// NOTE(review): the name suggests a product of two dense operands written
// into a sparse (block-indexed) layout selected by `indices` -- confirm
// against the definition. Shapes, dtypes and the device of the tensor
// arguments are not established here.
at::Tensor mm_to_sparse_kernel(
  at::Tensor dense_A,
  at::Tensor dense_B,
  at::Tensor indices
);
// Host-callable entry point `sparse_dense_mm_kernel` (declaration only; the
// definition is not visible in this header).
//
// Parameters:
//   sparse_A     tensor passed by value (at::Tensor handle).
//   indices      tensor passed by value (at::Tensor handle).
//   dense_B      tensor passed by value (at::Tensor handle).
//   A_num_block  integer count.
//
// Returns:
//   A single at::Tensor.
//
// NOTE(review): the name suggests a sparse-times-dense matrix product where
// `indices` locates the stored blocks of `sparse_A` and A_num_block is the
// block count along A's leading dimension -- confirm against the definition.
// Shapes, dtypes and the device of the tensor arguments are not established
// here.
at::Tensor sparse_dense_mm_kernel(
  at::Tensor sparse_A,
  at::Tensor indices,
  at::Tensor dense_B,
  int A_num_block
);
// Host-callable entry point `reduce_sum_kernel` (declaration only; the
// definition is not visible in this header).
//
// Parameters:
//   sparse_A     tensor passed by value (at::Tensor handle).
//   indices      tensor passed by value (at::Tensor handle).
//   A_num_block  integer count.
//   B_num_block  integer count.
//
// Returns:
//   A single at::Tensor.
//
// NOTE(review): the name suggests a sum reduction over the stored blocks of
// `sparse_A`, with `indices` locating those blocks -- confirm against the
// definition, including which axis is reduced. Shapes, dtypes and the device
// of the tensor arguments are not established here.
at::Tensor reduce_sum_kernel(
  at::Tensor sparse_A,
  at::Tensor indices,
  int A_num_block,
  int B_num_block
);
// Host-callable entry point `scatter_kernel` (declaration only; the
// definition is not visible in this header).
//
// Parameters:
//   dense_A      tensor passed by value (at::Tensor handle).
//   indices      tensor passed by value (at::Tensor handle).
//   B_num_block  integer count.
//
// Returns:
//   A single at::Tensor.
//
// NOTE(review): the name suggests values from `dense_A` are scattered to
// positions selected by `indices` -- confirm against the definition. Shapes,
// dtypes and the device of the tensor arguments are not established here.
at::Tensor scatter_kernel(
  at::Tensor dense_A,
  at::Tensor indices,
  int B_num_block
);