faiss-gpu>=1.7.2 | |
# onnxruntime-gpu is needed by the `unstructured` package | |
onnxruntime-gpu==1.15.0 | |
auto-gptq>=0.7.1 | |
#optimum>=1.17.1 | |
# The autoawq packages below assume CUDA 12.1; for any other CUDA version, build from source: https://github.com/casper-hansen/AutoAWQ?tab=readme-ov-file#build-from-source | |
autoawq | |
autoawq-kernels | |
exllama @ https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp310-cp310-linux_x86_64.whl | |
# flash-attn is left disabled; see https://github.com/Dao-AILab/flash-attention/issues/453 | |
# flash-attn==2.4.2 | |