{
  lib,
  glibc,
  config,
  stdenv,
  mkShell,
  runCommand,
  cmake,
  ninja,
  pkg-config,
  git,
  python3,
  mpi,
  blas,
  cudaPackages,
  darwin,
  rocmPackages,
  vulkan-headers,
  vulkan-loader,
  clblast,

  useBlas ?
    builtins.all (x: !x) [
      useCuda
      useMetalKit
      useOpenCL
      useRocm
      useVulkan
    ]
    && blas.meta.available,
  useCuda ? config.cudaSupport,
  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
  useMpi ? false, # Increases the runtime closure size by ~700M
  useOpenCL ? false,
  useRocm ? config.rocmSupport,
  useVulkan ? false,
  llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake

  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
  enableStatic ? effectiveStdenv.hostPlatform.isStatic,
  precompileMetalShaders ? false
}@inputs:
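
# A sketch of how the backends above are meant to be toggled when this file is
# instantiated via `callPackage` (the attribute name `llama-cpp` is
# hypothetical; it depends on how the caller exposes the package):
#
#   llama-cpp.override { useVulkan = true; }
#
# Any of the `use*` arguments can be flipped the same way.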
let
  inherit (lib)
    cmakeBool
    cmakeFeature
    optionals
    strings
    versionOlder
    ;
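
  # Shadow the stdenv argument so that any accidental reference fails at
  # evaluation time; all builds must go through effectiveStdenv (see above).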
  stdenv = throw "Use effectiveStdenv instead";
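
  # Names of the enabled backends, used below to derive the pname and
  # description suffixes.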
  suffices =
    lib.optionals useBlas [ "BLAS" ]
    ++ lib.optionals useCuda [ "CUDA" ]
    ++ lib.optionals useMetalKit [ "MetalKit" ]
    ++ lib.optionals useMpi [ "MPI" ]
    ++ lib.optionals useOpenCL [ "OpenCL" ]
    ++ lib.optionals useRocm [ "ROCm" ]
    ++ lib.optionals useVulkan [ "Vulkan" ];

  pnameSuffix =
    strings.optionalString (suffices != [ ])
      "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
  descriptionSuffix =
    strings.optionalString (suffices != [ ])
      ", accelerated with ${strings.concatStringsSep ", " suffices}";

  executableSuffix = effectiveStdenv.hostPlatform.extensions.executable;

  # TODO: package the Python in this repository in a Nix-like way.
  # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
  # is PEP 517-compatible, and ensure the correct .dist-info is generated.
  # https://peps.python.org/pep-0517/
  #
  # TODO: Package up each Python script or service appropriately, by making
  # them into "entrypoints"
  llama-python = python3.withPackages (
    ps: [
      ps.numpy
      ps.sentencepiece
    ]
  );

  # TODO(Green-Sky): find a better way to opt into the heavy ML Python runtime
  llama-python-extra = python3.withPackages (
    ps: [
      ps.numpy
      ps.sentencepiece
      ps.tiktoken
      ps.torchWithoutCuda
      ps.transformers
    ]
  );
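
  # Impure helper exposing the host's /usr/bin/xcrun, which is needed to locate
  # the Metal shader compiler when precompileMetalShaders is set; this only
  # works together with the sandbox escape (__noChroot) configured below.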
  xcrunHost = runCommand "xcrunHost" { } ''
    mkdir -p $out/bin
    ln -s /usr/bin/xcrun $out/bin
  '';

  # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
  # separately
  darwinBuildInputs =
    with darwin.apple_sdk.frameworks;
    [
      Accelerate
      CoreVideo
      CoreGraphics
    ]
    ++ optionals useMetalKit [ MetalKit ];

  cudaBuildInputs = with cudaPackages; [
    cuda_cccl.dev # <nv/target>

    # A temporary hack for reducing the closure size, remove once cudaPackages
    # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
    cuda_cudart.dev
    cuda_cudart.lib
    cuda_cudart.static
    libcublas.dev
    libcublas.lib
    libcublas.static
  ];

  rocmBuildInputs = with rocmPackages; [
    clr
    hipblas
    rocblas
  ];

  vulkanBuildInputs = [
    vulkan-headers
    vulkan-loader
  ];
in
effectiveStdenv.mkDerivation (
  finalAttrs: {
    pname = "llama-cpp${pnameSuffix}";
    version = llamaVersion;

    # Note: none of the files discarded here are visible in the sandbox or
    # affect the output hash. This also means they can be modified without
    # triggering a rebuild.
    src = lib.cleanSourceWith {
      filter =
        name: type:
        let
          noneOf = builtins.all (x: !x);
          baseName = baseNameOf name;
        in
        noneOf [
          (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
(lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths | |
          (lib.hasPrefix "." baseName) # Skip hidden files and directories
          (baseName == "flake.lock")
        ];
      src = lib.cleanSource ../../.;
    };
    postPatch = ''
      substituteInPlace ./ggml-metal.m \
        --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
      substituteInPlace ./ggml-metal.m \
        --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
    '';

    # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
    # `default.metallib` may be compiled with the Metal compiler from Xcode,
    # and we need to escape the sandbox on macOS to access the Metal compiler.
    # `xcrun` is used to find the path of the Metal compiler, which is variable
    # and not on $PATH.
    # See https://github.com/ggerganov/llama.cpp/pull/6118 for discussion.
    __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;

    nativeBuildInputs =
      [
        cmake
        ninja
        pkg-config
        git
      ]
      ++ optionals useCuda [
        cudaPackages.cuda_nvcc

        # TODO: Replace with autoAddDriverRunpath
        # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
        cudaPackages.autoAddOpenGLRunpathHook
      ]
      ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
      ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];

    buildInputs =
      optionals effectiveStdenv.isDarwin darwinBuildInputs
      ++ optionals useCuda cudaBuildInputs
      ++ optionals useMpi [ mpi ]
      ++ optionals useOpenCL [ clblast ]
      ++ optionals useRocm rocmBuildInputs
      ++ optionals useBlas [ blas ]
      ++ optionals useVulkan vulkanBuildInputs;

    cmakeFlags =
      [
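        # LLAMA_NATIVE=ON would tune for the build machine's CPU (-march=native);
        # keep it off so the resulting binaries stay portable and reproducible.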
(cmakeBool "LLAMA_NATIVE" false) | |
(cmakeBool "LLAMA_BUILD_SERVER" true) | |
(cmakeBool "BUILD_SHARED_LIBS" (!enableStatic)) | |
(cmakeBool "CMAKE_SKIP_BUILD_RPATH" true) | |
(cmakeBool "LLAMA_BLAS" useBlas) | |
(cmakeBool "LLAMA_CLBLAST" useOpenCL) | |
(cmakeBool "LLAMA_CUDA" useCuda) | |
(cmakeBool "LLAMA_HIPBLAS" useRocm) | |
(cmakeBool "LLAMA_METAL" useMetalKit) | |
(cmakeBool "LLAMA_MPI" useMpi) | |
(cmakeBool "LLAMA_VULKAN" useVulkan) | |
(cmakeBool "LLAMA_STATIC" enableStatic) | |
] | |
++ optionals useCuda [ | |
( | |
with cudaPackages.flags; | |
cmakeFeature "CMAKE_CUDA_ARCHITECTURES" ( | |
builtins.concatStringsSep ";" (map dropDot cudaCapabilities) | |
) | |
) | |
] | |
++ optionals useRocm [ | |
(cmakeFeature "CMAKE_C_COMPILER" "hipcc") | |
(cmakeFeature "CMAKE_CXX_COMPILER" "hipcc") | |
# Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM | |
# in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt | |
# and select the line that matches the current nixpkgs version of rocBLAS. | |
# Should likely use `rocmPackages.clr.gpuTargets`. | |
"-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102" | |
] | |
++ optionals useMetalKit [ | |
(lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1") | |
(cmakeBool "LLAMA_METAL_EMBED_LIBRARY" (!precompileMetalShaders)) | |
]; | |

    # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
    # if they haven't been added yet.
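    # Upstream's CMake installs the executables as `main` and `server`; rename
    # them to match the `llama` name advertised via meta.mainProgram below.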
    postInstall = ''
      mv $out/bin/main${executableSuffix} $out/bin/llama${executableSuffix}
      mv $out/bin/server${executableSuffix} $out/bin/llama-server${executableSuffix}
      mkdir -p $out/include
      cp $src/llama.h $out/include/
    '';

    # Define the shells here, but don't add in the inputsFrom to avoid recursion.
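    # A sketch of how the shells might be entered; the exact attribute path
    # depends on how the flake exposes this package, so the path below is
    # hypothetical:
    #
    #   nix develop .#llama-cpp.passthru.shell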
    passthru = {
      inherit
        useBlas
        useCuda
        useMetalKit
        useMpi
        useOpenCL
        useRocm
        useVulkan
        ;

      shell = mkShell {
        name = "shell-${finalAttrs.finalPackage.name}";
        description = "contains numpy and sentencepiece";
        buildInputs = [ llama-python ];
        inputsFrom = [ finalAttrs.finalPackage ];
        shellHook = ''
          addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib"
        '';
      };

      shell-extra = mkShell {
        name = "shell-extra-${finalAttrs.finalPackage.name}";
        description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
        buildInputs = [ llama-python-extra ];
        inputsFrom = [ finalAttrs.finalPackage ];
      };
    };

    meta = {
      # Configurations we don't want even the CI to evaluate. Results in the
      # "unsupported platform" messages. This is mostly a no-op, because
      # cudaPackages would've refused to evaluate anyway.
      badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;

      # Configurations that are known to result in build failures. Can be
      # overridden by importing Nixpkgs with `allowBroken = true`.
      broken = (useMetalKit && !effectiveStdenv.isDarwin);

      description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
      homepage = "https://github.com/ggerganov/llama.cpp/";
      license = lib.licenses.mit;

      # Accommodates `nix run` and `lib.getExe`
      mainProgram = "llama";

      # These people might respond, on a best-effort basis, if you ping them
      # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
      # Consider adding yourself to this list if you want to ensure this flake
      # stays maintained and you're willing to invest your time. Do not add
      # other people without their consent. Consider removing people after
      # they've been unreachable for long periods of time.
      #
      # Note that lib.maintainers is defined in Nixpkgs, but you may just add
      # an attrset following the same format as in
      # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
      maintainers = with lib.maintainers; [
        philiptaron
        SomeoneSerge
      ];

      # Extend `badPlatforms` instead
      platforms = lib.platforms.all;
    };
  }
) | |