File size: 10,384 Bytes
4bdb245
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
{
  lib,
  glibc,
  config,
  stdenv,
  mkShell,
  runCommand,
  cmake,
  ninja,
  pkg-config,
  git,
  python3,
  mpi,
  blas,
  cudaPackages,
  darwin,
  rocmPackages,
  vulkan-headers,
  vulkan-loader,
  clblast,
  useBlas ? builtins.all (x: !x) [
    useCuda
    useMetalKit
    useOpenCL
    useRocm
    useVulkan
  ] && blas.meta.available,
  useCuda ? config.cudaSupport,
  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin && !useOpenCL,
  useMpi ? false, # Increases the runtime closure size by ~700M
  useOpenCL ? false,
  useRocm ? config.rocmSupport,
  useVulkan ? false,
  llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake

  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
  enableStatic ? effectiveStdenv.hostPlatform.isStatic,
  precompileMetalShaders ? false
}@inputs:

let
  inherit (lib)
    cmakeBool
    cmakeFeature
    optionals
    strings
    versionOlder
    ;

  stdenv = throw "Use effectiveStdenv instead";

  suffices =
    lib.optionals useBlas [ "BLAS" ]
    ++ lib.optionals useCuda [ "CUDA" ]
    ++ lib.optionals useMetalKit [ "MetalKit" ]
    ++ lib.optionals useMpi [ "MPI" ]
    ++ lib.optionals useOpenCL [ "OpenCL" ]
    ++ lib.optionals useRocm [ "ROCm" ]
    ++ lib.optionals useVulkan [ "Vulkan" ];

  pnameSuffix =
    strings.optionalString (suffices != [ ])
      "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
  descriptionSuffix =
    strings.optionalString (suffices != [ ])
      ", accelerated with ${strings.concatStringsSep ", " suffices}";

  executableSuffix = effectiveStdenv.hostPlatform.extensions.executable;

  # TODO: package the Python in this repository in a Nix-like way.
  # It'd be nice to migrate to buildPythonPackage, as well as ensure this repo
  # is PEP 517-compatible, and ensure the correct .dist-info is generated.
  # https://peps.python.org/pep-0517/
  #
  # TODO: Package up each Python script or service appropriately, by making
  # them into "entrypoints"
  llama-python = python3.withPackages (
    ps: [
      ps.numpy
      ps.sentencepiece
    ]
  );

  # TODO(Green-Sky): find a better way to opt-into the heavy ml python runtime
  llama-python-extra = python3.withPackages (
    ps: [
      ps.numpy
      ps.sentencepiece
      ps.tiktoken
      ps.torchWithoutCuda
      ps.transformers
    ]
  );

  xcrunHost = runCommand "xcrunHost" {} ''
    mkdir -p $out/bin
    ln -s /usr/bin/xcrun $out/bin
  '';

  # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
  # separately
  darwinBuildInputs =
    with darwin.apple_sdk.frameworks;
    [
      Accelerate
      CoreVideo
      CoreGraphics
    ]
    ++ optionals useMetalKit [ MetalKit ];

  cudaBuildInputs = with cudaPackages; [
    cuda_cccl.dev # <nv/target>

    # A temporary hack for reducing the closure size, remove once cudaPackages
    # have stopped using lndir: https://github.com/NixOS/nixpkgs/issues/271792
    cuda_cudart.dev
    cuda_cudart.lib
    cuda_cudart.static
    libcublas.dev
    libcublas.lib
    libcublas.static
  ];

  rocmBuildInputs = with rocmPackages; [
    clr
    hipblas
    rocblas
  ];

  vulkanBuildInputs = [
    vulkan-headers
    vulkan-loader
  ];
in

effectiveStdenv.mkDerivation (
  finalAttrs: {
    pname = "llama-cpp${pnameSuffix}";
    version = llamaVersion;

    # Note: none of the files discarded here are visible in the sandbox or
    # affect the output hash. This also means they can be modified without
    # triggering a rebuild.
    src = lib.cleanSourceWith {
      filter =
        name: type:
        let
          noneOf = builtins.all (x: !x);
          baseName = baseNameOf name;
        in
        noneOf [
          (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
          (lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths
          (lib.hasPrefix "." baseName) # Skip hidden files and directories
          (baseName == "flake.lock")
        ];
      src = lib.cleanSource ../../.;
    };

    postPatch = ''
      substituteInPlace ./ggml-metal.m \
        --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
      substituteInPlace ./ggml-metal.m \
        --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
    '';

    # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
    # `default.metallib` may be compiled with Metal compiler from XCode
    # and we need to escape sandbox on MacOS to access Metal compiler.
    # `xcrun` is used find the path of the Metal compiler, which is varible
    # and not on $PATH
    # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion
    __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;

    nativeBuildInputs =
      [
        cmake
        ninja
        pkg-config
        git
      ]
      ++ optionals useCuda [
        cudaPackages.cuda_nvcc

        # TODO: Replace with autoAddDriverRunpath
        # once https://github.com/NixOS/nixpkgs/pull/275241 has been merged
        cudaPackages.autoAddOpenGLRunpathHook
      ]
      ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [
        glibc.static
      ] ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [
        xcrunHost
      ];

    buildInputs =
      optionals effectiveStdenv.isDarwin darwinBuildInputs
      ++ optionals useCuda cudaBuildInputs
      ++ optionals useMpi [ mpi ]
      ++ optionals useOpenCL [ clblast ]
      ++ optionals useRocm rocmBuildInputs
      ++ optionals useBlas [ blas ]
      ++ optionals useVulkan vulkanBuildInputs;

    cmakeFlags =
      [
        (cmakeBool "LLAMA_NATIVE" false)
        (cmakeBool "LLAMA_BUILD_SERVER" true)
        (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
        (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
        (cmakeBool "LLAMA_BLAS" useBlas)
        (cmakeBool "LLAMA_CLBLAST" useOpenCL)
        (cmakeBool "LLAMA_CUDA" useCuda)
        (cmakeBool "LLAMA_HIPBLAS" useRocm)
        (cmakeBool "LLAMA_METAL" useMetalKit)
        (cmakeBool "LLAMA_MPI" useMpi)
        (cmakeBool "LLAMA_VULKAN" useVulkan)
        (cmakeBool "LLAMA_STATIC" enableStatic)
      ]
      ++ optionals useCuda [
        (
          with cudaPackages.flags;
          cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
            builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
          )
        )
      ]
      ++ optionals useRocm [
        (cmakeFeature "CMAKE_C_COMPILER" "hipcc")
        (cmakeFeature "CMAKE_CXX_COMPILER" "hipcc")

        # Build all targets supported by rocBLAS. When updating search for TARGET_LIST_ROCM
        # in https://github.com/ROCmSoftwarePlatform/rocBLAS/blob/develop/CMakeLists.txt
        # and select the line that matches the current nixpkgs version of rocBLAS.
        # Should likely use `rocmPackages.clr.gpuTargets`.
        "-DAMDGPU_TARGETS=gfx803;gfx900;gfx906:xnack-;gfx908:xnack-;gfx90a:xnack+;gfx90a:xnack-;gfx940;gfx941;gfx942;gfx1010;gfx1012;gfx1030;gfx1100;gfx1101;gfx1102"
      ]
      ++ optionals useMetalKit [
        (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
        (cmakeBool "LLAMA_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
      ];

    # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
    # if they haven't been added yet.
    postInstall = ''
      mv $out/bin/main${executableSuffix} $out/bin/llama${executableSuffix}
      mv $out/bin/server${executableSuffix} $out/bin/llama-server${executableSuffix}
      mkdir -p $out/include
      cp $src/llama.h $out/include/
    '';

    # Define the shells here, but don't add in the inputsFrom to avoid recursion.
    passthru = {
      inherit
        useBlas
        useCuda
        useMetalKit
        useMpi
        useOpenCL
        useRocm
        useVulkan
        ;

      shell = mkShell {
        name = "shell-${finalAttrs.finalPackage.name}";
        description = "contains numpy and sentencepiece";
        buildInputs = [ llama-python ];
        inputsFrom = [ finalAttrs.finalPackage ];
        shellHook = ''
          addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib effectiveStdenv.cc.cc}/lib"
        '';
      };

      shell-extra = mkShell {
        name = "shell-extra-${finalAttrs.finalPackage.name}";
        description = "contains numpy, sentencepiece, torchWithoutCuda, and transformers";
        buildInputs = [ llama-python-extra ];
        inputsFrom = [ finalAttrs.finalPackage ];
      };
    };

    meta = {
      # Configurations we don't want even the CI to evaluate. Results in the
      # "unsupported platform" messages. This is mostly a no-op, because
      # cudaPackages would've refused to evaluate anyway.
      badPlatforms = optionals (useCuda || useOpenCL) lib.platforms.darwin;

      # Configurations that are known to result in build failures. Can be
      # overridden by importing Nixpkgs with `allowBroken = true`.
      broken = (useMetalKit && !effectiveStdenv.isDarwin);

      description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
      homepage = "https://github.com/ggerganov/llama.cpp/";
      license = lib.licenses.mit;

      # Accommodates `nix run` and `lib.getExe`
      mainProgram = "llama";

      # These people might respond, on the best effort basis, if you ping them
      # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
      # Consider adding yourself to this list if you want to ensure this flake
      # stays maintained and you're willing to invest your time. Do not add
      # other people without their consent. Consider removing people after
      # they've been unreachable for long periods of time.

      # Note that lib.maintainers is defined in Nixpkgs, but you may just add
      # an attrset following the same format as in
      # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
      maintainers = with lib.maintainers; [
        philiptaron
        SomeoneSerge
      ];

      # Extend `badPlatforms` instead
      platforms = lib.platforms.all;
    };
  }
)