Upload 2637 files
This view is limited to 50 files because it contains too many changes. See the raw diff for the complete change set.
- .gitattributes +102 -0
- llama.cpp/.DS_Store +0 -0
- llama.cpp/.clang-format +161 -0
- llama.cpp/.clang-tidy +26 -0
- llama.cpp/.devops/cloud-v-pipeline +22 -0
- llama.cpp/.devops/cpu.Dockerfile +92 -0
- llama.cpp/.devops/cuda.Dockerfile +94 -0
- llama.cpp/.devops/intel.Dockerfile +91 -0
- llama.cpp/.devops/llama-cli-cann.Dockerfile +44 -0
- llama.cpp/.devops/llama-cpp-cuda.srpm.spec +83 -0
- llama.cpp/.devops/llama-cpp.srpm.spec +85 -0
- llama.cpp/.devops/musa.Dockerfile +108 -0
- llama.cpp/.devops/nix/apps.nix +21 -0
- llama.cpp/.devops/nix/devshells.nix +52 -0
- llama.cpp/.devops/nix/docker.nix +37 -0
- llama.cpp/.devops/nix/jetson-support.nix +39 -0
- llama.cpp/.devops/nix/nixpkgs-instances.nix +45 -0
- llama.cpp/.devops/nix/package-gguf-py.nix +36 -0
- llama.cpp/.devops/nix/package.nix +247 -0
- llama.cpp/.devops/nix/python-scripts.nix +66 -0
- llama.cpp/.devops/nix/scope.nix +41 -0
- llama.cpp/.devops/nix/sif.nix +27 -0
- llama.cpp/.devops/rocm.Dockerfile +113 -0
- llama.cpp/.devops/tools.sh +49 -0
- llama.cpp/.devops/vulkan.Dockerfile +89 -0
- llama.cpp/.dockerignore +20 -0
- llama.cpp/.ecrc +6 -0
- llama.cpp/.editorconfig +50 -0
- llama.cpp/.flake8 +17 -0
- llama.cpp/.github/.DS_Store +0 -0
- llama.cpp/.github/ISSUE_TEMPLATE/010-bug-compilation.yml +87 -0
- llama.cpp/.github/ISSUE_TEMPLATE/011-bug-results.yml +101 -0
- llama.cpp/.github/ISSUE_TEMPLATE/019-bug-misc.yml +91 -0
- llama.cpp/.github/ISSUE_TEMPLATE/020-enhancement.yml +51 -0
- llama.cpp/.github/ISSUE_TEMPLATE/030-research.yml +52 -0
- llama.cpp/.github/ISSUE_TEMPLATE/040-refactor.yml +28 -0
- llama.cpp/.github/ISSUE_TEMPLATE/config.yml +11 -0
- llama.cpp/.github/labeler.yml +86 -0
- llama.cpp/.github/pull_request_template.md +1 -0
- llama.cpp/.github/workflows/bench.yml.disabled +315 -0
- llama.cpp/.github/workflows/build.yml +1645 -0
- llama.cpp/.github/workflows/close-issue.yml +28 -0
- llama.cpp/.github/workflows/docker.yml +173 -0
- llama.cpp/.github/workflows/editorconfig.yml +29 -0
- llama.cpp/.github/workflows/gguf-publish.yml +44 -0
- llama.cpp/.github/workflows/labeler.yml +17 -0
- llama.cpp/.github/workflows/python-check-requirements.yml +33 -0
- llama.cpp/.github/workflows/python-lint.yml +30 -0
- llama.cpp/.github/workflows/python-type-check.yml +40 -0
- llama.cpp/.github/workflows/server.yml +239 -0
.gitattributes
CHANGED
@@ -33,3 +33,105 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/libggml-base.dylib filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/libggml-cpu.dylib filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/libggml-metal.dylib filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/libllama.dylib filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/libllava_shared.dylib filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-batched filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-batched-bench filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-bench filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-cli filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-convert-llama2c-to-ggml filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-cvector-generator filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-embedding filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-eval-callback filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-export-lora filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-gen-docs filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-gritlm filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-imatrix filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-infill filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-llava-cli filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-llava-clip-quantize-cli filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-lookahead filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-lookup filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-lookup-create filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-lookup-stats filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-minicpmv-cli filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-parallel filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-passkey filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-perplexity filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-quantize filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-quantize-stats filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-qwen2vl-cli filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-retrieval filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-run filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-save-load-state filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-server filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-speculative filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-speculative-simple filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-tokenize filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/llama-tts filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/test-arg-parser filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/test-backend-ops filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/test-chat filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/test-chat-template filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/test-grammar-integration filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/test-json-schema-to-grammar filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/test-tokenizer-0 filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/test-tokenizer-1-bpe filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/bin/test-tokenizer-1-spm filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/common/CMakeFiles/common.dir/arg.cpp.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/common/CMakeFiles/common.dir/chat.cpp.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/common/CMakeFiles/common.dir/common.cpp.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/common/CMakeFiles/common.dir/json-schema-to-grammar.cpp.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/common/libcommon.a filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/examples/llama-bench/CMakeFiles/llama-bench.dir/llama-bench.cpp.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/examples/llava/CMakeFiles/llava.dir/clip.cpp.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/examples/llava/libllava_static.a filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/examples/perplexity/CMakeFiles/llama-perplexity.dir/perplexity.cpp.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/examples/quantize-stats/CMakeFiles/llama-quantize-stats.dir/quantize-stats.cpp.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/examples/run/CMakeFiles/llama-run.dir/run.cpp.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/examples/server/CMakeFiles/llama-server.dir/server.cpp.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/examples/tts/CMakeFiles/llama-tts.dir/tts.cpp.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/ggml/src/CMakeFiles/ggml-base.dir/ggml-quants.c.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/ggml/src/CMakeFiles/ggml-base.dir/ggml.c.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/ggml/src/CMakeFiles/ggml-base.dir/gguf.cpp.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/ggml/src/CMakeFiles/ggml-cpu.dir/ggml-cpu/ggml-cpu.c.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/ggml/src/CMakeFiles/ggml-cpu.dir/ggml-cpu/llamafile/sgemm.cpp.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/ggml/src/ggml-metal/CMakeFiles/ggml-metal.dir/__/__/__/autogenerated/ggml-metal-embed.s.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/ggml/src/ggml-metal/CMakeFiles/ggml-metal.dir/ggml-metal.m.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/llama-model-loader.cpp.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/llama-model.cpp.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/llama-sampling.cpp.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/llama-vocab.cpp.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/llama.cpp.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/src/CMakeFiles/llama.dir/unicode.cpp.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/tests/CMakeFiles/test-backend-ops.dir/test-backend-ops.cpp.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/tests/CMakeFiles/test-chat-template.dir/test-chat-template.cpp.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/tests/CMakeFiles/test-chat.dir/test-chat.cpp.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/tests/CMakeFiles/test-grammar-integration.dir/test-grammar-integration.cpp.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/build/tests/CMakeFiles/test-json-schema-to-grammar.dir/test-json-schema-to-grammar.cpp.o filter=lfs diff=lfs merge=lfs -text
+llama.cpp/docs/development/llama-star/idea-arch.key filter=lfs diff=lfs merge=lfs -text
+llama.cpp/examples/server/themes/buttons-top/buttons_top.png filter=lfs diff=lfs merge=lfs -text
+llama.cpp/examples/server/themes/wild/llamapattern.png filter=lfs diff=lfs merge=lfs -text
+llama.cpp/examples/server/themes/wild/wild.png filter=lfs diff=lfs merge=lfs -text
+llama.cpp/media/llama0-banner.png filter=lfs diff=lfs merge=lfs -text
+llama.cpp/media/llama0-logo.png filter=lfs diff=lfs merge=lfs -text
+llama.cpp/media/matmul.png filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-aquila.gguf filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-baichuan.gguf filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-bert-bge.gguf filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-command-r.gguf filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-deepseek-coder.gguf filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-deepseek-llm.gguf filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-falcon.gguf filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-gpt-2.gguf filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-gpt-neox.gguf filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-llama-bpe.gguf filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-llama-spm.gguf filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-mpt.gguf filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-phi-3.gguf filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-qwen2.gguf filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-refact.gguf filter=lfs diff=lfs merge=lfs -text
+llama.cpp/models/ggml-vocab-starcoder.gguf filter=lfs diff=lfs merge=lfs -text
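These new entries route the committed build outputs (dylibs, example binaries, CMake object files) and the ggml vocab models through Git LFS. As a quick sanity check, not part of the commit itself, git check-attr can confirm that a path picks up the filter:

    git check-attr filter diff merge -- llama.cpp/build/bin/llama-server
    # should report "filter: lfs", "diff: lfs" and "merge: lfs" for the path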
llama.cpp/.DS_Store
ADDED
Binary file (10.2 kB).
llama.cpp/.clang-format
ADDED
@@ -0,0 +1,161 @@
---
Language: Cpp
AlignAfterOpenBracket: Align
AlignArrayOfStructures: Left
AlignConsecutiveAssignments: AcrossComments
AlignConsecutiveBitFields: AcrossComments
AlignConsecutiveDeclarations: AcrossComments
AlignConsecutiveMacros: AcrossComments
# AlignConsecutiveShortCaseStatements: AcrossComments
AlignEscapedNewlines: Left # LeftWithLastLine
AlignOperands: Align
AlignTrailingComments:
  Kind: Always
  OverEmptyLines: 1
AllowAllArgumentsOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: false
# AllowBreakBeforeNoexceptSpecifier: OnlyWithParen
AllowShortBlocksOnASingleLine: Never
AllowShortCaseLabelsOnASingleLine: false
AllowShortFunctionsOnASingleLine: Inline
AllowShortIfStatementsOnASingleLine: Never
AllowShortLambdasOnASingleLine: Inline
AllowShortLoopsOnASingleLine: false
AlwaysBreakBeforeMultilineStrings: true
BinPackArguments: true
BinPackParameters: true # OnePerLine
BitFieldColonSpacing: Both
BreakBeforeBraces: Custom # Attach
BraceWrapping:
  AfterCaseLabel: true
  AfterClass: false
  AfterControlStatement: false
  AfterEnum: false
  AfterFunction: false
  AfterNamespace: false
  AfterObjCDeclaration: false
  AfterStruct: false
  AfterUnion: false
  AfterExternBlock: false
  BeforeCatch: false
  BeforeElse: false
  BeforeLambdaBody: false
  BeforeWhile: false
  IndentBraces: false
  SplitEmptyFunction: false
  SplitEmptyRecord: false
  SplitEmptyNamespace: false
# BreakAdjacentStringLiterals: true
BreakAfterAttributes: Never
BreakBeforeBinaryOperators: None
BreakBeforeInlineASMColon: OnlyMultiline
BreakBeforeTernaryOperators: false
# BreakBinaryOperations: Never
BreakConstructorInitializers: AfterColon
# BreakFunctionDefinitionParameters: false
BreakInheritanceList: AfterComma
BreakStringLiterals: true
# BreakTemplateDeclarations: Yes
ColumnLimit: 120
CommentPragmas: '^ IWYU pragma:'
CompactNamespaces: false
ConstructorInitializerIndentWidth: 4
ContinuationIndentWidth: 4
Cpp11BracedListStyle: false
DerivePointerAlignment: false
DisableFormat: false
EmptyLineBeforeAccessModifier: Leave
EmptyLineAfterAccessModifier: Never
ExperimentalAutoDetectBinPacking: false
FixNamespaceComments: true
IncludeBlocks: Regroup
IncludeCategories:
  - Regex: '^<.*\.h>'
    Priority: 1
    SortPriority: 0
  - Regex: '^<.*'
    Priority: 2
    SortPriority: 0
  - Regex: '.*'
    Priority: 3
    SortPriority: 0
IncludeIsMainRegex: '([-_](test|unittest))?$'
IncludeIsMainSourceRegex: ''
IndentAccessModifiers: false
IndentCaseBlocks: true
IndentCaseLabels: true
IndentExternBlock: NoIndent
IndentGotoLabels: false
IndentPPDirectives: AfterHash
IndentWidth: 4
IndentWrappedFunctionNames: false
InsertBraces: true # NOTE: may lead to incorrect formatting
InsertNewlineAtEOF: true
JavaScriptQuotes: Leave
JavaScriptWrapImports: true
KeepEmptyLinesAtTheStartOfBlocks: false
LambdaBodyIndentation: Signature
LineEnding: LF
MacroBlockBegin: ''
MacroBlockEnd: ''
MaxEmptyLinesToKeep: 1
NamespaceIndentation: None
ObjCBinPackProtocolList: Auto
ObjCBlockIndentWidth: 4
ObjCSpaceAfterProperty: true
ObjCSpaceBeforeProtocolList: true
PPIndentWidth: -1
PackConstructorInitializers: CurrentLine
PenaltyBreakAssignment: 2
PenaltyBreakBeforeFirstCallParameter: 1
PenaltyBreakComment: 300
PenaltyBreakFirstLessLess: 120
PenaltyBreakString: 1000
PenaltyBreakTemplateDeclaration: 10
PenaltyExcessCharacter: 1000000
PenaltyReturnTypeOnItsOwnLine: 200
PointerAlignment: Middle
QualifierAlignment: Left
#QualifierOrder: ['static', 'inline', 'friend', 'constexpr', 'const', 'volatile', 'type', 'restrict']
RawStringFormats:
  - Language: Cpp
    Delimiters:
      - cc
      - CC
      - cpp
      - Cpp
      - CPP
      - 'c++'
      - 'C++'
    CanonicalDelimiter: ''
ReferenceAlignment: Middle
ReflowComments: false # IndentOnly
SeparateDefinitionBlocks: Always
SortIncludes: CaseInsensitive
SortUsingDeclarations: LexicographicNumeric
SpaceAfterCStyleCast: true
SpaceAfterLogicalNot: false
SpaceAfterTemplateKeyword: true
SpaceBeforeAssignmentOperators: true
SpaceBeforeCpp11BracedList: false
SpaceBeforeCtorInitializerColon: true
SpaceBeforeInheritanceColon: true
SpaceBeforeParens: ControlStatements
SpaceBeforeRangeBasedForLoopColon: true
SpaceInEmptyBlock: false
SpaceInEmptyParentheses: false
SpacesBeforeTrailingComments: 2
SpacesInAngles: Never
SpacesInContainerLiterals: true
SpacesInLineCommentPrefix:
  Minimum: 1
  Maximum: -1
SpacesInParentheses: false
SpacesInSquareBrackets: false
SpaceBeforeSquareBrackets: false
Standard: c++17
TabWidth: 4
UseTab: Never
WhitespaceSensitiveMacros: ['STRINGIZE']
...
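The config pins the project style (C++17, 120-column limit, 4-space indent, middle-aligned pointers). A minimal sketch of applying it locally, assuming a clang-format release recent enough to understand the keys above (the commented-out keys target newer releases):

    clang-format --dry-run --Werror src/llama.cpp   # report style violations without modifying the file
    clang-format -i src/llama.cpp                   # rewrite the file in place using this .clang-format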
llama.cpp/.clang-tidy
ADDED
@@ -0,0 +1,26 @@
---
Checks: >
    bugprone-*,
    -bugprone-easily-swappable-parameters,
    -bugprone-implicit-widening-of-multiplication-result,
    -bugprone-misplaced-widening-cast,
    -bugprone-narrowing-conversions,
    readability-*,
    -readability-avoid-unconditional-preprocessor-if,
    -readability-function-cognitive-complexity,
    -readability-identifier-length,
    -readability-implicit-bool-conversion,
    -readability-magic-numbers,
    -readability-uppercase-literal-suffix,
    -readability-simplify-boolean-expr,
    clang-analyzer-*,
    -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
    performance-*,
    portability-*,
    -portability-simd-intrinsics,
    misc-*,
    -misc-const-correctness,
    -misc-non-private-member-variables-in-classes,
    -misc-no-recursion,
    -misc-use-anonymous-namespace,
FormatStyle: none
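The checks list enables the bugprone, readability, clang-analyzer, performance, portability, and misc groups, then opts out of the individually listed checks. A hedged sketch of running it over a CMake build tree that exports compile_commands.json (the build directory name is an assumption):

    cmake -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
    run-clang-tidy -p build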
llama.cpp/.devops/cloud-v-pipeline
ADDED
@@ -0,0 +1,22 @@
node('x86_runner1'){                      // Running on x86 runner containing latest vector qemu, latest vector gcc and all the necessary libraries
    stage('Cleanup'){
        cleanWs()                         // Cleaning previous CI build in workspace
    }
    stage('checkout repo'){
        retry(5){                         // Retry if the cloning fails due to some reason
            checkout scm                  // Clone the repo on Runner
        }
    }
    stage('Compiling llama.cpp'){
        sh'''#!/bin/bash
            make RISCV=1 RISCV_CROSS_COMPILE=1 # Compiling llama for RISC-V
        '''
    }
    stage('Running llama.cpp'){
        sh'''#!/bin/bash
            module load gnu-bin2/0.1 # loading latest versions of vector qemu and vector gcc
            qemu-riscv64 -L /softwares/gnu-bin2/sysroot -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./llama-cli -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt # Running llama.cpp on vector qemu-riscv64
            cat llama_log.txt # Printing results
        '''
    }
}
llama.cpp/.devops/cpu.Dockerfile
ADDED
@@ -0,0 +1,92 @@
ARG UBUNTU_VERSION=22.04

FROM ubuntu:$UBUNTU_VERSION AS build

ARG TARGETARCH

ARG GGML_CPU_ARM_ARCH=armv8-a

RUN apt-get update && \
    apt-get install -y build-essential git cmake libcurl4-openssl-dev

WORKDIR /app

COPY . .

RUN if [ "$TARGETARCH" = "amd64" ]; then \
        cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
    elif [ "$TARGETARCH" = "arm64" ]; then \
        cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \
    else \
        echo "Unsupported architecture"; \
        exit 1; \
    fi && \
    cmake --build build -j $(nproc)

RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

## Base image
FROM ubuntu:$UBUNTU_VERSION AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
    git \
    python3 \
    python3-pip \
    && pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
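The file is a multi-stage build with three runnable targets (full, light, server) on top of a shared runtime base. A hedged example of building and running the server stage from the repository root; the image tag and model path are placeholders:

    docker build -f .devops/cpu.Dockerfile --target server -t llama-cpp:server-cpu .
    docker run -p 8080:8080 -v "$(pwd)/models:/models" llama-cpp:server-cpu -m /models/model.gguf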
llama.cpp/.devops/cuda.Dockerfile
ADDED
@@ -0,0 +1,94 @@
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG CUDA_VERSION=12.6.0
# Target the CUDA build image
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_CUDA_DEV_CONTAINER} AS build

# CUDA architecture to build for (defaults to all supported archs)
ARG CUDA_DOCKER_ARCH=default

RUN apt-get update && \
    apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1

WORKDIR /app

COPY . .

RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
        export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
    fi && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

## Base image
FROM ${BASE_CUDA_RUN_CONTAINER} AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
    git \
    python3 \
    python3-pip \
    && pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
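The CUDA variant mirrors the CPU file but builds against the nvidia/cuda devel image and can be narrowed to a single compute capability via CUDA_DOCKER_ARCH. A hedged example targeting capability 8.6 and running through the NVIDIA container toolkit (tag and model path are placeholders):

    docker build -f .devops/cuda.Dockerfile --build-arg CUDA_DOCKER_ARCH=86 --target light -t llama-cpp:cli-cuda .
    docker run --gpus all -v "$(pwd)/models:/models" llama-cpp:cli-cuda -m /models/model.gguf -p "Hello" -n 32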
llama.cpp/.devops/intel.Dockerfile
ADDED
@@ -0,0 +1,91 @@
ARG ONEAPI_VERSION=2025.0.0-0-devel-ubuntu22.04

## Build Image

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build

ARG GGML_SYCL_F16=OFF
RUN apt-get update && \
    apt-get install -y git libcurl4-openssl-dev

WORKDIR /app

COPY . .

RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
        echo "GGML_SYCL_F16 is set" \
        && export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
    fi && \
    echo "Building with dynamic libs" && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
    cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

FROM intel/oneapi-basekit:$ONEAPI_VERSION AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

### Full
FROM base AS full

COPY --from=build /app/lib/ /app
COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
    git \
    python3 \
    python3-pip \
    && pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/lib/ /app
COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/lib/ /app
COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
llama.cpp/.devops/llama-cli-cann.Dockerfile
ADDED
@@ -0,0 +1,44 @@
ARG ASCEND_VERSION=8.0.rc2.alpha003-910b-openeuler22.03-py3.8

FROM ascendai/cann:$ASCEND_VERSION AS build

WORKDIR /app

COPY . .

RUN yum install -y gcc g++ cmake make
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}

# find libascend_hal.so, because the drive hasn`t been mounted.
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH

RUN echo "Building with static libs" && \
    source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF && \
    cmake --build build --config Release --target llama-cli

# TODO: use image with NNRT
FROM ascendai/cann:$ASCEND_VERSION AS runtime
COPY --from=build /app/build/bin/llama-cli /llama-cli

ENV LC_ALL=C.utf8

ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}

ENTRYPOINT ["/llama-cli" ]
llama.cpp/.devops/llama-cpp-cuda.srpm.spec
ADDED
@@ -0,0 +1,83 @@
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
# Built and maintained by John Boero - [email protected]
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal

# Notes for llama.cpp:
# 1. Tags are currently based on hash - which will not sort asciibetically.
#    We need to declare standard versioning if people want to sort latest releases.
# 2. Builds for CUDA/OpenCL support are separate, with different depenedencies.
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
#    Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
#    It is up to the user to install the correct vendor-specific support.

Name:           llama.cpp-cuda
Version:        %( date "+%%Y%%m%%d" )
Release:        1%{?dist}
Summary:        CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
License:        MIT
Source0:        https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
BuildRequires:  coreutils make gcc-c++ git cuda-toolkit
Requires:       cuda-toolkit
URL:            https://github.com/ggerganov/llama.cpp

%define debug_package %{nil}
%define source_date_epoch_from_changelog 0

%description
CPU inference for Meta's Lllama2 models using default options.

%prep
%setup -n llama.cpp-master

%build
make -j GGML_CUDA=1

%install
mkdir -p %{buildroot}%{_bindir}/
cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli
cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server
cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple

mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacuda.service
[Unit]
Description=Llama.cpp server, CPU only (no GPU support in this build).
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target

[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llama-cuda-server $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=never

[Install]
WantedBy=default.target
EOF

mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
EOF

%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*

%files
%{_bindir}/llama-cuda-cli
%{_bindir}/llama-cuda-server
%{_bindir}/llama-cuda-simple
/usr/lib/systemd/system/llamacuda.service
%config /etc/sysconfig/llama

%pre

%post

%preun
%postun

%changelog
llama.cpp/.devops/llama-cpp.srpm.spec
ADDED
@@ -0,0 +1,85 @@
# SRPM for building from source and packaging an RPM for RPM-based distros.
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
# Built and maintained by John Boero - [email protected]
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal

# Notes for llama.cpp:
# 1. Tags are currently based on hash - which will not sort asciibetically.
#    We need to declare standard versioning if people want to sort latest releases.
#    In the meantime, YYYYMMDD format will be used.
# 2. Builds for CUDA/OpenCL support are separate, with different depenedencies.
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
#    Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
#    It is up to the user to install the correct vendor-specific support.

Name:           llama.cpp
Version:        %( date "+%%Y%%m%%d" )
Release:        1%{?dist}
Summary:        CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
License:        MIT
Source0:        https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
BuildRequires:  coreutils make gcc-c++ git libstdc++-devel
Requires:       libstdc++
URL:            https://github.com/ggerganov/llama.cpp

%define debug_package %{nil}
%define source_date_epoch_from_changelog 0

%description
CPU inference for Meta's Lllama2 models using default options.
Models are not included in this package and must be downloaded separately.

%prep
%setup -n llama.cpp-master

%build
make -j

%install
mkdir -p %{buildroot}%{_bindir}/
cp -p llama-cli %{buildroot}%{_bindir}/llama-cli
cp -p llama-server %{buildroot}%{_bindir}/llama-server
cp -p llama-simple %{buildroot}%{_bindir}/llama-simple

mkdir -p %{buildroot}/usr/lib/systemd/system
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llama.service
[Unit]
Description=Llama.cpp server, CPU only (no GPU support in this build).
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target

[Service]
Type=simple
EnvironmentFile=/etc/sysconfig/llama
ExecStart=/usr/bin/llama-server $LLAMA_ARGS
ExecReload=/bin/kill -s HUP $MAINPID
Restart=never

[Install]
WantedBy=default.target
EOF

mkdir -p %{buildroot}/etc/sysconfig
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
EOF

%clean
rm -rf %{buildroot}
rm -rf %{_builddir}/*

%files
%{_bindir}/llama-cli
%{_bindir}/llama-server
%{_bindir}/llama-simple
/usr/lib/systemd/system/llama.service
%config /etc/sysconfig/llama

%pre

%post

%preun
%postun

%changelog
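Both spec files follow the same pattern: a date-based version, a plain make build, and a systemd unit plus an /etc/sysconfig/llama environment file installed next to the binaries. A hedged sketch of building and enabling the CPU package (paths assume a standard ~/rpmbuild tree with master.tar.gz already placed in SOURCES):

    rpmbuild -ba ~/rpmbuild/SPECS/llama-cpp.srpm.spec
    sudo dnf install ~/rpmbuild/RPMS/x86_64/llama.cpp-*.rpm
    sudo systemctl enable --now llama.service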
llama.cpp/.devops/musa.Dockerfile
ADDED
@@ -0,0 +1,108 @@
ARG UBUNTU_VERSION=22.04
# This needs to generally match the container host's environment.
ARG MUSA_VERSION=rc3.1.0
# Target the MUSA build image
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}

ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}

FROM ${BASE_MUSA_DEV_CONTAINER} AS build

# MUSA architecture to build for (defaults to all supported archs)
ARG MUSA_DOCKER_ARCH=default

RUN apt-get update && \
    apt-get install -y \
        build-essential \
        cmake \
        python3 \
        python3-pip \
        git \
        libcurl4-openssl-dev \
        libgomp1

COPY requirements.txt requirements.txt
COPY requirements requirements

RUN pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt

WORKDIR /app

COPY . .

# Use the default MUSA archs if not specified
RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
        export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
    fi && \
    cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
    cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

## Base image
FROM ${BASE_MUSA_RUN_CONTAINER} AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
    git \
    python3 \
    python3-pip \
    && pip install --upgrade pip setuptools wheel \
    && pip install -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
llama.cpp/.devops/nix/apps.nix
ADDED
@@ -0,0 +1,21 @@
{
  perSystem =
    { config, lib, ... }:
    {
      apps =
        let
          inherit (config.packages) default;
          binaries = [
            "llama-cli"
            "llama-embedding"
            "llama-server"
            "llama-quantize"
          ];
          mkApp = name: {
            type = "app";
            program = "${default}/bin/${name}";
          };
        in
        lib.genAttrs binaries mkApp;
    };
}
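Each name in binaries becomes a flake app pointing at the corresponding binary of the default package, so the tools can be launched with nix run. A hedged usage sketch from a local checkout (the model path is a placeholder):

    nix run .#llama-server -- -m models/model.gguf --port 8080
    nix run .#llama-quantize -- --help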
llama.cpp/.devops/nix/devshells.nix
ADDED
@@ -0,0 +1,52 @@
{ inputs, ... }:

{
  perSystem =
    {
      config,
      lib,
      system,
      ...
    }:
    {
      devShells =
        let
          pkgs = import inputs.nixpkgs { inherit system; };
          stdenv = pkgs.stdenv;
          scripts = config.packages.python-scripts;
        in
        lib.pipe (config.packages) [
          (lib.concatMapAttrs (
            name: package: {
              ${name} = pkgs.mkShell {
                name = "${name}";
                inputsFrom = [ package ];
                shellHook = ''
                  echo "Entering ${name} devShell"
                '';
              };
              "${name}-extra" =
                if (name == "python-scripts") then
                  null
                else
                  pkgs.mkShell {
                    name = "${name}-extra";
                    inputsFrom = [
                      package
                      scripts
                    ];
                    # Extra packages that *may* be used by some scripts
                    packages = [
                      pkgs.python3Packages.tiktoken
                    ];
                    shellHook = ''
                      echo "Entering ${name} devShell"
                      addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib"
                    '';
                  };
            }
          ))
          (lib.filterAttrs (name: value: value != null))
        ];
    };
}
llama.cpp/.devops/nix/docker.nix
ADDED
@@ -0,0 +1,37 @@
{
  lib,
  dockerTools,
  buildEnv,
  llama-cpp,
  interactive ? true,
  coreutils,
}:

# A tar that can be fed into `docker load`:
#
#   $ nix build .#llamaPackages.docker
#   $ docker load < result

# For details and variations cf.
# - https://nixos.org/manual/nixpkgs/unstable/#ssec-pkgs-dockerTools-buildLayeredImage
# - https://discourse.nixos.org/t/a-faster-dockertools-buildimage-prototype/16922
# - https://nixery.dev/

# Approximate (compressed) sizes, at the time of writing, are:
#
# .#llamaPackages.docker: 125M;
# .#llamaPackagesCuda.docker: 537M;
# .#legacyPackages.aarch64-linux.llamaPackagesXavier.docker: 415M.

dockerTools.buildLayeredImage {
  name = llama-cpp.pname;
  tag = "latest";

  contents =
    [ llama-cpp ]
    ++ lib.optionals interactive [
      coreutils
      dockerTools.binSh
      dockerTools.caCertificates
    ];
}
llama.cpp/.devops/nix/jetson-support.nix
ADDED
@@ -0,0 +1,39 @@
{ inputs, ... }:
{
  perSystem =
    {
      config,
      system,
      lib,
      pkgsCuda,
      ...
    }:
    {
      legacyPackages =
        let
          caps.llamaPackagesXavier = "7.2";
          caps.llamaPackagesOrin = "8.7";
          caps.llamaPackagesTX2 = "6.2";
          caps.llamaPackagesNano = "5.3";

          pkgsFor =
            cap:
            import inputs.nixpkgs {
              inherit system;
              config = {
                cudaSupport = true;
                cudaCapabilities = [ cap ];
                cudaEnableForwardCompat = false;
                inherit (pkgsCuda.config) allowUnfreePredicate;
              };
            };
        in
        builtins.mapAttrs (name: cap: (pkgsFor cap).callPackage ./scope.nix { }) caps;

      packages = lib.optionalAttrs (system == "aarch64-linux") {
        jetson-xavier = config.legacyPackages.llamaPackagesXavier.llama-cpp;
        jetson-orin = config.legacyPackages.llamaPackagesOrin.llama-cpp;
        jetson-nano = config.legacyPackages.llamaPackagesNano.llama-cpp;
      };
    };
}
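pkgsFor instantiates nixpkgs once per CUDA capability, so each Jetson target gets a llama-cpp built for exactly one GPU architecture, and the jetson-* packages are only exposed on aarch64-linux. A hedged build example on such a host:

    nix build .#jetson-orin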
llama.cpp/.devops/nix/nixpkgs-instances.nix
ADDED
@@ -0,0 +1,45 @@
{ inputs, ... }:
{
  # The _module.args definitions are passed on to modules as arguments. E.g.
  # the module `{ pkgs ... }: { /* config */ }` implicitly uses
  # `_module.args.pkgs` (defined in this case by flake-parts).
  perSystem =
    { system, ... }:
    {
      _module.args = {
        # Note: bringing up https://zimbatm.com/notes/1000-instances-of-nixpkgs
        # again, the below creates several nixpkgs instances which the
        # flake-centric CLI will be forced to evaluate e.g. on `nix flake show`.
        #
        # This is currently "slow" and "expensive", on a certain scale.
        # This also isn't "right" in that this hinders dependency injection at
        # the level of flake inputs. This might get removed in the foreseeable
        # future.
        #
        # Note that you can use these expressions without Nix
        # (`pkgs.callPackage ./devops/nix/scope.nix { }` is the entry point).

        pkgsCuda = import inputs.nixpkgs {
          inherit system;
          # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc,
          # and ucx are built with CUDA support)
          config.cudaSupport = true;
          config.allowUnfreePredicate =
            p:
            builtins.all (
              license:
              license.free
              || builtins.elem license.shortName [
                "CUDA EULA"
                "cuDNN EULA"
              ]
            ) (p.meta.licenses or [ p.meta.license ]);
        };
        # Ensure dependencies use ROCm consistently
        pkgsRocm = import inputs.nixpkgs {
          inherit system;
          config.rocmSupport = true;
        };
      };
    };
}
llama.cpp/.devops/nix/package-gguf-py.nix
ADDED
@@ -0,0 +1,36 @@
{
  lib,
  llamaVersion,
  numpy,
  tqdm,
  sentencepiece,
  pyyaml,
  poetry-core,
  buildPythonPackage,
  pytestCheckHook,
}:

buildPythonPackage {
  pname = "gguf";
  version = llamaVersion;
  pyproject = true;
  nativeBuildInputs = [ poetry-core ];
  propagatedBuildInputs = [
    numpy
    tqdm
    sentencepiece
    pyyaml
  ];
  src = lib.cleanSource ../../gguf-py;
  pythonImportsCheck = [
    "numpy"
    "gguf"
  ];
  nativeCheckInputs = [ pytestCheckHook ];
  doCheck = true;
  meta = with lib; {
    description = "Python package for writing binary files in the GGUF format";
    license = licenses.mit;
    maintainers = [ maintainers.ditsuke ];
  };
}
llama.cpp/.devops/nix/package.nix
ADDED
@@ -0,0 +1,247 @@
{
  lib,
  glibc,
  config,
  stdenv,
  runCommand,
  cmake,
  ninja,
  pkg-config,
  git,
  mpi,
  blas,
  cudaPackages,
  autoAddDriverRunpath,
  darwin,
  rocmPackages,
  vulkan-headers,
  vulkan-loader,
  curl,
  shaderc,
  useBlas ?
    builtins.all (x: !x) [
      useCuda
      useMetalKit
      useRocm
      useVulkan
    ]
    && blas.meta.available,
  useCuda ? config.cudaSupport,
  useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
  # Increases the runtime closure size by ~700M
  useMpi ? false,
  useRocm ? config.rocmSupport,
  rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
  enableCurl ? true,
  useVulkan ? false,
  llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake

  # It's necessary to consistently use backendStdenv when building with CUDA support,
  # otherwise we get libstdc++ errors downstream.
  effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
  enableStatic ? effectiveStdenv.hostPlatform.isStatic,
  precompileMetalShaders ? false,
}:

let
  inherit (lib)
    cmakeBool
    cmakeFeature
    optionals
    strings
    ;

  stdenv = throw "Use effectiveStdenv instead";

  suffices =
    lib.optionals useBlas [ "BLAS" ]
    ++ lib.optionals useCuda [ "CUDA" ]
    ++ lib.optionals useMetalKit [ "MetalKit" ]
    ++ lib.optionals useMpi [ "MPI" ]
    ++ lib.optionals useRocm [ "ROCm" ]
    ++ lib.optionals useVulkan [ "Vulkan" ];

  pnameSuffix =
    strings.optionalString (suffices != [ ])
      "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
  descriptionSuffix = strings.optionalString (
    suffices != [ ]
  ) ", accelerated with ${strings.concatStringsSep ", " suffices}";

  xcrunHost = runCommand "xcrunHost" { } ''
    mkdir -p $out/bin
    ln -s /usr/bin/xcrun $out/bin
  '';

  # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
  # separately
  darwinBuildInputs =
    with darwin.apple_sdk.frameworks;
    [
      Accelerate
      CoreVideo
      CoreGraphics
    ]
    ++ optionals useMetalKit [ MetalKit ];

  cudaBuildInputs = with cudaPackages; [
    cuda_cudart
    cuda_cccl # <nv/target>
    libcublas
  ];

  rocmBuildInputs = with rocmPackages; [
    clr
    hipblas
    rocblas
  ];

  vulkanBuildInputs = [
    vulkan-headers
    vulkan-loader
    shaderc
  ];
in

effectiveStdenv.mkDerivation (finalAttrs: {
  pname = "llama-cpp${pnameSuffix}";
  version = llamaVersion;

  # Note: none of the files discarded here are visible in the sandbox or
  # affect the output hash. This also means they can be modified without
  # triggering a rebuild.
  src = lib.cleanSourceWith {
    filter =
      name: type:
      let
        noneOf = builtins.all (x: !x);
        baseName = baseNameOf name;
      in
      noneOf [
        (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
        (lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths
        (lib.hasPrefix "." baseName) # Skip hidden files and directories
        (baseName == "flake.lock")
      ];
    src = lib.cleanSource ../../.;
  };

  postPatch = ''
    substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
      --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
    substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
      --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
  '';

  # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
  # `default.metallib` may be compiled with Metal compiler from XCode
  # and we need to escape sandbox on MacOS to access Metal compiler.
  # `xcrun` is used find the path of the Metal compiler, which is varible
  # and not on $PATH
  # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion
  __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;

  nativeBuildInputs =
    [
      cmake
      ninja
      pkg-config
      git
    ]
    ++ optionals useCuda [
      cudaPackages.cuda_nvcc

      autoAddDriverRunpath
    ]
    ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
|
157 |
+
++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];
|
158 |
+
|
159 |
+
buildInputs =
|
160 |
+
optionals effectiveStdenv.isDarwin darwinBuildInputs
|
161 |
+
++ optionals useCuda cudaBuildInputs
|
162 |
+
++ optionals useMpi [ mpi ]
|
163 |
+
++ optionals useRocm rocmBuildInputs
|
164 |
+
++ optionals useBlas [ blas ]
|
165 |
+
++ optionals useVulkan vulkanBuildInputs
|
166 |
+
++ optionals enableCurl [ curl ];
|
167 |
+
|
168 |
+
cmakeFlags =
|
169 |
+
[
|
170 |
+
(cmakeBool "LLAMA_BUILD_SERVER" true)
|
171 |
+
(cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
|
172 |
+
(cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
|
173 |
+
(cmakeBool "LLAMA_CURL" enableCurl)
|
174 |
+
(cmakeBool "GGML_NATIVE" false)
|
175 |
+
(cmakeBool "GGML_BLAS" useBlas)
|
176 |
+
(cmakeBool "GGML_CUDA" useCuda)
|
177 |
+
(cmakeBool "GGML_HIP" useRocm)
|
178 |
+
(cmakeBool "GGML_METAL" useMetalKit)
|
179 |
+
(cmakeBool "GGML_VULKAN" useVulkan)
|
180 |
+
(cmakeBool "GGML_STATIC" enableStatic)
|
181 |
+
]
|
182 |
+
++ optionals useCuda [
|
183 |
+
(
|
184 |
+
with cudaPackages.flags;
|
185 |
+
cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
|
186 |
+
builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
|
187 |
+
)
|
188 |
+
)
|
189 |
+
]
|
190 |
+
++ optionals useRocm [
|
191 |
+
(cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
|
192 |
+
(cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets)
|
193 |
+
]
|
194 |
+
++ optionals useMetalKit [
|
195 |
+
(lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
|
196 |
+
(cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
|
197 |
+
];
|
198 |
+
|
199 |
+
# Environment variables needed for ROCm
|
200 |
+
env = optionals useRocm {
|
201 |
+
ROCM_PATH = "${rocmPackages.clr}";
|
202 |
+
HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
|
203 |
+
};
|
204 |
+
|
205 |
+
# TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
|
206 |
+
# if they haven't been added yet.
|
207 |
+
postInstall = ''
|
208 |
+
mkdir -p $out/include
|
209 |
+
cp $src/include/llama.h $out/include/
|
210 |
+
'';
|
211 |
+
|
212 |
+
meta = {
|
213 |
+
# Configurations we don't want even the CI to evaluate. Results in the
|
214 |
+
# "unsupported platform" messages. This is mostly a no-op, because
|
215 |
+
# cudaPackages would've refused to evaluate anyway.
|
216 |
+
badPlatforms = optionals useCuda lib.platforms.darwin;
|
217 |
+
|
218 |
+
# Configurations that are known to result in build failures. Can be
|
219 |
+
# overridden by importing Nixpkgs with `allowBroken = true`.
|
220 |
+
broken = (useMetalKit && !effectiveStdenv.isDarwin);
|
221 |
+
|
222 |
+
description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
|
223 |
+
homepage = "https://github.com/ggerganov/llama.cpp/";
|
224 |
+
license = lib.licenses.mit;
|
225 |
+
|
226 |
+
# Accommodates `nix run` and `lib.getExe`
|
227 |
+
mainProgram = "llama-cli";
|
228 |
+
|
229 |
+
# These people might respond, on the best effort basis, if you ping them
|
230 |
+
# in case of Nix-specific regressions or for reviewing Nix-specific PRs.
|
231 |
+
# Consider adding yourself to this list if you want to ensure this flake
|
232 |
+
# stays maintained and you're willing to invest your time. Do not add
|
233 |
+
# other people without their consent. Consider removing people after
|
234 |
+
# they've been unreachable for long periods of time.
|
235 |
+
|
236 |
+
# Note that lib.maintainers is defined in Nixpkgs, but you may just add
|
237 |
+
# an attrset following the same format as in
|
238 |
+
# https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
|
239 |
+
maintainers = with lib.maintainers; [
|
240 |
+
philiptaron
|
241 |
+
SomeoneSerge
|
242 |
+
];
|
243 |
+
|
244 |
+
# Extend `badPlatforms` instead
|
245 |
+
platforms = lib.platforms.all;
|
246 |
+
};
|
247 |
+
})
|
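The backend toggles (`useCuda`, `useRocm`, `useVulkan`, `useMetalKit`, ...) are ordinary derivation arguments, so accelerated variants are selected at evaluation time rather than by editing this file. A hedged sketch, assuming flake.nix exposes the scope's `llama-cpp` attribute under the same name (the attribute paths are assumptions):

    # Hypothetical commands; exact attribute names depend on the flake wiring.
    nix build .#llama-cpp              # default variant, honours config.cudaSupport / config.rocmSupport
    nix run .#llama-cpp -- --version   # meta.mainProgram points at llama-cli
    # A Vulkan variant would typically be requested by overriding the toggle, e.g.
    #   llama-cpp.override { useVulkan = true; }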
llama.cpp/.devops/nix/python-scripts.nix
ADDED
@@ -0,0 +1,66 @@
{
  lib,
  stdenv,
  buildPythonPackage,
  poetry-core,
  mkShell,
  python3Packages,
  gguf-py,
}@inputs:

let
  llama-python-deps = with python3Packages; [
    numpy
    sentencepiece
    transformers
    protobuf
    torchWithoutCuda
    gguf-py
    tqdm

    # for scripts/compare-llama-bench.py
    gitpython
    tabulate

    # for examples/pydantic-models-to-grammar-examples.py
    docstring-parser
    pydantic

  ];

  llama-python-test-deps = with python3Packages; [
    # Server bench
    matplotlib

    # server tests
    openai
    pytest
    prometheus-client
  ];
in

buildPythonPackage ({
  pname = "llama-scripts";
  version = "0.0.0";
  pyproject = true;

  # NOTE: The files filtered out here are not visible in the build sandbox, neither
  # do they affect the output hash. They can be modified without triggering a rebuild.
  src = lib.cleanSourceWith {
    filter =
      name: type:
      let
        any = builtins.any (x: x);
        baseName = builtins.baseNameOf name;
      in
      any [
        (lib.hasSuffix ".py" name)
        (baseName == "README.md")
        (baseName == "pyproject.toml")
      ];
    src = lib.cleanSource ../../.;
  };
  nativeBuildInputs = [ poetry-core ];
  nativeCheckInputs = llama-python-test-deps;
  dependencies = llama-python-deps;
})
llama.cpp/.devops/nix/scope.nix
ADDED
@@ -0,0 +1,41 @@
{
  lib,
  newScope,
  python3,
  llamaVersion ? "0.0.0",
}:

let
  pythonPackages = python3.pkgs;
  buildPythonPackage = pythonPackages.buildPythonPackage;
  numpy = pythonPackages.numpy;
  tqdm = pythonPackages.tqdm;
  sentencepiece = pythonPackages.sentencepiece;
  pyyaml = pythonPackages.pyyaml;
  poetry-core = pythonPackages.poetry-core;
  pytestCheckHook = pythonPackages.pytestCheckHook;
in

# We're using `makeScope` instead of just writing out an attrset
# because it allows users to apply overlays later using `overrideScope'`.
# Cf. https://noogle.dev/f/lib/makeScope

lib.makeScope newScope (self: {
  inherit llamaVersion;
  gguf-py = self.callPackage ./package-gguf-py.nix {
    inherit
      buildPythonPackage
      numpy
      tqdm
      sentencepiece
      poetry-core
      pyyaml
      pytestCheckHook
      ;
  };
  python-scripts = self.callPackage ./python-scripts.nix { inherit buildPythonPackage poetry-core; };
  llama-cpp = self.callPackage ./package.nix { };
  docker = self.callPackage ./docker.nix { };
  docker-min = self.callPackage ./docker.nix { interactive = false; };
  sif = self.callPackage ./sif.nix { };
})
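Because the attribute set is built with `lib.makeScope`, downstream users can swap pieces out via `overrideScope'` instead of editing these files, and each member of the scope can be built individually. A hedged sketch of building the container and image outputs, assuming flake.nix exposes the scope members under the names used here:

    # Hypothetical flake attribute paths; the names come from scope.nix, the wiring is assumed.
    nix build .#docker       # OCI image built from docker.nix (interactive variant)
    nix build .#docker-min   # same image without bashInteractive
    nix build .#sif          # Singularity/Apptainer image from sif.nix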
llama.cpp/.devops/nix/sif.nix
ADDED
@@ -0,0 +1,27 @@
{
  lib,
  singularity-tools,
  llama-cpp,
  bashInteractive,
  interactive ? false,
}:

let
  optionalInt = cond: x: if cond then x else 0;
in
singularity-tools.buildImage rec {
  inherit (llama-cpp) name;
  contents = [ llama-cpp ] ++ lib.optionals interactive [ bashInteractive ];

  # These are excessive (but safe) for most variants. Building singularity
  # images requires superuser privileges, so we build them inside a VM in a
  # writable image of pre-determined size.
  #
  # ROCm is currently affected by https://github.com/NixOS/nixpkgs/issues/276846
  #
  # Expected image sizes:
  # - cpu/blas: 150M,
  # - cuda, all gencodes: 560M,
  diskSize = 4096 + optionalInt llama-cpp.useRocm 16384;
  memSize = diskSize;
}
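The result of `singularity-tools.buildImage` is an image file that Apptainer/Singularity can execute directly. A minimal, hedged usage sketch; the `./result` output link and the model paths are placeholders, and the exact run commands depend on the local Singularity setup:

    # Assuming `nix build .#sif` left the image at ./result:
    singularity exec ./result llama-cli --version
    singularity exec --bind /path/to/models:/models ./result llama-cli -m /models/model.gguf -p "Hello"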
llama.cpp/.devops/rocm.Dockerfile
ADDED
@@ -0,0 +1,113 @@
ARG UBUNTU_VERSION=24.04

# This needs to generally match the container host's environment.
ARG ROCM_VERSION=6.3
ARG AMDGPU_VERSION=6.3

# Target the ROCm dev build image
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete

### Build image
FROM ${BASE_ROCM_DEV_CONTAINER} AS build

# Unless otherwise specified, we make a fat build.
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
# This is mostly tied to rocBLAS supported archs.
# gfx803, gfx900, gfx1032, gfx1101, gfx1102 are not officially supported
# gfx906 is deprecated
# check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html

#ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102'
ARG ROCM_DOCKER_ARCH=gfx1100

# Set the ROCm GPU architectures to build for
ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
# Enable ROCm
# ENV CC=/opt/rocm/llvm/bin/clang
# ENV CXX=/opt/rocm/llvm/bin/clang++

RUN apt-get update \
    && apt-get install -y \
       build-essential \
       cmake \
       git \
       libcurl4-openssl-dev \
       curl \
       libgomp1

WORKDIR /app

COPY . .

RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
    cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON \
    && cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib \
    && find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

## Base image
FROM ${BASE_ROCM_DEV_CONTAINER} AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
       git \
       python3-pip \
       python3 \
       python3-wheel \
    && pip install --break-system-packages --upgrade setuptools \
    && pip install --break-system-packages -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
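Since the Dockerfile defines `full`, `light`, and `server` stages and parameterizes the GPU architecture through `ROCM_DOCKER_ARCH`, a particular variant is picked at build time with `--target` and `--build-arg`. A hedged example; the image tag, gfx target, and model paths are placeholders, and the `--device` flags are the usual way to expose ROCm hardware to a container rather than something this file enforces:

    # Build only the server stage for a specific GPU architecture:
    docker build -f .devops/rocm.Dockerfile --target server \
      --build-arg ROCM_DOCKER_ARCH=gfx1030 -t llama-cpp-rocm-server .

    # Run it with the ROCm devices passed through and a model directory mounted:
    docker run --device /dev/kfd --device /dev/dri -v /path/to/models:/models -p 8080:8080 \
      llama-cpp-rocm-server -m /models/model.gguf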
llama.cpp/.devops/tools.sh
ADDED
@@ -0,0 +1,49 @@
#!/bin/bash
set -e

# Read the first argument into a variable
arg1="$1"

# Shift the arguments to remove the first one
shift

if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then
    exec python3 ./convert_hf_to_gguf.py "$@"
elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
    exec ./llama-quantize "$@"
elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
    exec ./llama-cli "$@"
elif [[ "$arg1" == '--bench' || "$arg1" == '-b' ]]; then
    exec ./llama-bench "$@"
elif [[ "$arg1" == '--perplexity' || "$arg1" == '-p' ]]; then
    exec ./llama-perplexity "$@"
elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
    echo "Converting PTH to GGML..."
    for i in $(ls $1/$2/ggml-model-f16.bin*); do
        if [ -f "${i/f16/q4_0}" ]; then
            echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
        else
            echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..."
            exec ./llama-quantize "$i" "${i/f16/q4_0}" q4_0
        fi
    done
elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
    exec ./llama-server "$@"
else
    echo "Unknown command: $arg1"
    echo "Available commands: "
    echo "  --run (-r): Run a model previously converted into ggml"
    echo "              ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512"
    echo "  --bench (-b): Benchmark the performance of the inference for various parameters."
    echo "              ex: -m model.gguf"
    echo "  --perplexity (-p): Measure the perplexity of a model over a given text."
    echo "              ex: -m model.gguf -f file.txt"
    echo "  --convert (-c): Convert a llama model into ggml"
    echo "              ex: --outtype f16 \"/models/7B/\" "
    echo "  --quantize (-q): Optimize with quantization process ggml"
    echo "              ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
    echo "  --all-in-one (-a): Execute --convert & --quantize"
    echo "              ex: \"/models/\" 7B"
    echo "  --server (-s): Run a model on the server"
    echo "              ex: -m /models/7B/ggml-model-q4_0.bin -c 2048 -ngl 43 -mg 1 --port 8080"
fi
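tools.sh is the ENTRYPOINT of the `full` images built above, so its flags become the container's first argument and everything after them is forwarded to the corresponding binary. A hedged usage sketch mirroring the script's own help text; the image name and model paths are placeholders:

    # Convert a Hugging Face checkout to GGUF using the bundled Python scripts:
    docker run -v /path/to/models:/models llama-cpp-full --convert --outtype f16 /models/7B/

    # Quantize the result, then chat with it:
    docker run -v /path/to/models:/models llama-cpp-full --quantize /models/7B/ggml-model-f16.gguf /models/7B/ggml-model-q4_0.gguf q4_0
    docker run -v /path/to/models:/models llama-cpp-full --run -m /models/7B/ggml-model-q4_0.gguf -p "Hello" -n 64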
llama.cpp/.devops/vulkan.Dockerfile
ADDED
@@ -0,0 +1,89 @@
ARG UBUNTU_VERSION=24.04

FROM ubuntu:$UBUNTU_VERSION AS build

# Install build tools
RUN apt update && apt install -y git build-essential cmake wget

# Install Vulkan SDK and cURL
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
    wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list https://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
    apt update -y && \
    apt-get install -y vulkan-sdk libcurl4-openssl-dev curl

# Build it
WORKDIR /app

COPY . .

RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \
    cmake --build build --config Release -j$(nproc)

RUN mkdir -p /app/lib && \
    find build -name "*.so" -exec cp {} /app/lib \;

RUN mkdir -p /app/full \
    && cp build/bin/* /app/full \
    && cp *.py /app/full \
    && cp -r gguf-py /app/full \
    && cp -r requirements /app/full \
    && cp requirements.txt /app/full \
    && cp .devops/tools.sh /app/full/tools.sh

## Base image
FROM ubuntu:$UBUNTU_VERSION AS base

RUN apt-get update \
    && apt-get install -y libgomp1 curl libvulkan-dev \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

COPY --from=build /app/lib/ /app

### Full
FROM base AS full

COPY --from=build /app/full /app

WORKDIR /app

RUN apt-get update \
    && apt-get install -y \
       git \
       python3 \
       python3-pip \
       python3-wheel \
    && pip install --break-system-packages --upgrade setuptools \
    && pip install --break-system-packages -r requirements.txt \
    && apt autoremove -y \
    && apt clean -y \
    && rm -rf /tmp/* /var/tmp/* \
    && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
    && find /var/cache -type f -delete

ENTRYPOINT ["/app/tools.sh"]

### Light, CLI only
FROM base AS light

COPY --from=build /app/full/llama-cli /app

WORKDIR /app

ENTRYPOINT [ "/app/llama-cli" ]

### Server, Server only
FROM base AS server

ENV LLAMA_ARG_HOST=0.0.0.0

COPY --from=build /app/full/llama-server /app

WORKDIR /app

HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]

ENTRYPOINT [ "/app/llama-server" ]
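The Vulkan images follow the same multi-stage layout, so they are built and run the same way. A hedged sketch; the tag name and model path are placeholders, and `--device /dev/dri` is the typical way to hand a GPU to the Vulkan loader in a container rather than something the file mandates:

    docker build -f .devops/vulkan.Dockerfile --target server -t llama-cpp-vulkan-server .
    docker run --device /dev/dri -v /path/to/models:/models -p 8080:8080 \
      llama-cpp-vulkan-server -m /models/model.gguf
    # The HEALTHCHECK above polls http://localhost:8080/health inside the container.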
llama.cpp/.dockerignore
ADDED
@@ -0,0 +1,20 @@
*.o
*.a
.cache/
# Do not ignore .git directory, otherwise the reported build number will always be 0
.github/
.gitignore
.vs/
.vscode/
.DS_Store

build*/

models/*

/llama-cli
/llama-quantize

arm_neon.h
compile_commands.json
Dockerfile
llama.cpp/.ecrc
ADDED
@@ -0,0 +1,6 @@
{
  "Exclude": ["^\\.gitmodules$", "stb_image\\.h"],
  "Disable": {
    "IndentSize": true
  }
}
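.ecrc configures editorconfig-checker, presumably consumed by the editorconfig workflow included in this upload: the entries above exclude .gitmodules and the vendored stb_image.h and disable the indent-size check. A hedged local invocation (the `ec` binary name is how editorconfig-checker commonly installs; that is an assumption here):

    # Run the same check locally from the repository root (binary name assumed):
    ec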
llama.cpp/.editorconfig
ADDED
@@ -0,0 +1,50 @@
# https://EditorConfig.org

# Top-most EditorConfig file
root = true

# Unix-style newlines with a newline ending every file, utf-8 charset
[*]
end_of_line = lf
insert_final_newline = true
trim_trailing_whitespace = true
charset = utf-8
indent_style = space
indent_size = 4

[Makefile]
indent_style = tab

[scripts/*.mk]
indent_style = tab

[prompts/*.txt]
insert_final_newline = unset

[examples/server/public/*]
indent_size = 2

[examples/server/public/deps_*]
trim_trailing_whitespace = unset
indent_style = unset
indent_size = unset

[examples/server/deps_*]
trim_trailing_whitespace = unset
indent_style = unset
indent_size = unset

[examples/llama.swiftui/llama.swiftui.xcodeproj/*]
indent_style = tab

[examples/cvector-generator/*.txt]
trim_trailing_whitespace = unset
insert_final_newline = unset

[models/templates/*.jinja]
indent_style = unset
indent_size = unset
end_of_line = unset
charset = unset
trim_trailing_whitespace = unset
insert_final_newline = unset
llama.cpp/.flake8
ADDED
@@ -0,0 +1,17 @@
[flake8]
max-line-length = 125
ignore = E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503
exclude =
    # Do not traverse examples
    examples,
    # Do not include package initializers
    __init__.py,
    # No need to traverse our git directory
    .git,
    # There's no value in checking cache directories
    __pycache__,
    # No need to include the build path
    build,
    # This contains builds that we don't want to check
    dist  # This is generated with `python build .` for package releases
# max-complexity = 10
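flake8 picks this file up automatically when invoked from the repository root, so the Python lint workflow and a local run apply the same rules. A minimal local equivalent, assuming flake8 is installed in the active Python environment:

    pip install flake8
    flake8 .   # uses the [flake8] section above: 125-char lines, listed codes ignored, examples/ excluded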
llama.cpp/.github/.DS_Store
ADDED
Binary file (6.15 kB). View file
llama.cpp/.github/ISSUE_TEMPLATE/010-bug-compilation.yml
ADDED
@@ -0,0 +1,87 @@
name: Bug (compilation)
description: Something goes wrong when trying to compile llama.cpp.
title: "Compile bug: "
labels: ["bug-unconfirmed", "compilation"]
body:
  - type: markdown
    attributes:
      value: >
        Thanks for taking the time to fill out this bug report!
        This issue template is intended for bug reports where the compilation of llama.cpp fails.
        Before opening an issue, please confirm that the compilation still fails with `-DGGML_CCACHE=OFF`.
        If the compilation succeeds with ccache disabled you should be able to permanently fix the issue
        by clearing `~/.cache/ccache` (on Linux).
  - type: textarea
    id: commit
    attributes:
      label: Git commit
      description: Which commit are you trying to compile?
      placeholder: |
        $git rev-parse HEAD
        84a07a17b1b08cf2b9747c633a2372782848a27f
    validations:
      required: true
  - type: dropdown
    id: operating-system
    attributes:
      label: Operating systems
      description: Which operating systems do you know to be affected?
      multiple: true
      options:
        - Linux
        - Mac
        - Windows
        - BSD
        - Other? (Please let us know in description)
    validations:
      required: true
  - type: dropdown
    id: backends
    attributes:
      label: GGML backends
      description: Which GGML backends do you know to be affected?
      options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
      multiple: true
    validations:
      required: true
  - type: textarea
    id: info
    attributes:
      label: Problem description & steps to reproduce
      description: >
        Please give us a summary of the problem and tell us how to reproduce it.
        If you can narrow down the bug to specific compile flags, that information would be very much appreciated by us.
      placeholder: >
        I'm trying to compile llama.cpp with CUDA support on a fresh install of Ubuntu and get error XY.
        Here are the exact commands that I used: ...
    validations:
      required: true
  - type: textarea
    id: first_bad_commit
    attributes:
      label: First Bad Commit
      description: >
        If the bug was not present on an earlier version: when did it start appearing?
        If possible, please do a git bisect and identify the exact commit that introduced the bug.
    validations:
      required: false
  - type: textarea
    id: command
    attributes:
      label: Compile command
      description: >
        Please provide the exact command you used to compile llama.cpp. For example: `cmake -B ...`.
        This will be automatically formatted into code, so no need for backticks.
      render: shell
    validations:
      required: true
  - type: textarea
    id: logs
    attributes:
      label: Relevant log output
      description: >
        Please copy and paste any relevant log output, including any generated text.
        This will be automatically formatted into code, so no need for backticks.
      render: shell
    validations:
      required: true
llama.cpp/.github/ISSUE_TEMPLATE/011-bug-results.yml
ADDED
@@ -0,0 +1,101 @@
name: Bug (model use)
description: Something goes wrong when using a model (in general, not specific to a single llama.cpp module).
title: "Eval bug: "
labels: ["bug-unconfirmed", "model evaluation"]
body:
  - type: markdown
    attributes:
      value: >
        Thanks for taking the time to fill out this bug report!
        This issue template is intended for bug reports where the model evaluation results
        (i.e. the generated text) are incorrect or llama.cpp crashes during model evaluation.
        If you encountered the issue while using an external UI (e.g. ollama),
        please reproduce your issue using one of the examples/binaries in this repository.
        The `llama-cli` binary can be used for simple and reproducible model inference.
  - type: textarea
    id: version
    attributes:
      label: Name and Version
      description: Which version of our software are you running? (use `--version` to get a version string)
      placeholder: |
        $./llama-cli --version
        version: 2999 (42b4109e)
        built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
    validations:
      required: true
  - type: dropdown
    id: operating-system
    attributes:
      label: Operating systems
      description: Which operating systems do you know to be affected?
      multiple: true
      options:
        - Linux
        - Mac
        - Windows
        - BSD
        - Other? (Please let us know in description)
    validations:
      required: true
  - type: dropdown
    id: backends
    attributes:
      label: GGML backends
      description: Which GGML backends do you know to be affected?
      options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
      multiple: true
    validations:
      required: true
  - type: textarea
    id: hardware
    attributes:
      label: Hardware
      description: Which CPUs/GPUs are you using?
      placeholder: >
        e.g. Ryzen 5950X + 2x RTX 4090
    validations:
      required: true
  - type: textarea
    id: model
    attributes:
      label: Models
      description: >
        Which model(s) at which quantization were you using when encountering the bug?
        If you downloaded a GGUF file off of Huggingface, please provide a link.
      placeholder: >
        e.g. Meta LLaMA 3.1 Instruct 8b q4_K_M
    validations:
      required: false
  - type: textarea
    id: info
    attributes:
      label: Problem description & steps to reproduce
      description: >
        Please give us a summary of the problem and tell us how to reproduce it.
        If you can narrow down the bug to specific hardware, compile flags, or command line arguments,
        that information would be very much appreciated by us.
      placeholder: >
        e.g. when I run llama-cli with -ngl 99 I get garbled outputs.
        When I use -ngl 0 it works correctly.
        Here are the exact commands that I used: ...
    validations:
      required: true
  - type: textarea
    id: first_bad_commit
    attributes:
      label: First Bad Commit
      description: >
        If the bug was not present on an earlier version: when did it start appearing?
        If possible, please do a git bisect and identify the exact commit that introduced the bug.
    validations:
      required: false
  - type: textarea
    id: logs
    attributes:
      label: Relevant log output
      description: >
        Please copy and paste any relevant log output, including the command that you entered and any generated text.
        This will be automatically formatted into code, so no need for backticks.
      render: shell
    validations:
      required: true
llama.cpp/.github/ISSUE_TEMPLATE/019-bug-misc.yml
ADDED
@@ -0,0 +1,91 @@
name: Bug (misc.)
description: Something is not working the way it should (and it's not covered by any of the above cases).
title: "Misc. bug: "
labels: ["bug-unconfirmed"]
body:
  - type: markdown
    attributes:
      value: >
        Thanks for taking the time to fill out this bug report!
        This issue template is intended for miscellaneous bugs that don't fit into any other category.
        If you encountered the issue while using an external UI (e.g. ollama),
        please reproduce your issue using one of the examples/binaries in this repository.
  - type: textarea
    id: version
    attributes:
      label: Name and Version
      description: Which version of our software is affected? (You can use `--version` to get a version string.)
      placeholder: |
        $./llama-cli --version
        version: 2999 (42b4109e)
        built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
    validations:
      required: true
  - type: dropdown
    id: operating-system
    attributes:
      label: Operating systems
      description: Which operating systems do you know to be affected?
      multiple: true
      options:
        - Linux
        - Mac
        - Windows
        - BSD
        - Other? (Please let us know in description)
    validations:
      required: false
  - type: dropdown
    id: module
    attributes:
      label: Which llama.cpp modules do you know to be affected?
      multiple: true
      options:
        - Documentation/Github
        - libllama (core library)
        - llama-cli
        - llama-server
        - llama-bench
        - llama-quantize
        - Python/Bash scripts
        - Test code
        - Other (Please specify in the next section)
    validations:
      required: false
  - type: textarea
    id: command
    attributes:
      label: Command line
      description: >
        Please provide the exact commands you entered, if applicable. For example: `llama-server -m ... -c ...`, `llama-cli -m ...`, etc.
        This will be automatically formatted into code, so no need for backticks.
      render: shell
    validations:
      required: false
  - type: textarea
    id: info
    attributes:
      label: Problem description & steps to reproduce
      description: >
        Please give us a summary of the problem and tell us how to reproduce it (if applicable).
    validations:
      required: true
  - type: textarea
    id: first_bad_commit
    attributes:
      label: First Bad Commit
      description: >
        If the bug was not present on an earlier version and it's not trivial to track down: when did it start appearing?
        If possible, please do a git bisect and identify the exact commit that introduced the bug.
    validations:
      required: false
  - type: textarea
    id: logs
    attributes:
      label: Relevant log output
      description: >
        If applicable, please copy and paste any relevant log output, including any generated text.
        This will be automatically formatted into code, so no need for backticks.
      render: shell
    validations:
      required: false
llama.cpp/.github/ISSUE_TEMPLATE/020-enhancement.yml
ADDED
@@ -0,0 +1,51 @@
name: Enhancement
description: Used to request enhancements for llama.cpp.
title: "Feature Request: "
labels: ["enhancement"]
body:
  - type: markdown
    attributes:
      value: |
        [Please post your idea first in Discussion if there is not yet a consensus for this enhancement request. This will help to keep this issue tracker focused on enhancements that the community has agreed needs to be implemented.](https://github.com/ggerganov/llama.cpp/discussions/categories/ideas)

  - type: checkboxes
    id: prerequisites
    attributes:
      label: Prerequisites
      description: Please confirm the following before submitting your enhancement request.
      options:
        - label: I am running the latest code. Mention the version if possible as well.
          required: true
        - label: I carefully followed the [README.md](https://github.com/ggerganov/llama.cpp/blob/master/README.md).
          required: true
        - label: I searched using keywords relevant to my issue to make sure that I am creating a new issue that is not already open (or closed).
          required: true
        - label: I reviewed the [Discussions](https://github.com/ggerganov/llama.cpp/discussions), and have a new and useful enhancement to share.
          required: true

  - type: textarea
    id: feature-description
    attributes:
      label: Feature Description
      description: Please provide a detailed written description of what you were trying to do, and what you expected `llama.cpp` to do as an enhancement.
      placeholder: Detailed description of the enhancement
    validations:
      required: true

  - type: textarea
    id: motivation
    attributes:
      label: Motivation
      description: Please provide a detailed written description of reasons why this feature is necessary and how it is useful to `llama.cpp` users.
      placeholder: Explanation of why this feature is needed and its benefits
    validations:
      required: true

  - type: textarea
    id: possible-implementation
    attributes:
      label: Possible Implementation
      description: If you have an idea as to how it can be implemented, please write a detailed description. Feel free to give links to external sources or share visuals that might be helpful to understand the details better.
      placeholder: Detailed description of potential implementation
    validations:
      required: false
llama.cpp/.github/ISSUE_TEMPLATE/030-research.yml
ADDED
@@ -0,0 +1,52 @@
name: Research
description: Track new technical research area.
title: "Research: "
labels: ["research 🔬"]
body:
  - type: markdown
    attributes:
      value: |
        Don't forget to check for any [duplicate research issue tickets](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3A%22research+%F0%9F%94%AC%22)

  - type: checkboxes
    id: research-stage
    attributes:
      label: Research Stage
      description: Track general state of this research ticket
      options:
        - label: Background Research (Let's try to avoid reinventing the wheel)
        - label: Hypothesis Formed (How do you think this will work and it's effect?)
        - label: Strategy / Implementation Forming
        - label: Analysis of results
        - label: Debrief / Documentation (So people in the future can learn from us)

  - type: textarea
    id: background
    attributes:
      label: Previous existing literature and research
      description: Whats the current state of the art and whats the motivation for this research?

  - type: textarea
    id: hypothesis
    attributes:
      label: Hypothesis
      description: How do you think this will work and it's effect?

  - type: textarea
    id: implementation
    attributes:
      label: Implementation
      description: Got an approach? e.g. a PR ready to go?

  - type: textarea
    id: analysis
    attributes:
      label: Analysis
      description: How does the proposed implementation behave?

  - type: textarea
    id: logs
    attributes:
      label: Relevant log output
      description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
      render: shell
llama.cpp/.github/ISSUE_TEMPLATE/040-refactor.yml
ADDED
@@ -0,0 +1,28 @@
name: Refactor (Maintainers)
description: Used to track refactoring opportunities.
title: "Refactor: "
labels: ["refactor"]
body:
  - type: markdown
    attributes:
      value: |
        Don't forget to [check for existing refactor issue tickets](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3Arefactoring) in case it's already covered.
        Also you may want to check [Pull request refactor label as well](https://github.com/ggerganov/llama.cpp/pulls?q=is%3Aopen+is%3Apr+label%3Arefactoring) for duplicates too.

  - type: textarea
    id: background-description
    attributes:
      label: Background Description
      description: Please provide a detailed written description of the pain points you are trying to solve.
      placeholder: Detailed description behind your motivation to request refactor
    validations:
      required: true

  - type: textarea
    id: possible-approaches
    attributes:
      label: Possible Refactor Approaches
      description: If you have some idea of possible approaches to solve this problem. You may want to make it a todo list.
      placeholder: Your idea of possible refactoring opportunity/approaches
    validations:
      required: false
llama.cpp/.github/ISSUE_TEMPLATE/config.yml
ADDED
@@ -0,0 +1,11 @@
blank_issues_enabled: true
contact_links:
  - name: Got an idea?
    url: https://github.com/ggerganov/llama.cpp/discussions/categories/ideas
    about: Pop it there. It may then become an enhancement ticket.
  - name: Got a question?
    url: https://github.com/ggerganov/llama.cpp/discussions/categories/q-a
    about: Ask a question there!
  - name: Want to contribute?
    url: https://github.com/ggerganov/llama.cpp/wiki/contribute
    about: Head to the contribution guide page of the wiki for areas you can help with
llama.cpp/.github/labeler.yml
ADDED
@@ -0,0 +1,86 @@
# https://github.com/actions/labeler
Kompute:
  - changed-files:
      - any-glob-to-any-file:
          - ggml/include/ggml-kompute.h
          - ggml/src/ggml-kompute/**
          - README-kompute.md
Apple Metal:
  - changed-files:
      - any-glob-to-any-file:
          - ggml/include/ggml-metal.h
          - ggml/src/ggml-metal/**
          - README-metal.md
SYCL:
  - changed-files:
      - any-glob-to-any-file:
          - ggml/include/ggml-sycl.h
          - ggml/src/ggml-sycl/**
          - docs/backend/SYCL.md
          - examples/sycl/**
Nvidia GPU:
  - changed-files:
      - any-glob-to-any-file:
          - ggml/include/ggml-cuda.h
          - ggml/src/ggml-cuda/**
Vulkan:
  - changed-files:
      - any-glob-to-any-file:
          - ggml/include/ggml-vulkan.h
          - ggml/src/ggml-vulkan/**
documentation:
  - changed-files:
      - any-glob-to-any-file:
          - docs/**
          - media/**
testing:
  - changed-files:
      - any-glob-to-any-file:
          - tests/**
build:
  - changed-files:
      - any-glob-to-any-file:
          - cmake/**
          - CMakeLists.txt
          - CMakePresets.json
examples:
  - changed-files:
      - any-glob-to-any-file: examples/**
devops:
  - changed-files:
      - any-glob-to-any-file:
          - .devops/**
          - .github/**
          - ci/**
python:
  - changed-files:
      - any-glob-to-any-file:
          - "**/*.py"
          - requirements/**
          - gguf-py/**
          - .flake8
script:
  - changed-files:
      - any-glob-to-any-file:
          - scripts/**
android:
  - changed-files:
      - any-glob-to-any-file:
          - examples/llama.android/**
server:
  - changed-files:
      - any-glob-to-any-file:
          - examples/server/**
ggml:
  - changed-files:
      - any-glob-to-any-file:
          - ggml/**
nix:
  - changed-files:
      - any-glob-to-any-file:
          - "**/*.nix"
          - .github/workflows/nix-*.yml
          - .devops/nix/nixpkgs-instances.nix
embedding:
  - changed-files:
      - any-glob-to-any-file: examples/embedding/
llama.cpp/.github/pull_request_template.md
ADDED
@@ -0,0 +1 @@
*Make sure to read the [contributing guidelines](https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md) before submitting a PR*
llama.cpp/.github/workflows/bench.yml.disabled
ADDED
@@ -0,0 +1,315 @@
# TODO: there have been some issues with the workflow, so disabling for now
# https://github.com/ggerganov/llama.cpp/issues/7893
#
# Benchmark
name: Benchmark

on:
  workflow_dispatch:
    inputs:
      gpu-series:
        description: 'Azure GPU series to run with'
        required: true
        type: choice
        options:
          - Standard_NC4as_T4_v3
          - Standard_NC24ads_A100_v4
          - Standard_NC80adis_H100_v5
      sha:
        description: 'Commit SHA1 to build'
        required: false
        type: string
      duration:
        description: 'Duration of the bench'
        type: string
        default: 10m

  push:
    branches:
      - master
    paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
  pull_request_target:
    types: [opened, synchronize, reopened]
    paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
  schedule:
    - cron: '04 2 * * *'

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}-${{ github.event.inputs.sha }}
  cancel-in-progress: true

jobs:
  bench-server-baseline:
    runs-on: Standard_NC4as_T4_v3
    env:
      RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME Do not find a way to not duplicate it
      N_USERS: 8
      DURATION: 10m

    strategy:
      matrix:
        model: [phi-2]
        ftype: [q4_0, q8_0, f16]
        include:
          - model: phi-2
            ftype: q4_0
            pr_comment_enabled: "true"

    if: |
      inputs.gpu-series == 'Standard_NC4as_T4_v3'
      || (
        github.event_name == 'schedule'
        && github.ref_name == 'master'
        && github.repository_owner == 'ggerganov'
      )
      || github.event_name == 'pull_request_target'
      || (
        github.event_name == 'push'
        && github.event.ref == 'refs/heads/master'
        && github.repository_owner == 'ggerganov'
      )
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}

      - name: Install python env
        id: pipenv
        run: |
          cd examples/server/bench
          python3 -m venv venv
          source venv/bin/activate
          pip install -r requirements.txt

      - name: Prometheus
        id: install_prometheus
        run: |
          wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz
          tar xzf prometheus*.tar.gz --strip-components=1
          ./prometheus --config.file=examples/server/bench/prometheus.yml &
          while ! nc -z localhost 9090; do
            sleep 0.1
          done

      - name: Set up Go
        uses: actions/setup-go@v5
        with:
          go-version: '1.21'

      - name: Install k6 and xk6-sse
        id: k6_installation
        run: |
          cd examples/server/bench
          go install go.k6.io/xk6/cmd/xk6@latest
          xk6 build master \
            --with github.com/phymbert/xk6-sse

      - name: Build
        id: cmake_build
        run: |
          set -eux
          cmake -B build \
              -DGGML_NATIVE=OFF \
              -DLLAMA_BUILD_SERVER=ON \
              -DLLAMA_CURL=ON \
              -DLLAMA_CUBLAS=ON \
              -DCUDAToolkit_ROOT=/usr/local/cuda \
              -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
              -DCMAKE_CUDA_ARCHITECTURES=75 \
              -DLLAMA_FATAL_WARNINGS=OFF \
              -DLLAMA_ALL_WARNINGS=OFF \
              -DCMAKE_BUILD_TYPE=Release;
          cmake --build build --config Release -j $(nproc) --target llama-server

      - name: Download the dataset
        id: download_dataset
        run: |
          cd examples/server/bench
          wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json

      - name: Server bench
        id: server_bench
        env:
          HEAD_REF: ${{ github.head_ref || github.ref_name }}
        run: |
          set -eux

          cd examples/server/bench
          source venv/bin/activate
          python bench.py \
              --runner-label ${{ env.RUNNER_LABEL }} \
              --name ${{ github.job }} \
              --branch $HEAD_REF \
              --commit ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha }} \
              --scenario script.js \
              --duration ${{ github.event.inputs.duration || env.DURATION }} \
              --hf-repo ggml-org/models \
              --hf-file ${{ matrix.model }}/ggml-model-${{ matrix.ftype }}.gguf \
              --model-path-prefix /models \
              --parallel ${{ env.N_USERS }} \
              -ngl 33 \
              --batch-size 2048 \
              --ubatch-size 256 \
              --ctx-size 16384 \
              --n-prompts 1000 \
              --max-prompt-tokens 1024 \
              --max-tokens 2048

          cat results.github.env >> $GITHUB_ENV

          # Remove dataset as we do not want it in the artefact
          rm ShareGPT_V3_unfiltered_cleaned_split.json

      - uses: actions/upload-artifact@v4
        with:
          name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
          compression-level: 9
          path: |
            examples/server/bench/*.jpg
            examples/server/bench/*.json
            examples/server/bench/*.log

      - name: Commit status
        uses: Sibz/github-status-action@v1
        with:
          authToken: ${{secrets.GITHUB_TOKEN}}
          sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }}
          context: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
          description: |
            ${{ env.BENCH_RESULTS }}
          state: 'success'

      - name: Upload benchmark images
        uses: devicons/[email protected]
        continue-on-error: true # Important as it looks unstable: 503
        id: imgur_step
        with:
          client_id: ${{secrets.IMGUR_CLIENT_ID}}
          path: |
            examples/server/bench/prompt_tokens_seconds.jpg
            examples/server/bench/predicted_tokens_seconds.jpg
            examples/server/bench/kv_cache_usage_ratio.jpg
            examples/server/bench/requests_processing.jpg

      - name: Extract mermaid
        id: set_mermaid
        run: |
          set -eux

          cd examples/server/bench
          PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid)
          echo "PROMPT_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
          echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV

          PREDICTED_TOKENS_SECONDS=$(cat predicted_tokens_seconds.mermaid)
          echo "PREDICTED_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
          echo "$PREDICTED_TOKENS_SECONDS" >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV

          KV_CACHE_USAGE_RATIO=$(cat kv_cache_usage_ratio.mermaid)
          echo "KV_CACHE_USAGE_RATIO<<EOF" >> $GITHUB_ENV
          echo "$KV_CACHE_USAGE_RATIO" >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV

          REQUESTS_PROCESSING=$(cat requests_processing.mermaid)
          echo "REQUESTS_PROCESSING<<EOF" >> $GITHUB_ENV
          echo "$REQUESTS_PROCESSING" >> $GITHUB_ENV
          echo "EOF" >> $GITHUB_ENV

      - name: Extract image url
        id: extract_image_url
        continue-on-error: true
        run: |
          set -eux

          echo "IMAGE_O=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" >> $GITHUB_ENV
          echo "IMAGE_1=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" >> $GITHUB_ENV
          echo "IMAGE_2=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" >> $GITHUB_ENV
          echo "IMAGE_3=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" >> $GITHUB_ENV

      - name: Comment PR
        uses: mshick/add-pr-comment@v2
        id: comment_pr
        if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }}
        with:
          message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
          message: |
            <p align="center">

            📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS}} iterations** 🚀

            </p>

            <details>

            <summary>Expand details for performance related PR only</summary>

            - Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
            - HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
            - Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s
            - Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s
            - ${{ env.BENCH_GRAPH_XLABEL }}


            <p align="center">

            <img width="100%" height="100%" src="${{ env.IMAGE_O }}" alt="prompt_tokens_seconds" />

            <details>

            <summary>More</summary>

            ```mermaid
            ${{ env.PROMPT_TOKENS_SECONDS }}
            ```

            </details>

            <img width="100%" height="100%" src="${{ env.IMAGE_1 }}" alt="predicted_tokens_seconds"/>

            <details>
            <summary>More</summary>

            ```mermaid
            ${{ env.PREDICTED_TOKENS_SECONDS }}
|
279 |
+
```
|
280 |
+
|
281 |
+
</details>
|
282 |
+
|
283 |
+
</p>
|
284 |
+
|
285 |
+
<details>
|
286 |
+
|
287 |
+
<summary>Details</summary>
|
288 |
+
|
289 |
+
<p align="center">
|
290 |
+
|
291 |
+
<img width="100%" height="100%" src="${{ env.IMAGE_2 }}" alt="kv_cache_usage_ratio" />
|
292 |
+
|
293 |
+
<details>
|
294 |
+
<summary>More</summary>
|
295 |
+
|
296 |
+
```mermaid
|
297 |
+
${{ env.KV_CACHE_USAGE_RATIO }}
|
298 |
+
```
|
299 |
+
|
300 |
+
</details>
|
301 |
+
|
302 |
+
<img width="100%" height="100%" src="${{ env.IMAGE_3 }}" alt="requests_processing"/>
|
303 |
+
|
304 |
+
<details>
|
305 |
+
<summary>More</summary>
|
306 |
+
|
307 |
+
```mermaid
|
308 |
+
${{ env.REQUESTS_PROCESSING }}
|
309 |
+
```
|
310 |
+
|
311 |
+
</details>
|
312 |
+
|
313 |
+
</p>
|
314 |
+
</details>
|
315 |
+
</details>
|
llama.cpp/.github/workflows/build.yml
ADDED
@@ -0,0 +1,1645 @@
name: CI

on:
  workflow_dispatch: # allows manual triggering
    inputs:
      create_release:
        description: 'Create new release'
        required: true
        type: boolean
  push:
    branches:
      - master
    paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']
  pull_request:
    types: [opened, synchronize, reopened]
    paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

# Fine-grant permission
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
permissions:
  contents: write # for creating release

env:
  BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
  GGML_NLOOP: 3
  GGML_N_THREADS: 1
  LLAMA_LOG_COLORS: 1
  LLAMA_LOG_PREFIX: 1
  LLAMA_LOG_TIMESTAMPS: 1

jobs:
  macOS-latest-cmake-arm64:
    runs-on: macos-14

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: ccache
        uses: hendrikmuhs/[email protected]
        with:
          key: macOS-latest-cmake-arm64
          evict-old-files: 1d

      - name: Dependencies
        id: depends
        continue-on-error: true
        run: |
          brew update

      - name: Build
        id: cmake_build
        run: |
          sysctl -a
          cmake -B build \
            -DCMAKE_BUILD_RPATH="@loader_path" \
            -DLLAMA_FATAL_WARNINGS=ON \
            -DLLAMA_CURL=ON \
            -DGGML_METAL_USE_BF16=ON \
            -DGGML_METAL_EMBED_LIBRARY=ON \
            -DGGML_RPC=ON
          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)

      - name: Test
        id: cmake_test
        run: |
          cd build
          ctest -L 'main|curl' --verbose --timeout 900

      - name: Determine tag name
        id: tag
        shell: bash
        run: |
          BUILD_NUMBER="$(git rev-list --count HEAD)"
          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
          else
            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
          fi

      - name: Pack artifacts
        id: pack_artifacts
        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
        run: |
          cp LICENSE ./build/bin/
          cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
          zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/*

      - name: Upload artifacts
        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
        uses: actions/upload-artifact@v4
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip
          name: llama-bin-macos-arm64.zip
105 |
+
macOS-latest-cmake-x64:
|
106 |
+
runs-on: macos-13
|
107 |
+
|
108 |
+
steps:
|
109 |
+
- name: Clone
|
110 |
+
id: checkout
|
111 |
+
uses: actions/checkout@v4
|
112 |
+
with:
|
113 |
+
fetch-depth: 0
|
114 |
+
|
115 |
+
- name: ccache
|
116 |
+
uses: hendrikmuhs/[email protected]
|
117 |
+
with:
|
118 |
+
key: macOS-latest-cmake-x64
|
119 |
+
evict-old-files: 1d
|
120 |
+
|
121 |
+
- name: Dependencies
|
122 |
+
id: depends
|
123 |
+
continue-on-error: true
|
124 |
+
run: |
|
125 |
+
brew update
|
126 |
+
|
127 |
+
- name: Build
|
128 |
+
id: cmake_build
|
129 |
+
run: |
|
130 |
+
sysctl -a
|
131 |
+
# Metal is disabled due to intermittent failures with Github runners not having a GPU:
|
132 |
+
# https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
|
133 |
+
cmake -B build \
|
134 |
+
-DCMAKE_BUILD_RPATH="@loader_path" \
|
135 |
+
-DLLAMA_FATAL_WARNINGS=ON \
|
136 |
+
-DLLAMA_CURL=ON \
|
137 |
+
-DGGML_METAL=OFF \
|
138 |
+
-DGGML_RPC=ON
|
139 |
+
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
|
140 |
+
|
141 |
+
- name: Test
|
142 |
+
id: cmake_test
|
143 |
+
run: |
|
144 |
+
cd build
|
145 |
+
ctest -L main --verbose --timeout 900
|
146 |
+
|
147 |
+
- name: Determine tag name
|
148 |
+
id: tag
|
149 |
+
shell: bash
|
150 |
+
run: |
|
151 |
+
BUILD_NUMBER="$(git rev-list --count HEAD)"
|
152 |
+
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
|
153 |
+
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
|
154 |
+
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
|
155 |
+
else
|
156 |
+
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
|
157 |
+
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
|
158 |
+
fi
|
159 |
+
|
160 |
+
- name: Pack artifacts
|
161 |
+
id: pack_artifacts
|
162 |
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
163 |
+
run: |
|
164 |
+
cp LICENSE ./build/bin/
|
165 |
+
cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
|
166 |
+
zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/*
|
167 |
+
|
168 |
+
- name: Upload artifacts
|
169 |
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
170 |
+
uses: actions/upload-artifact@v4
|
171 |
+
with:
|
172 |
+
path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip
|
173 |
+
name: llama-bin-macos-x64.zip
|
174 |
+
|
175 |
+
ubuntu-cpu-cmake:
|
176 |
+
runs-on: ubuntu-22.04
|
177 |
+
|
178 |
+
steps:
|
179 |
+
- name: Clone
|
180 |
+
id: checkout
|
181 |
+
uses: actions/checkout@v4
|
182 |
+
with:
|
183 |
+
fetch-depth: 0
|
184 |
+
|
185 |
+
- name: ccache
|
186 |
+
uses: hendrikmuhs/[email protected]
|
187 |
+
with:
|
188 |
+
key: ubuntu-cpu-cmake
|
189 |
+
evict-old-files: 1d
|
190 |
+
|
191 |
+
- name: Dependencies
|
192 |
+
id: depends
|
193 |
+
run: |
|
194 |
+
sudo apt-get update
|
195 |
+
sudo apt-get install build-essential libcurl4-openssl-dev
|
196 |
+
|
197 |
+
- name: Build
|
198 |
+
id: cmake_build
|
199 |
+
run: |
|
200 |
+
cmake -B build \
|
201 |
+
-DLLAMA_FATAL_WARNINGS=ON \
|
202 |
+
-DLLAMA_CURL=ON \
|
203 |
+
-DGGML_RPC=ON
|
204 |
+
cmake --build build --config Release -j $(nproc)
|
205 |
+
|
206 |
+
- name: Test
|
207 |
+
id: cmake_test
|
208 |
+
run: |
|
209 |
+
cd build
|
210 |
+
ctest -L 'main|curl' --verbose --timeout 900
|
211 |
+
|
212 |
+
- name: Test llama2c conversion
|
213 |
+
id: llama2c_test
|
214 |
+
run: |
|
215 |
+
cd build
|
216 |
+
echo "Fetch tokenizer"
|
217 |
+
wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin
|
218 |
+
echo "Fetch llama2c model"
|
219 |
+
wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin
|
220 |
+
./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf
|
221 |
+
./bin/llama-cli -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
|
222 |
+
|
223 |
+
- name: Determine tag name
|
224 |
+
id: tag
|
225 |
+
shell: bash
|
226 |
+
run: |
|
227 |
+
BUILD_NUMBER="$(git rev-list --count HEAD)"
|
228 |
+
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
|
229 |
+
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
|
230 |
+
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
|
231 |
+
else
|
232 |
+
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
|
233 |
+
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
|
234 |
+
fi
|
235 |
+
|
236 |
+
- name: Pack artifacts
|
237 |
+
id: pack_artifacts
|
238 |
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
239 |
+
run: |
|
240 |
+
cp LICENSE ./build/bin/
|
241 |
+
cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
|
242 |
+
zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip ./build/bin/*
|
243 |
+
|
244 |
+
- name: Upload artifacts
|
245 |
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
246 |
+
uses: actions/upload-artifact@v4
|
247 |
+
with:
|
248 |
+
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip
|
249 |
+
name: llama-bin-ubuntu-x64.zip
|
250 |
+
|
251 |
+
ubuntu-latest-cmake-sanitizer:
|
252 |
+
runs-on: ubuntu-latest
|
253 |
+
|
254 |
+
continue-on-error: true
|
255 |
+
|
256 |
+
strategy:
|
257 |
+
matrix:
|
258 |
+
sanitizer: [ADDRESS, THREAD, UNDEFINED]
|
259 |
+
build_type: [Debug]
|
260 |
+
|
261 |
+
steps:
|
262 |
+
- name: Clone
|
263 |
+
id: checkout
|
264 |
+
uses: actions/checkout@v4
|
265 |
+
|
266 |
+
- name: ccache
|
267 |
+
uses: hendrikmuhs/[email protected]
|
268 |
+
with:
|
269 |
+
key: ubuntu-latest-cmake-sanitizer-${{ matrix.sanitizer }}
|
270 |
+
evict-old-files: 1d
|
271 |
+
|
272 |
+
- name: Dependencies
|
273 |
+
id: depends
|
274 |
+
run: |
|
275 |
+
sudo apt-get update
|
276 |
+
sudo apt-get install build-essential
|
277 |
+
|
278 |
+
- name: Build
|
279 |
+
id: cmake_build
|
280 |
+
if: ${{ matrix.sanitizer != 'THREAD' }}
|
281 |
+
run: |
|
282 |
+
cmake -B build \
|
283 |
+
-DLLAMA_FATAL_WARNINGS=ON \
|
284 |
+
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
|
285 |
+
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
|
286 |
+
cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
|
287 |
+
|
288 |
+
- name: Build (no OpenMP)
|
289 |
+
id: cmake_build_no_openmp
|
290 |
+
if: ${{ matrix.sanitizer == 'THREAD' }}
|
291 |
+
run: |
|
292 |
+
cmake -B build \
|
293 |
+
-DLLAMA_FATAL_WARNINGS=ON \
|
294 |
+
-DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
|
295 |
+
-DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
|
296 |
+
-DGGML_OPENMP=OFF
|
297 |
+
cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
|
298 |
+
|
299 |
+
- name: Test
|
300 |
+
id: cmake_test
|
301 |
+
run: |
|
302 |
+
cd build
|
303 |
+
ctest -L main --verbose --timeout 900
|
304 |
+
|
305 |
+
ubuntu-latest-llguidance:
|
306 |
+
runs-on: ubuntu-latest
|
307 |
+
|
308 |
+
steps:
|
309 |
+
- name: Clone
|
310 |
+
id: checkout
|
311 |
+
uses: actions/checkout@v4
|
312 |
+
|
313 |
+
- name: Dependencies
|
314 |
+
id: depends
|
315 |
+
run: |
|
316 |
+
sudo apt-get update
|
317 |
+
sudo apt-get install build-essential
|
318 |
+
|
319 |
+
- name: Build
|
320 |
+
id: cmake_build
|
321 |
+
run: |
|
322 |
+
mkdir build
|
323 |
+
cd build
|
324 |
+
cmake .. \
|
325 |
+
-DLLAMA_FATAL_WARNINGS=ON \
|
326 |
+
-DLLAMA_LLGUIDANCE=ON
|
327 |
+
cmake --build . --config Release -j $(nproc)
|
328 |
+
|
329 |
+
- name: Test
|
330 |
+
id: cmake_test
|
331 |
+
run: |
|
332 |
+
cd build
|
333 |
+
ctest -L main --verbose --timeout 900
|
334 |
+
|
335 |
+
ubuntu-latest-cmake-rpc:
|
336 |
+
runs-on: ubuntu-latest
|
337 |
+
|
338 |
+
continue-on-error: true
|
339 |
+
|
340 |
+
steps:
|
341 |
+
- name: Clone
|
342 |
+
id: checkout
|
343 |
+
uses: actions/checkout@v4
|
344 |
+
|
345 |
+
- name: ccache
|
346 |
+
uses: hendrikmuhs/[email protected]
|
347 |
+
with:
|
348 |
+
key: ubuntu-latest-cmake-rpc
|
349 |
+
evict-old-files: 1d
|
350 |
+
|
351 |
+
- name: Dependencies
|
352 |
+
id: depends
|
353 |
+
run: |
|
354 |
+
sudo apt-get update
|
355 |
+
sudo apt-get install build-essential
|
356 |
+
|
357 |
+
- name: Build
|
358 |
+
id: cmake_build
|
359 |
+
run: |
|
360 |
+
cmake -B build \
|
361 |
+
-DGGML_RPC=ON
|
362 |
+
cmake --build build --config Release -j $(nproc)
|
363 |
+
|
364 |
+
- name: Test
|
365 |
+
id: cmake_test
|
366 |
+
run: |
|
367 |
+
cd build
|
368 |
+
ctest -L main --verbose
|
369 |
+
|
370 |
+
ubuntu-22-cmake-vulkan:
|
371 |
+
runs-on: ubuntu-22.04
|
372 |
+
|
373 |
+
steps:
|
374 |
+
- name: Clone
|
375 |
+
id: checkout
|
376 |
+
uses: actions/checkout@v4
|
377 |
+
|
378 |
+
- name: ccache
|
379 |
+
uses: hendrikmuhs/[email protected]
|
380 |
+
with:
|
381 |
+
key: ubuntu-22-cmake-vulkan
|
382 |
+
evict-old-files: 1d
|
383 |
+
|
384 |
+
- name: Dependencies
|
385 |
+
id: depends
|
386 |
+
run: |
|
387 |
+
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
|
388 |
+
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
|
389 |
+
sudo apt-get update -y
|
390 |
+
sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk
|
391 |
+
|
392 |
+
- name: Build
|
393 |
+
id: cmake_build
|
394 |
+
run: |
|
395 |
+
cmake -B build \
|
396 |
+
-DGGML_VULKAN=ON
|
397 |
+
cmake --build build --config Release -j $(nproc)
|
398 |
+
|
399 |
+
- name: Test
|
400 |
+
id: cmake_test
|
401 |
+
run: |
|
402 |
+
cd build
|
403 |
+
# This is using llvmpipe and runs slower than other backends
|
404 |
+
ctest -L main --verbose --timeout 1800
|
405 |
+
|
406 |
+
ubuntu-22-cmake-hip:
|
407 |
+
runs-on: ubuntu-22.04
|
408 |
+
container: rocm/dev-ubuntu-22.04:6.0.2
|
409 |
+
|
410 |
+
steps:
|
411 |
+
- name: Clone
|
412 |
+
id: checkout
|
413 |
+
uses: actions/checkout@v4
|
414 |
+
|
415 |
+
- name: Dependencies
|
416 |
+
id: depends
|
417 |
+
run: |
|
418 |
+
sudo apt-get update
|
419 |
+
sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev
|
420 |
+
|
421 |
+
- name: ccache
|
422 |
+
uses: hendrikmuhs/[email protected]
|
423 |
+
with:
|
424 |
+
key: ubuntu-22-cmake-hip
|
425 |
+
evict-old-files: 1d
|
426 |
+
|
427 |
+
- name: Build with native CMake HIP support
|
428 |
+
id: cmake_build
|
429 |
+
run: |
|
430 |
+
cmake -B build -S . \
|
431 |
+
-DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
|
432 |
+
-DGGML_HIP=ON
|
433 |
+
cmake --build build --config Release -j $(nproc)
|
434 |
+
|
435 |
+
- name: Build with legacy HIP support
|
436 |
+
id: cmake_build_legacy_hip
|
437 |
+
run: |
|
438 |
+
cmake -B build2 -S . \
|
439 |
+
-DCMAKE_C_COMPILER=hipcc \
|
440 |
+
-DCMAKE_CXX_COMPILER=hipcc \
|
441 |
+
-DGGML_HIP=ON
|
442 |
+
cmake --build build2 --config Release -j $(nproc)
|
443 |
+
|
444 |
+
ubuntu-22-cmake-musa:
|
445 |
+
runs-on: ubuntu-22.04
|
446 |
+
container: mthreads/musa:rc3.1.0-devel-ubuntu22.04
|
447 |
+
|
448 |
+
steps:
|
449 |
+
- name: Clone
|
450 |
+
id: checkout
|
451 |
+
uses: actions/checkout@v4
|
452 |
+
|
453 |
+
- name: Dependencies
|
454 |
+
id: depends
|
455 |
+
run: |
|
456 |
+
apt-get update
|
457 |
+
apt-get install -y build-essential git cmake libcurl4-openssl-dev
|
458 |
+
|
459 |
+
- name: ccache
|
460 |
+
uses: hendrikmuhs/[email protected]
|
461 |
+
with:
|
462 |
+
key: ubuntu-22-cmake-musa
|
463 |
+
evict-old-files: 1d
|
464 |
+
|
465 |
+
- name: Build with native CMake MUSA support
|
466 |
+
id: cmake_build
|
467 |
+
run: |
|
468 |
+
cmake -B build -S . \
|
469 |
+
-DGGML_MUSA=ON
|
470 |
+
cmake --build build --config Release -j $(nproc)
|
471 |
+
|
472 |
+
ubuntu-22-cmake-sycl:
|
473 |
+
runs-on: ubuntu-22.04
|
474 |
+
|
475 |
+
continue-on-error: true
|
476 |
+
|
477 |
+
steps:
|
478 |
+
- uses: actions/checkout@v4
|
479 |
+
|
480 |
+
- name: add oneAPI to apt
|
481 |
+
shell: bash
|
482 |
+
run: |
|
483 |
+
cd /tmp
|
484 |
+
wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
485 |
+
sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
486 |
+
rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
487 |
+
sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
|
488 |
+
|
489 |
+
- name: install oneAPI dpcpp compiler
|
490 |
+
shell: bash
|
491 |
+
run: |
|
492 |
+
sudo apt update
|
493 |
+
sudo apt install intel-oneapi-compiler-dpcpp-cpp
|
494 |
+
|
495 |
+
- name: install oneAPI MKL library
|
496 |
+
shell: bash
|
497 |
+
run: |
|
498 |
+
sudo apt install intel-oneapi-mkl-devel
|
499 |
+
|
500 |
+
- name: Clone
|
501 |
+
id: checkout
|
502 |
+
uses: actions/checkout@v4
|
503 |
+
|
504 |
+
- name: ccache
|
505 |
+
uses: hendrikmuhs/[email protected]
|
506 |
+
with:
|
507 |
+
key: ubuntu-22-cmake-sycl
|
508 |
+
evict-old-files: 1d
|
509 |
+
|
510 |
+
- name: Build
|
511 |
+
id: cmake_build
|
512 |
+
run: |
|
513 |
+
source /opt/intel/oneapi/setvars.sh
|
514 |
+
cmake -B build \
|
515 |
+
-DGGML_SYCL=ON \
|
516 |
+
-DCMAKE_C_COMPILER=icx \
|
517 |
+
-DCMAKE_CXX_COMPILER=icpx
|
518 |
+
cmake --build build --config Release -j $(nproc)
|
519 |
+
|
520 |
+
ubuntu-22-cmake-sycl-fp16:
|
521 |
+
runs-on: ubuntu-22.04
|
522 |
+
|
523 |
+
continue-on-error: true
|
524 |
+
|
525 |
+
steps:
|
526 |
+
- uses: actions/checkout@v4
|
527 |
+
|
528 |
+
- name: add oneAPI to apt
|
529 |
+
shell: bash
|
530 |
+
run: |
|
531 |
+
cd /tmp
|
532 |
+
wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
533 |
+
sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
534 |
+
rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
535 |
+
sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
|
536 |
+
|
537 |
+
- name: install oneAPI dpcpp compiler
|
538 |
+
shell: bash
|
539 |
+
run: |
|
540 |
+
sudo apt update
|
541 |
+
sudo apt install intel-oneapi-compiler-dpcpp-cpp
|
542 |
+
|
543 |
+
- name: install oneAPI MKL library
|
544 |
+
shell: bash
|
545 |
+
run: |
|
546 |
+
sudo apt install intel-oneapi-mkl-devel
|
547 |
+
|
548 |
+
- name: Clone
|
549 |
+
id: checkout
|
550 |
+
uses: actions/checkout@v4
|
551 |
+
|
552 |
+
- name: ccache
|
553 |
+
uses: hendrikmuhs/[email protected]
|
554 |
+
with:
|
555 |
+
key: ubuntu-22-cmake-sycl-fp16
|
556 |
+
evict-old-files: 1d
|
557 |
+
|
558 |
+
- name: Build
|
559 |
+
id: cmake_build
|
560 |
+
run: |
|
561 |
+
source /opt/intel/oneapi/setvars.sh
|
562 |
+
cmake -B build \
|
563 |
+
-DGGML_SYCL=ON \
|
564 |
+
-DCMAKE_C_COMPILER=icx \
|
565 |
+
-DCMAKE_CXX_COMPILER=icpx \
|
566 |
+
-DGGML_SYCL_F16=ON
|
567 |
+
cmake --build build --config Release -j $(nproc)
|
568 |
+
|
569 |
+
macOS-latest-cmake-ios:
|
570 |
+
runs-on: macos-latest
|
571 |
+
|
572 |
+
steps:
|
573 |
+
- name: Clone
|
574 |
+
id: checkout
|
575 |
+
uses: actions/checkout@v4
|
576 |
+
|
577 |
+
- name: ccache
|
578 |
+
uses: hendrikmuhs/[email protected]
|
579 |
+
with:
|
580 |
+
key: macOS-latest-cmake-ios
|
581 |
+
evict-old-files: 1d
|
582 |
+
|
583 |
+
- name: Dependencies
|
584 |
+
id: depends
|
585 |
+
continue-on-error: true
|
586 |
+
run: |
|
587 |
+
brew update
|
588 |
+
|
589 |
+
- name: Build
|
590 |
+
id: cmake_build
|
591 |
+
run: |
|
592 |
+
sysctl -a
|
593 |
+
cmake -B build -G Xcode \
|
594 |
+
-DGGML_METAL_USE_BF16=ON \
|
595 |
+
-DGGML_METAL_EMBED_LIBRARY=ON \
|
596 |
+
-DLLAMA_BUILD_EXAMPLES=OFF \
|
597 |
+
-DLLAMA_BUILD_TESTS=OFF \
|
598 |
+
-DLLAMA_BUILD_SERVER=OFF \
|
599 |
+
-DCMAKE_SYSTEM_NAME=iOS \
|
600 |
+
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
|
601 |
+
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
|
602 |
+
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
|
603 |
+
|
604 |
+
macOS-latest-cmake-tvos:
|
605 |
+
runs-on: macos-latest
|
606 |
+
|
607 |
+
steps:
|
608 |
+
- name: Clone
|
609 |
+
id: checkout
|
610 |
+
uses: actions/checkout@v4
|
611 |
+
|
612 |
+
- name: ccache
|
613 |
+
uses: hendrikmuhs/[email protected]
|
614 |
+
with:
|
615 |
+
key: macOS-latest-cmake-tvos
|
616 |
+
evict-old-files: 1d
|
617 |
+
|
618 |
+
- name: Dependencies
|
619 |
+
id: depends
|
620 |
+
continue-on-error: true
|
621 |
+
run: |
|
622 |
+
brew update
|
623 |
+
|
624 |
+
- name: Build
|
625 |
+
id: cmake_build
|
626 |
+
run: |
|
627 |
+
sysctl -a
|
628 |
+
cmake -B build -G Xcode \
|
629 |
+
-DGGML_METAL_USE_BF16=ON \
|
630 |
+
-DGGML_METAL_EMBED_LIBRARY=ON \
|
631 |
+
-DLLAMA_BUILD_EXAMPLES=OFF \
|
632 |
+
-DLLAMA_BUILD_TESTS=OFF \
|
633 |
+
-DLLAMA_BUILD_SERVER=OFF \
|
634 |
+
-DCMAKE_SYSTEM_NAME=tvOS \
|
635 |
+
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
|
636 |
+
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
|
637 |
+
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
|
638 |
+
|
639 |
+
macOS-latest-swift:
|
640 |
+
runs-on: macos-latest
|
641 |
+
|
642 |
+
strategy:
|
643 |
+
matrix:
|
644 |
+
destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
|
645 |
+
|
646 |
+
steps:
|
647 |
+
- name: Clone
|
648 |
+
id: checkout
|
649 |
+
uses: actions/checkout@v4
|
650 |
+
|
651 |
+
- name: ccache
|
652 |
+
uses: hendrikmuhs/[email protected]
|
653 |
+
with:
|
654 |
+
key: macOS-latest-swift
|
655 |
+
evict-old-files: 1d
|
656 |
+
|
657 |
+
- name: Dependencies
|
658 |
+
id: depends
|
659 |
+
continue-on-error: true
|
660 |
+
run: |
|
661 |
+
brew update
|
662 |
+
|
663 |
+
- name: Build llama.cpp with CMake
|
664 |
+
id: cmake_build
|
665 |
+
run: |
|
666 |
+
sysctl -a
|
667 |
+
cmake -B build -G Xcode \
|
668 |
+
-DGGML_METAL_USE_BF16=ON \
|
669 |
+
-DGGML_METAL_EMBED_LIBRARY=ON \
|
670 |
+
-DLLAMA_BUILD_EXAMPLES=OFF \
|
671 |
+
-DLLAMA_BUILD_TESTS=OFF \
|
672 |
+
-DLLAMA_BUILD_SERVER=OFF \
|
673 |
+
-DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
|
674 |
+
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
|
675 |
+
sudo cmake --install build --config Release
|
676 |
+
|
677 |
+
- name: xcodebuild for swift package
|
678 |
+
id: xcodebuild
|
679 |
+
run: |
|
680 |
+
xcodebuild -scheme llama-Package -destination "${{ matrix.destination }}"
|
681 |
+
|
682 |
+
windows-msys2:
|
683 |
+
runs-on: windows-latest
|
684 |
+
|
685 |
+
strategy:
|
686 |
+
fail-fast: false
|
687 |
+
matrix:
|
688 |
+
include:
|
689 |
+
- { sys: UCRT64, env: ucrt-x86_64, build: Release }
|
690 |
+
- { sys: CLANG64, env: clang-x86_64, build: Release }
|
691 |
+
|
692 |
+
steps:
|
693 |
+
- name: Clone
|
694 |
+
uses: actions/checkout@v4
|
695 |
+
|
696 |
+
- name: ccache
|
697 |
+
uses: hendrikmuhs/[email protected]
|
698 |
+
with:
|
699 |
+
key: windows-msys2
|
700 |
+
variant: sccache
|
701 |
+
evict-old-files: 1d
|
702 |
+
|
703 |
+
- name: Setup ${{ matrix.sys }}
|
704 |
+
uses: msys2/setup-msys2@v2
|
705 |
+
with:
|
706 |
+
update: true
|
707 |
+
msystem: ${{matrix.sys}}
|
708 |
+
install: >-
|
709 |
+
base-devel
|
710 |
+
git
|
711 |
+
mingw-w64-${{matrix.env}}-toolchain
|
712 |
+
mingw-w64-${{matrix.env}}-cmake
|
713 |
+
mingw-w64-${{matrix.env}}-openblas
|
714 |
+
|
715 |
+
- name: Build using CMake
|
716 |
+
shell: msys2 {0}
|
717 |
+
run: |
|
718 |
+
cmake -B build
|
719 |
+
cmake --build build --config ${{ matrix.build }} -j $(nproc)
|
720 |
+
|
721 |
+
- name: Clean after building using CMake
|
722 |
+
shell: msys2 {0}
|
723 |
+
run: |
|
724 |
+
rm -rf build
|
725 |
+
|
726 |
+
- name: Build using CMake w/ OpenBLAS
|
727 |
+
shell: msys2 {0}
|
728 |
+
run: |
|
729 |
+
cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
730 |
+
cmake --build build --config ${{ matrix.build }} -j $(nproc)
|
731 |
+
|
732 |
+
windows-latest-cmake:
|
733 |
+
runs-on: windows-latest
|
734 |
+
|
735 |
+
env:
|
736 |
+
OPENBLAS_VERSION: 0.3.23
|
737 |
+
SDE_VERSION: 9.33.0-2024-01-07
|
738 |
+
VULKAN_VERSION: 1.3.261.1
|
739 |
+
|
740 |
+
strategy:
|
741 |
+
matrix:
|
742 |
+
include:
|
743 |
+
- build: 'noavx-x64'
|
744 |
+
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
|
745 |
+
- build: 'avx2-x64'
|
746 |
+
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON'
|
747 |
+
- build: 'avx-x64'
|
748 |
+
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF'
|
749 |
+
- build: 'avx512-x64'
|
750 |
+
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON'
|
751 |
+
- build: 'openblas-x64'
|
752 |
+
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
|
753 |
+
- build: 'kompute-x64'
|
754 |
+
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON'
|
755 |
+
- build: 'vulkan-x64'
|
756 |
+
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON'
|
757 |
+
- build: 'llvm-arm64'
|
758 |
+
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
|
759 |
+
- build: 'msvc-arm64'
|
760 |
+
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
|
761 |
+
- build: 'llvm-arm64-opencl-adreno'
|
762 |
+
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
|
763 |
+
|
764 |
+
steps:
|
765 |
+
- name: Clone
|
766 |
+
id: checkout
|
767 |
+
uses: actions/checkout@v4
|
768 |
+
with:
|
769 |
+
fetch-depth: 0
|
770 |
+
|
771 |
+
- name: ccache
|
772 |
+
uses: hendrikmuhs/[email protected]
|
773 |
+
with:
|
774 |
+
key: windows-latest-cmake-${{ matrix.build }}
|
775 |
+
variant: sccache
|
776 |
+
evict-old-files: 1d
|
777 |
+
|
778 |
+
- name: Clone Kompute submodule
|
779 |
+
id: clone_kompute
|
780 |
+
if: ${{ matrix.build == 'kompute-x64' }}
|
781 |
+
run: |
|
782 |
+
git submodule update --init ggml/src/ggml-kompute/kompute
|
783 |
+
|
784 |
+
- name: Download OpenBLAS
|
785 |
+
id: get_openblas
|
786 |
+
if: ${{ matrix.build == 'openblas-x64' }}
|
787 |
+
run: |
|
788 |
+
curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip"
|
789 |
+
curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE"
|
790 |
+
mkdir $env:RUNNER_TEMP/openblas
|
791 |
+
tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas
|
792 |
+
$vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
|
793 |
+
$msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
|
794 |
+
$lib = $(join-path $msvc 'bin\Hostx64\x64\lib.exe')
|
795 |
+
& $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll
|
796 |
+
|
797 |
+
- name: Install Vulkan SDK
|
798 |
+
id: get_vulkan
|
799 |
+
if: ${{ matrix.build == 'kompute-x64' || matrix.build == 'vulkan-x64' }}
|
800 |
+
run: |
|
801 |
+
curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
|
802 |
+
& "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
|
803 |
+
Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
|
804 |
+
Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
|
805 |
+
|
806 |
+
- name: Install Ninja
|
807 |
+
id: install_ninja
|
808 |
+
run: |
|
809 |
+
choco install ninja
|
810 |
+
|
811 |
+
- name: Install OpenCL Headers and Libs
|
812 |
+
id: install_opencl
|
813 |
+
if: ${{ matrix.build == 'llvm-arm64-opencl-adreno' }}
|
814 |
+
run: |
|
815 |
+
git clone https://github.com/KhronosGroup/OpenCL-Headers
|
816 |
+
cd OpenCL-Headers
|
817 |
+
cmake -B build `
|
818 |
+
-DBUILD_TESTING=OFF `
|
819 |
+
-DOPENCL_HEADERS_BUILD_TESTING=OFF `
|
820 |
+
-DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF `
|
821 |
+
-DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
|
822 |
+
cmake --build build --target install
|
823 |
+
git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader
|
824 |
+
cd OpenCL-ICD-Loader
|
825 |
+
cmake -B build-arm64-release `
|
826 |
+
-A arm64 `
|
827 |
+
-DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" `
|
828 |
+
-DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
|
829 |
+
cmake --build build-arm64-release --target install --config release
|
830 |
+
|
831 |
+
- name: Build
|
832 |
+
id: cmake_build
|
833 |
+
run: |
|
834 |
+
cmake -S . -B build ${{ matrix.defines }}
|
835 |
+
cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS}
|
836 |
+
|
837 |
+
- name: Add libopenblas.dll
|
838 |
+
id: add_libopenblas_dll
|
839 |
+
if: ${{ matrix.build == 'openblas-x64' }}
|
840 |
+
run: |
|
841 |
+
cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll
|
842 |
+
cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt
|
843 |
+
|
844 |
+
- name: Check AVX512F support
|
845 |
+
id: check_avx512f
|
846 |
+
if: ${{ matrix.build == 'avx512-x64' }}
|
847 |
+
continue-on-error: true
|
848 |
+
run: |
|
849 |
+
cd build
|
850 |
+
$vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
|
851 |
+
$msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
|
852 |
+
$cl = $(join-path $msvc 'bin\Hostx64\x64\cl.exe')
|
853 |
+
echo 'int main(void){unsigned int a[4];__cpuid(a,7);return !(a[1]&65536);}' >> avx512f.c
|
854 |
+
& $cl /O2 /GS- /kernel avx512f.c /link /nodefaultlib /entry:main
|
855 |
+
.\avx512f.exe && echo "AVX512F: YES" && ( echo HAS_AVX512F=1 >> $env:GITHUB_ENV ) || echo "AVX512F: NO"
|
856 |
+
|
857 |
+
- name: Test
|
858 |
+
id: cmake_test
|
859 |
+
# not all machines have native AVX-512
|
860 |
+
if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'llvm-arm64-opencl-adreno' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }}
|
861 |
+
run: |
|
862 |
+
cd build
|
863 |
+
ctest -L main -C Release --verbose --timeout 900
|
864 |
+
|
865 |
+
- name: Test (Intel SDE)
|
866 |
+
id: cmake_test_sde
|
867 |
+
if: ${{ matrix.build == 'avx512-x64' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation
|
868 |
+
run: |
|
869 |
+
curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz"
|
870 |
+
# for some weird reason windows tar doesn't like sde tar.xz
|
871 |
+
7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz
|
872 |
+
7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
|
873 |
+
$sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
|
874 |
+
cd build
|
875 |
+
$env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1
|
876 |
+
& $sde -future -- ctest -L main -C Release --verbose --timeout 900
|
877 |
+
|
878 |
+
- name: Determine tag name
|
879 |
+
id: tag
|
880 |
+
shell: bash
|
881 |
+
run: |
|
882 |
+
BUILD_NUMBER="$(git rev-list --count HEAD)"
|
883 |
+
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
|
884 |
+
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
|
885 |
+
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
|
886 |
+
else
|
887 |
+
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
|
888 |
+
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
|
889 |
+
fi
|
890 |
+
|
891 |
+
- name: Pack artifacts
|
892 |
+
id: pack_artifacts
|
893 |
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
894 |
+
run: |
|
895 |
+
Copy-Item LICENSE .\build\bin\Release\llama.cpp.txt
|
896 |
+
Copy-Item .\examples\run\linenoise.cpp\LICENSE .\build\bin\Release\linenoise.cpp.txt
|
897 |
+
7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip .\build\bin\Release\*
|
898 |
+
|
899 |
+
- name: Upload artifacts
|
900 |
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
901 |
+
uses: actions/upload-artifact@v4
|
902 |
+
with:
|
903 |
+
path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip
|
904 |
+
name: llama-bin-win-${{ matrix.build }}.zip
|
905 |
+
|
906 |
+
ubuntu-latest-cmake-cuda:
|
907 |
+
runs-on: ubuntu-latest
|
908 |
+
container: nvidia/cuda:12.6.2-devel-ubuntu24.04
|
909 |
+
|
910 |
+
steps:
|
911 |
+
- name: Clone
|
912 |
+
id: checkout
|
913 |
+
uses: actions/checkout@v4
|
914 |
+
with:
|
915 |
+
fetch-depth: 0
|
916 |
+
|
917 |
+
- name: Install dependencies
|
918 |
+
env:
|
919 |
+
DEBIAN_FRONTEND: noninteractive
|
920 |
+
run: |
|
921 |
+
apt update
|
922 |
+
apt install -y cmake build-essential ninja-build libgomp1 git
|
923 |
+
|
924 |
+
- name: ccache
|
925 |
+
uses: hendrikmuhs/[email protected]
|
926 |
+
with:
|
927 |
+
key: ubuntu-latest-cmake-cuda
|
928 |
+
evict-old-files: 1d
|
929 |
+
|
930 |
+
- name: Build with CMake
|
931 |
+
run: |
|
932 |
+
cmake -S . -B build -G Ninja \
|
933 |
+
-DCMAKE_BUILD_TYPE=Release \
|
934 |
+
-DCMAKE_CUDA_ARCHITECTURES=89-real \
|
935 |
+
-DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \
|
936 |
+
-DLLAMA_FATAL_WARNINGS=ON \
|
937 |
+
-DGGML_NATIVE=OFF \
|
938 |
+
-DGGML_CUDA=ON
|
939 |
+
cmake --build build
|
940 |
+
|
941 |
+
windows-2019-cmake-cuda:
|
942 |
+
runs-on: windows-2019
|
943 |
+
|
944 |
+
strategy:
|
945 |
+
matrix:
|
946 |
+
cuda: ['12.4', '11.7']
|
947 |
+
build: ['cuda']
|
948 |
+
|
949 |
+
steps:
|
950 |
+
- name: Clone
|
951 |
+
id: checkout
|
952 |
+
uses: actions/checkout@v4
|
953 |
+
with:
|
954 |
+
fetch-depth: 0
|
955 |
+
|
956 |
+
- name: Install ccache
|
957 |
+
uses: hendrikmuhs/[email protected]
|
958 |
+
with:
|
959 |
+
key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }}
|
960 |
+
variant: sccache
|
961 |
+
evict-old-files: 1d
|
962 |
+
|
963 |
+
- name: Install Cuda Toolkit 11.7
|
964 |
+
if: ${{ matrix.cuda == '11.7' }}
|
965 |
+
run: |
|
966 |
+
mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
|
967 |
+
choco install unzip -y
|
968 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-11.7.99-archive.zip"
|
969 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-11.7.99-archive.zip"
|
970 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-11.7.99-archive.zip"
|
971 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-11.7.4.6-archive.zip"
|
972 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-11.7.91-archive.zip"
|
973 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-11.7.91-archive.zip"
|
974 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-11.7.101-archive.zip"
|
975 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-11.7.91-archive.zip"
|
976 |
+
unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
|
977 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cudart-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
|
978 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvcc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
|
979 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvrtc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
|
980 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libcublas-windows-x86_64-11.7.4.6-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
|
981 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvtx-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
|
982 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\visual_studio_integration-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
|
983 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvprof-windows-x86_64-11.7.101-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
|
984 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cccl-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
|
985 |
+
echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
986 |
+
echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
987 |
+
echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
|
988 |
+
echo "CUDA_PATH_V11_7=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
|
989 |
+
|
990 |
+
- name: Install Cuda Toolkit 12.4
|
991 |
+
if: ${{ matrix.cuda == '12.4' }}
|
992 |
+
run: |
|
993 |
+
mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
|
994 |
+
choco install unzip -y
|
995 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.4.127-archive.zip"
|
996 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.4.131-archive.zip"
|
997 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-12.4.127-archive.zip"
|
998 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-12.4.5.8-archive.zip"
|
999 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.4.127-archive.zip"
|
1000 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-12.4.127-archive.zip"
|
1001 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.4.127-archive.zip"
|
1002 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-12.4.127-archive.zip"
|
1003 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-12.4.127-archive.zip"
|
1004 |
+
unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
|
1005 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cudart-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
|
1006 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvcc-windows-x86_64-12.4.131-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
|
1007 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvrtc-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
|
1008 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libcublas-windows-x86_64-12.4.5.8-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
|
1009 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvtx-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
|
1010 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_profiler_api-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
|
1011 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\visual_studio_integration-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
|
1012 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvprof-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
|
1013 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cccl-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
|
1014 |
+
echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
1015 |
+
echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
1016 |
+
echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
|
1017 |
+
echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
|
1018 |
+
|
1019 |
+
- name: Install Ninja
|
1020 |
+
id: install_ninja
|
1021 |
+
run: |
|
1022 |
+
choco install ninja
|
1023 |
+
|
1024 |
+
- name: Build
|
1025 |
+
id: cmake_build
|
1026 |
+
        shell: cmd
        run: |
          call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
          cmake -S . -B build -G "Ninja Multi-Config" ^
            -DLLAMA_BUILD_SERVER=ON ^
            -DGGML_NATIVE=OFF ^
            -DGGML_CUDA=ON ^
            -DGGML_RPC=ON
          set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
          cmake --build build --config Release -j %NINJA_JOBS% -t ggml
          cmake --build build --config Release

      - name: Determine tag name
        id: tag
        shell: bash
        run: |
          BUILD_NUMBER="$(git rev-list --count HEAD)"
          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
          else
            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
          fi

      - name: Pack artifacts
        id: pack_artifacts
        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
        run: |
          7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip .\build\bin\Release\*

      - name: Upload artifacts
        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
        uses: actions/upload-artifact@v4
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip
          name: llama-bin-win-cu${{ matrix.cuda }}-x64.zip

      - name: Copy and pack Cuda runtime
        if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
        run: |
          echo "Cuda install location: ${{ env.CUDA_PATH }}"
          $dst='.\build\bin\cudart\'
          robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
          robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
          7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip $dst\*

      - name: Upload Cuda runtime
        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
        uses: actions/upload-artifact@v4
        with:
          path: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
          name: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip

  windows-latest-cmake-sycl:
    runs-on: windows-latest

    defaults:
      run:
        shell: bash

    env:
      WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe
      WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
      ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: ccache
        uses: hendrikmuhs/[email protected]
        with:
          key: windows-latest-cmake-sycl
          variant: sccache
          evict-old-files: 1d

      - name: Install
        run: |
          scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL

      - name: Build
        id: cmake_build
        run: examples/sycl/win-build-sycl.bat

      - name: Determine tag name
        id: tag
        shell: bash
        run: |
          BUILD_NUMBER="$(git rev-list --count HEAD)"
          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
          else
            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
          fi

      - name: Build the release package
        id: pack_artifacts
        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
        run: |
          echo "cp oneAPI running time dll files in ${{ env.ONEAPI_ROOT }} to ./build/bin"

          cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.5.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_core.2.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_tbb_thread.2.dll" ./build/bin

          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_opencl.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_loader.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_win_proxy_loader.dll" ./build/bin

          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl8.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin

          cp "${{ env.ONEAPI_ROOT }}/dnnl/latest/bin/dnnl.dll" ./build/bin
          cp "${{ env.ONEAPI_ROOT }}/tbb/latest/bin/tbb12.dll" ./build/bin

          echo "cp oneAPI running time dll files to ./build/bin done"
          7z a llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip ./build/bin/*

      - name: Upload the release package
        if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
        uses: actions/upload-artifact@v4
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip
          name: llama-bin-win-sycl-x64.zip

  windows-latest-cmake-hip:
    if: ${{ github.event.inputs.create_release != 'true' }}
    runs-on: windows-latest

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4

      - name: Install
        id: depends
        run: |
          $ErrorActionPreference = "Stop"
          write-host "Downloading AMD HIP SDK Installer"
          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
          write-host "Installing AMD HIP SDK"
          Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
          write-host "Completed AMD HIP SDK installation"

      - name: Verify ROCm
        id: verify
        run: |
          & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version

      - name: Install ccache
        uses: hendrikmuhs/[email protected]
        with:
          key: ${{ github.job }}
          evict-old-files: 1d

      - name: Build
        id: cmake_build
        run: |
          $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
          $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
          cmake -G "Unix Makefiles" -B build -S . `
            -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
            -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
            -DCMAKE_BUILD_TYPE=Release `
            -DGGML_HIP=ON `
            -DGGML_RPC=ON
          cmake --build build -j ${env:NUMBER_OF_PROCESSORS}

  windows-latest-cmake-hip-release:
    if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
    runs-on: windows-latest

    strategy:
      matrix:
        gpu_target: [gfx1100, gfx1101, gfx1030]

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: ccache
        uses: hendrikmuhs/[email protected]
        with:
          key: windows-latest-cmake-hip-release
          evict-old-files: 1d

      - name: Install
        id: depends
        run: |
          $ErrorActionPreference = "Stop"
          write-host "Downloading AMD HIP SDK Installer"
          Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
          write-host "Installing AMD HIP SDK"
          Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
          write-host "Completed AMD HIP SDK installation"

      - name: Verify ROCm
        id: verify
        run: |
          & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version

      - name: Build
        id: cmake_build
        run: |
          $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
          $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
          cmake -G "Unix Makefiles" -B build -S . `
            -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
            -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
            -DCMAKE_BUILD_TYPE=Release `
            -DAMDGPU_TARGETS=${{ matrix.gpu_target }} `
            -DGGML_HIP=ON `
            -DGGML_RPC=ON
          cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
          md "build\bin\rocblas\library\"
          cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
          cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\"
          cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\"

      - name: Determine tag name
        id: tag
        shell: bash
        run: |
          BUILD_NUMBER="$(git rev-list --count HEAD)"
          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
          else
            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
          fi

      - name: Pack artifacts
        id: pack_artifacts
        run: |
          7z a llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip .\build\bin\*

      - name: Upload artifacts
        uses: actions/upload-artifact@v4
        with:
          path: llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
          name: llama-bin-win-hip-x64-${{ matrix.gpu_target }}.zip

  ios-xcode-build:
    runs-on: macos-latest

    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Build
        id: cmake_build
        run: |
          sysctl -a
          cmake -B build -G Xcode \
            -DGGML_METAL_USE_BF16=ON \
            -DGGML_METAL_EMBED_LIBRARY=ON \
            -DLLAMA_BUILD_EXAMPLES=OFF \
            -DLLAMA_BUILD_TESTS=OFF \
            -DLLAMA_BUILD_SERVER=OFF \
            -DCMAKE_SYSTEM_NAME=iOS \
            -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
            -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
          cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
          sudo cmake --install build --config Release

      - name: xcodebuild for swift package
        id: xcodebuild
        run: |
          xcodebuild -scheme llama-Package -destination 'generic/platform=iOS'

      - name: Build Xcode project
        run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build

  android-build:
    runs-on: ubuntu-latest

    steps:
      - name: Clone
        uses: actions/checkout@v4

      - name: ccache
        uses: hendrikmuhs/[email protected]
        with:
          key: android-build
          evict-old-files: 1d

      - name: Set up JDK
        uses: actions/setup-java@v3
        with:
          java-version: 17
          distribution: zulu

      - name: Setup Android SDK
        uses: android-actions/setup-android@v3
        with:
          log-accepted-android-sdk-licenses: false

      - name: Build
        run: |
          cd examples/llama.android

          ./gradlew build --no-daemon

  release:
    if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}

    runs-on: ubuntu-latest

    needs:
      - ubuntu-cpu-cmake
      - windows-latest-cmake
      - windows-2019-cmake-cuda
      - windows-latest-cmake-hip-release
      - macOS-latest-cmake-arm64
      - macOS-latest-cmake-x64

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      - name: ccache
        uses: hendrikmuhs/[email protected]
        with:
          key: release
          evict-old-files: 1d

      - name: Determine tag name
        id: tag
        shell: bash
        run: |
          BUILD_NUMBER="$(git rev-list --count HEAD)"
          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
          if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
            echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
          else
            SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
            echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
          fi

      - name: Download artifacts
        id: download-artifact
        uses: actions/download-artifact@v4
        with:
          path: ./artifact

      - name: Move artifacts
        id: move_artifacts
        run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release

      - name: Create release
        id: create_release
        uses: ggml-org/action-create-release@v1
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        with:
          tag_name: ${{ steps.tag.outputs.name }}

      - name: Upload release
        id: upload_release
        uses: actions/github-script@v3
        with:
          github-token: ${{secrets.GITHUB_TOKEN}}
          script: |
            const path = require('path');
            const fs = require('fs');
            const release_id = '${{ steps.create_release.outputs.id }}';
            for (let file of await fs.readdirSync('./artifact/release')) {
              if (path.extname(file) === '.zip') {
                console.log('uploadReleaseAsset', file);
                await github.repos.uploadReleaseAsset({
                  owner: context.repo.owner,
                  repo: context.repo.repo,
                  release_id: release_id,
                  name: file,
                  data: await fs.readFileSync(`./artifact/release/${file}`)
                });
              }
            }

#  ubuntu-latest-gcc:
#    runs-on: ubuntu-latest
#
#    strategy:
#      matrix:
#        build: [Debug, Release]
#
#    steps:
#      - name: Clone
#        uses: actions/checkout@v4
#
#      - name: Dependencies
#        run: |
#          sudo apt-get update
#          sudo apt-get install build-essential
#          sudo apt-get install cmake
#
#      - name: Configure
#        run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
#
#      - name: Build
#        run: |
#          make
#
#  ubuntu-latest-clang:
#    runs-on: ubuntu-latest
#
#    strategy:
#      matrix:
#        build: [Debug, Release]
#
#    steps:
#      - name: Clone
#        uses: actions/checkout@v4
#
#      - name: Dependencies
#        run: |
#          sudo apt-get update
#          sudo apt-get install build-essential
#          sudo apt-get install cmake
#
#      - name: Configure
#        run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
#
#      - name: Build
#        run: |
#          make
#
#  ubuntu-latest-gcc-sanitized:
#    runs-on: ubuntu-latest
#
#    strategy:
#      matrix:
#        sanitizer: [ADDRESS, THREAD, UNDEFINED]
#
#    steps:
#      - name: Clone
#        uses: actions/checkout@v4
#
#      - name: Dependencies
#        run: |
#          sudo apt-get update
#          sudo apt-get install build-essential
#          sudo apt-get install cmake
#
#      - name: Configure
#        run: cmake . -DCMAKE_BUILD_TYPE=Debug -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON
#
#      - name: Build
#        run: |
#          make
#
#  windows:
#    runs-on: windows-latest
#
#    strategy:
#      matrix:
#        build: [Release]
#        arch: [Win32, x64]
#        include:
#          - arch: Win32
#            s2arc: x86
#          - arch: x64
#            s2arc: x64
#
#    steps:
#      - name: Clone
#        uses: actions/checkout@v4
#
#      - name: Add msbuild to PATH
#        uses: microsoft/setup-msbuild@v1
#
#      - name: Configure
#        run: >
#          cmake -S . -B ./build -A ${{ matrix.arch }}
#          -DCMAKE_BUILD_TYPE=${{ matrix.build }}
#
#      - name: Build
#        run: |
#          cd ./build
#          msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
#
#      - name: Upload binaries
#        uses: actions/upload-artifact@v4
#        with:
#          name: llama-bin-${{ matrix.arch }}
#          path: build/bin/${{ matrix.build }}
#
#  windows-blas:
#    runs-on: windows-latest
#
#    strategy:
#      matrix:
#        build: [Release]
#        arch: [Win32, x64]
#        blas: [ON]
#        include:
#          - arch: Win32
#            obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x86.zip
#            s2arc: x86
#          - arch: x64
#            obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip
#            s2arc: x64
#
#    steps:
#      - name: Clone
#        uses: actions/checkout@v4
#
#      - name: Add msbuild to PATH
#        uses: microsoft/setup-msbuild@v1
#
#      - name: Fetch OpenBLAS
#        if: matrix.blas == 'ON'
#        run: |
#          C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }}
#          7z x blas.zip -oblas -y
#          copy blas/include/cblas.h .
#          copy blas/include/openblas_config.h .
#          echo "blasdir=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
#
#      - name: Configure
#        run: >
#          cmake -S . -B ./build -A ${{ matrix.arch }}
#          -DCMAKE_BUILD_TYPE=${{ matrix.build }}
#          -DLLAMA_SUPPORT_OPENBLAS=${{ matrix.blas }}
#          -DCMAKE_LIBRARY_PATH="$env:blasdir/lib"
#
#      - name: Build
#        run: |
#          cd ./build
#          msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
#
#      - name: Copy libopenblas.dll
#        if: matrix.blas == 'ON'
#        run: copy "$env:blasdir/bin/libopenblas.dll" build/bin/${{ matrix.build }}
#
#      - name: Upload binaries
#        if: matrix.blas == 'ON'
#        uses: actions/upload-artifact@v4
#        with:
#          name: llama-blas-bin-${{ matrix.arch }}
#          path: build/bin/${{ matrix.build }}
#
#  emscripten:
#    runs-on: ubuntu-latest
#
#    strategy:
#      matrix:
#        build: [Release]
#
#    steps:
#      - name: Clone
#        uses: actions/checkout@v4
#
#      - name: Dependencies
#        run: |
#          wget -q https://github.com/emscripten-core/emsdk/archive/master.tar.gz
#          tar -xvf master.tar.gz
#          emsdk-master/emsdk update
#          emsdk-master/emsdk install latest
#          emsdk-master/emsdk activate latest
#
#      - name: Configure
#        run: echo "tmp"
#
#      - name: Build
#        run: |
#          pushd emsdk-master
#          source ./emsdk_env.sh
#          popd
#          emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
#          make

  openEuler-latest-cmake-cann:
    if: ${{ github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'Ascend NPU') }}
    defaults:
      run:
        shell: bash -el {0}
    runs-on: ubuntu-24.04-arm
    strategy:
      matrix:
        cann:
          - '8.0.rc3.beta1-910b-openeuler22.03-py3.10'
        device:
          - 'ascend910b3'
        build:
          - 'Release'
    container: ascendai/cann:${{ matrix.cann }}
    steps:
      - name: Checkout
        uses: actions/checkout@v4

      - name: Dependencies
        run: |
          yum update -y
          yum install -y git gcc gcc-c++ make cmake

      - name: Build
        run: |
          export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}

          cmake -S . -B build \
            -DCMAKE_BUILD_TYPE=${{ matrix.build }} \
            -DGGML_CANN=on \
            -DSOC_TYPE=${{ matrix.device }}
          cmake --build build -j $(nproc)
llama.cpp/.github/workflows/close-issue.yml
ADDED
@@ -0,0 +1,28 @@
name: Close inactive issues
on:
  schedule:
    - cron: "42 0 * * *"

# Fine-grant permission
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
permissions:
  issues: write

jobs:
  close-issues:
    runs-on: ubuntu-latest
    permissions:
      issues: write
      pull-requests: write
    steps:
      - uses: actions/stale@v5
        with:
          exempt-issue-labels: "refactor,help wanted,good first issue,research,bug,roadmap"
          days-before-issue-stale: 30
          days-before-issue-close: 14
          stale-issue-label: "stale"
          close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale."
          days-before-pr-stale: -1
          days-before-pr-close: -1
          operations-per-run: 10000
          repo-token: ${{ secrets.GITHUB_TOKEN }}
llama.cpp/.github/workflows/docker.yml
ADDED
@@ -0,0 +1,173 @@
# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

# GitHub recommends pinning actions to a commit SHA.
# To get a newer version, you will need to update the SHA.
# You can also reference a tag or branch, but the action may change without warning.

name: Publish Docker image

on:
  workflow_dispatch: # allows manual triggering
  schedule:
    # Rebuild daily rather than on every push because it is expensive
    - cron: '12 4 * * *'

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

# Fine-grant permission
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
permissions:
  packages: write

jobs:
  push_to_registry:
    name: Push Docker image to Docker Hub

    runs-on: ubuntu-22.04
    env:
      COMMIT_SHA: ${{ github.sha }}
    strategy:
      fail-fast: false
      matrix:
        config:
          # Multi-stage build
          - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: false}
          - { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
          - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
          - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
          - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
          # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
          #- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: true }
    steps:
      - name: Check out the repo
        uses: actions/checkout@v4
        with:
          fetch-depth: 0 # preserve git history, so we can determine the build number

      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Log in to Docker Hub
        uses: docker/login-action@v2
        with:
          registry: ghcr.io
          username: ${{ github.repository_owner }}
          password: ${{ secrets.GITHUB_TOKEN }}

      - name: Determine tag name
        id: tag
        shell: bash
        run: |
          BUILD_NUMBER="$(git rev-list --count HEAD)"
          SHORT_HASH="$(git rev-parse --short=7 HEAD)"
          REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case
          REPO_NAME="${{ github.event.repository.name }}"

          # determine tag name postfix (build number, commit hash)
          if [[ "${{ env.GITHUB_BRANCH_NAME }}" == "master" ]]; then
            TAG_POSTFIX="-b${BUILD_NUMBER}"
          else
            SAFE_NAME=$(echo "${{ env.GITHUB_BRANCH_NAME }}" | tr '/' '-')
            TAG_POSTFIX="-${SAFE_NAME}-${SHORT_HASH}"
          fi
          # list all tags possible
          if [[ "${{ matrix.config.tag }}" == "cpu" ]]; then
            TYPE=""
          else
            TYPE="-${{ matrix.config.tag }}"
          fi
          PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:"
          FULLTAGS="${PREFIX}full${TYPE},${PREFIX}full${TYPE}${TAG_POSTFIX}"
          LIGHTTAGS="${PREFIX}light${TYPE},${PREFIX}light${TYPE}${TAG_POSTFIX}"
          SERVERTAGS="${PREFIX}server${TYPE},${PREFIX}server${TYPE}${TAG_POSTFIX}"
          echo "full_output_tags=$FULLTAGS" >> $GITHUB_OUTPUT
          echo "light_output_tags=$LIGHTTAGS" >> $GITHUB_OUTPUT
          echo "server_output_tags=$SERVERTAGS" >> $GITHUB_OUTPUT
          echo "full_output_tags=$FULLTAGS" # print out for debugging
          echo "light_output_tags=$LIGHTTAGS" # print out for debugging
          echo "server_output_tags=$SERVERTAGS" # print out for debugging
        env:
          GITHUB_BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
          GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'

      - name: Free Disk Space (Ubuntu)
        if: ${{ matrix.config.free_disk_space == true }}
        uses: ggml-org/[email protected]
        with:
          # this might remove tools that are actually needed,
          # if set to "true" but frees about 6 GB
          tool-cache: false

          # all of these default to true, but feel free to set to
          # "false" if necessary for your workflow
          android: true
          dotnet: true
          haskell: true
          large-packages: true
          docker-images: true
          swap-storage: true

      - name: Build and push Full Docker image (tagged + versioned)
        if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.full == true }}
        uses: docker/build-push-action@v6
        with:
          context: .
          push: true
          platforms: ${{ matrix.config.platforms }}
          # tag list is generated from step above
          tags: ${{ steps.tag.outputs.full_output_tags }}
          file: ${{ matrix.config.dockerfile }}
          target: full
          provenance: false
          # using github experimental cache
          cache-from: type=gha
          cache-to: type=gha,mode=max
          # return to this if the experimental github cache is having issues
          #cache-to: type=local,dest=/tmp/.buildx-cache
          #cache-from: type=local,src=/tmp/.buildx-cache

      - name: Build and push Light Docker image (tagged + versioned)
        if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.light == true }}
        uses: docker/build-push-action@v6
        with:
          context: .
          push: true
          platforms: ${{ matrix.config.platforms }}
          # tag list is generated from step above
          tags: ${{ steps.tag.outputs.light_output_tags }}
          file: ${{ matrix.config.dockerfile }}
          target: light
          provenance: false
          # using github experimental cache
          cache-from: type=gha
          cache-to: type=gha,mode=max
          # return to this if the experimental github cache is having issues
          #cache-to: type=local,dest=/tmp/.buildx-cache
          #cache-from: type=local,src=/tmp/.buildx-cache

      - name: Build and push Server Docker image (tagged + versioned)
        if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.server == true }}
        uses: docker/build-push-action@v6
        with:
          context: .
          push: true
          platforms: ${{ matrix.config.platforms }}
          # tag list is generated from step above
          tags: ${{ steps.tag.outputs.server_output_tags }}
          file: ${{ matrix.config.dockerfile }}
          target: server
          provenance: false
          # using github experimental cache
          cache-from: type=gha
          cache-to: type=gha,mode=max
          # return to this if the experimental github cache is having issues
          #cache-to: type=local,dest=/tmp/.buildx-cache
          #cache-from: type=local,src=/tmp/.buildx-cache
llama.cpp/.github/workflows/editorconfig.yml
ADDED
@@ -0,0 +1,29 @@
name: EditorConfig Checker

on:
  workflow_dispatch: # allows manual triggering
    inputs:
      create_release:
        description: 'Create new release'
        required: true
        type: boolean
  push:
    branches:
      - master
  pull_request:
    branches:
      - master

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

jobs:
  editorconfig:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
      - uses: editorconfig-checker/action-editorconfig-checker@v2
        with:
          version: v3.0.3
      - run: editorconfig-checker
llama.cpp/.github/workflows/gguf-publish.yml
ADDED
@@ -0,0 +1,44 @@
# This workflow will upload a Python Package using Twine when a GGUF release is created
# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries

# See `gguf-py/README.md` for how to make a release.

# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.

name: Upload Python Package

on:
  workflow_dispatch:
  push:
    # Pattern matched against refs/tags
    tags:
      - 'gguf-v*' # Push events to every version tag

jobs:
  deploy:

    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.9.x'
      - name: Install dependencies
        run: |
          cd gguf-py
          python -m pip install poetry
          poetry install

      - name: Build package
        run: cd gguf-py && poetry build
      - name: Publish package
        uses: pypa/gh-action-pypi-publish@release/v1
        with:
          password: ${{ secrets.PYPI_API_TOKEN }}
          packages-dir: gguf-py/dist
llama.cpp/.github/workflows/labeler.yml
ADDED
@@ -0,0 +1,17 @@
name: "Pull Request Labeler"
on:
  - pull_request_target

jobs:
  labeler:
    permissions:
      contents: read
      pull-requests: write
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          repository: "ggerganov/llama.cpp"
      - uses: actions/labeler@v5
        with:
          configuration-path: '.github/labeler.yml'
llama.cpp/.github/workflows/python-check-requirements.yml
ADDED
@@ -0,0 +1,33 @@
name: Python check requirements.txt

on:
  push:
    paths:
      - '.github/workflows/python-check-requirements.yml'
      - 'scripts/check-requirements.sh'
      - 'convert*.py'
      - '**/requirements*.txt'
  pull_request:
    paths:
      - '.github/workflows/python-check-requirements.yml'
      - 'scripts/check-requirements.sh'
      - 'convert*.py'
      - '**/requirements*.txt'

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

jobs:
  python-check-requirements:
    runs-on: ubuntu-latest
    name: check-requirements
    steps:
      - name: Check out source repository
        uses: actions/checkout@v4
      - name: Set up Python environment
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Run check-requirements.sh script
        run: bash scripts/check-requirements.sh
llama.cpp/.github/workflows/python-lint.yml
ADDED
@@ -0,0 +1,30 @@
name: flake8 Lint

on:
  push:
    branches:
      - master
    paths: ['.github/workflows/python-lint.yml', '**/*.py']
  pull_request:
    types: [opened, synchronize, reopened]
    paths: ['.github/workflows/python-lint.yml', '**/*.py']

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

jobs:
  flake8-lint:
    runs-on: ubuntu-latest
    name: Lint
    steps:
      - name: Check out source repository
        uses: actions/checkout@v4
      - name: Set up Python environment
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: flake8 Lint
        uses: py-actions/flake8@v2
        with:
          plugins: "flake8-no-print"
llama.cpp/.github/workflows/python-type-check.yml
ADDED
@@ -0,0 +1,40 @@
name: Python Type-Check

on:
  push:
    paths:
      - '.github/workflows/python-type-check.yml'
      - 'pyrightconfig.json'
      - '**.py'
      - '**/requirements*.txt'
  pull_request:
    paths:
      - '.github/workflows/python-type-check.yml'
      - 'pyrightconfig.json'
      - '**.py'
      - '**/requirements*.txt'

concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
  cancel-in-progress: true

jobs:
  python-type-check:
    runs-on: ubuntu-latest
    name: pyright type-check
    steps:
      - name: Check out source repository
        uses: actions/checkout@v4
      - name: Set up Python environment
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Install Python dependencies
        # TODO: use a venv
        run: pip install -r requirements/requirements-all.txt
      - name: Type-check with Pyright
        uses: jakebailey/pyright-action@v2
        with:
          version: 1.1.382
          level: warning
          warnings: true
llama.cpp/.github/workflows/server.yml
ADDED
@@ -0,0 +1,239 @@
# Server build and tests
name: Server

on:
  workflow_dispatch: # allows manual triggering
    inputs:
      sha:
        description: 'Commit SHA1 to build'
        required: false
        type: string
      slow_tests:
        description: 'Run slow tests'
        required: true
        type: boolean
  push:
    branches:
      - master
    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
  pull_request:
    types: [opened, synchronize, reopened]
    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']

env:
  LLAMA_LOG_COLORS: 1
  LLAMA_LOG_PREFIX: 1
  LLAMA_LOG_TIMESTAMPS: 1
  LLAMA_LOG_VERBOSITY: 10

concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
  cancel-in-progress: true

jobs:
  server:
    runs-on: ubuntu-latest

    strategy:
      matrix:
        sanitizer: [ADDRESS, UNDEFINED] # THREAD is broken
        build_type: [RelWithDebInfo]
        include:
          - build_type: Release
            sanitizer: ""
      fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken

    steps:
      - name: Dependencies
        id: depends
        run: |
          sudo apt-get update
          sudo apt-get -y install \
            build-essential \
            xxd \
            git \
            cmake \
            curl \
            wget \
            language-pack-en \
            libcurl4-openssl-dev

      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}

      - name: Python setup
        id: setup_python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Tests dependencies
        id: test_dependencies
        run: |
          pip install -r examples/server/tests/requirements.txt

      # Setup nodejs (to be used for verifying bundled index.html)
      - uses: actions/setup-node@v4
        with:
          node-version: '22.11.0'

      - name: WebUI - Install dependencies
        id: webui_lint
        run: |
          cd examples/server/webui
          npm ci

      - name: WebUI - Check code format
        id: webui_format
        run: |
          git config --global --add safe.directory $(realpath .)
          cd examples/server/webui
          git status

          npm run format
          git status
          modified_files="$(git status -s)"
          echo "Modified files: ${modified_files}"
          if [ -n "${modified_files}" ]; then
            echo "Files do not follow coding style. To fix: npm run format"
            echo "${modified_files}"
            exit 1
          fi

      - name: Verify bundled index.html
        id: verify_server_index_html
        run: |
          git config --global --add safe.directory $(realpath .)
          cd examples/server/webui
          git status

          npm run build
          git status
          modified_files="$(git status -s)"
          echo "Modified files: ${modified_files}"
          if [ -n "${modified_files}" ]; then
            echo "Repository is dirty or server/webui is not built as expected"
            echo "Hint: You may need to follow Web UI build guide in server/README.md"
            echo "${modified_files}"
            exit 1
          fi

      - name: Build (no OpenMP)
        id: cmake_build_no_openmp
        if: ${{ matrix.sanitizer == 'THREAD' }}
        run: |
          cmake -B build \
            -DGGML_NATIVE=OFF \
            -DLLAMA_BUILD_SERVER=ON \
            -DLLAMA_CURL=ON \
            -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
            -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
            -DGGML_OPENMP=OFF ;
          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server

      - name: Build (sanitizers)
        id: cmake_build_sanitizers
        if: ${{ matrix.sanitizer != '' && matrix.sanitizer != 'THREAD' }}
        run: |
          cmake -B build \
            -DGGML_NATIVE=OFF \
            -DLLAMA_BUILD_SERVER=ON \
            -DLLAMA_CURL=ON \
            -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
            -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server

      - name: Build (sanitizers)
        id: cmake_build
        if: ${{ matrix.sanitizer == '' }}
        run: |
          cmake -B build \
            -DGGML_NATIVE=OFF \
            -DLLAMA_BUILD_SERVER=ON \
            -DLLAMA_CURL=ON \
            -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ;
          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server

      - name: Tests
        id: server_integration_tests
        if: ${{ matrix.sanitizer == '' }}
        run: |
          cd examples/server/tests
          ./tests.sh

      - name: Tests (sanitizers)
        id: server_integration_tests_sanitizers
        if: ${{ matrix.sanitizer != '' }}
        run: |
          cd examples/server/tests
          LLAMA_SANITIZE=1 ./tests.sh

      - name: Slow tests
        id: server_integration_tests_slow
        if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
        run: |
          cd examples/server/tests
          SLOW_TESTS=1 ./tests.sh


  server-windows:
    runs-on: windows-2019

    steps:
      - name: Clone
        id: checkout
        uses: actions/checkout@v4
        with:
          fetch-depth: 0
          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}

      - name: libCURL
        id: get_libcurl
        env:
          CURL_VERSION: 8.6.0_6
        run: |
          curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip"
          mkdir $env:RUNNER_TEMP/libcurl
          tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl

      - name: Build
        id: cmake_build
        run: |
          cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
          cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target llama-server

      - name: Python setup
        id: setup_python
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Tests dependencies
        id: test_dependencies
        run: |
          pip install -r examples/server/tests/requirements.txt

      - name: Copy Libcurl
        id: prepare_libcurl
        run: |
          cp $env:RUNNER_TEMP/libcurl/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll

      - name: Tests
        id: server_integration_tests
        if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
        run: |
          cd examples/server/tests
          $env:PYTHONIOENCODING = ":replace"
          pytest -v -x -m "not slow"

      - name: Slow tests
        id: server_integration_tests_slow
        if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
        run: |
          cd examples/server/tests
          $env:SLOW_TESTS = "1"
          pytest -v -x