llamacpp
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +23 -0
- llama.cpp/.clang-format +161 -0
- llama.cpp/.clang-tidy +26 -0
- llama.cpp/.devops/cloud-v-pipeline +22 -0
- llama.cpp/.devops/full-cuda.Dockerfile +33 -0
- llama.cpp/.devops/full-musa.Dockerfile +33 -0
- llama.cpp/.devops/full-rocm.Dockerfile +50 -0
- llama.cpp/.devops/full.Dockerfile +25 -0
- llama.cpp/.devops/llama-cli-cann.Dockerfile +44 -0
- llama.cpp/.devops/llama-cli-cuda.Dockerfile +38 -0
- llama.cpp/.devops/llama-cli-intel.Dockerfile +28 -0
- llama.cpp/.devops/llama-cli-musa.Dockerfile +38 -0
- llama.cpp/.devops/llama-cli-rocm.Dockerfile +45 -0
- llama.cpp/.devops/llama-cli-vulkan.Dockerfile +27 -0
- llama.cpp/.devops/llama-cli.Dockerfile +23 -0
- llama.cpp/.devops/llama-cpp-cuda.srpm.spec +83 -0
- llama.cpp/.devops/llama-cpp.srpm.spec +85 -0
- llama.cpp/.devops/llama-server-cuda.Dockerfile +43 -0
- llama.cpp/.devops/llama-server-intel.Dockerfile +34 -0
- llama.cpp/.devops/llama-server-musa.Dockerfile +43 -0
- llama.cpp/.devops/llama-server-rocm.Dockerfile +54 -0
- llama.cpp/.devops/llama-server-vulkan.Dockerfile +31 -0
- llama.cpp/.devops/llama-server.Dockerfile +41 -0
- llama.cpp/.devops/nix/apps.nix +21 -0
- llama.cpp/.devops/nix/devshells.nix +52 -0
- llama.cpp/.devops/nix/docker.nix +37 -0
- llama.cpp/.devops/nix/jetson-support.nix +39 -0
- llama.cpp/.devops/nix/nixpkgs-instances.nix +45 -0
- llama.cpp/.devops/nix/package-gguf-py.nix +36 -0
- llama.cpp/.devops/nix/package.nix +246 -0
- llama.cpp/.devops/nix/python-scripts.nix +66 -0
- llama.cpp/.devops/nix/scope.nix +41 -0
- llama.cpp/.devops/nix/sif.nix +27 -0
- llama.cpp/.devops/tools.sh +41 -0
- llama.cpp/.dockerignore +20 -0
- llama.cpp/.ecrc +6 -0
- llama.cpp/.editorconfig +42 -0
- llama.cpp/.flake8 +17 -0
- llama.cpp/.github/ISSUE_TEMPLATE/010-bug-compilation.yml +77 -0
- llama.cpp/.github/ISSUE_TEMPLATE/011-bug-results.yml +101 -0
- llama.cpp/.github/ISSUE_TEMPLATE/019-bug-misc.yml +81 -0
- llama.cpp/.github/ISSUE_TEMPLATE/020-enhancement.yml +51 -0
- llama.cpp/.github/ISSUE_TEMPLATE/030-research.yml +52 -0
- llama.cpp/.github/ISSUE_TEMPLATE/040-refactor.yml +28 -0
- llama.cpp/.github/ISSUE_TEMPLATE/config.yml +11 -0
- llama.cpp/.github/labeler.yml +86 -0
- llama.cpp/.github/pull_request_template.md +1 -0
- llama.cpp/.github/workflows/bench.yml.disabled +315 -0
- llama.cpp/.github/workflows/build.yml +1416 -0
- llama.cpp/.github/workflows/close-issue.yml +28 -0
.gitattributes
CHANGED
@@ -35,3 +35,26 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
llama_lora_model_1/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
37 |
outputs/checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
llama_lora_model_1/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
37 |
outputs/checkpoint-100/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
38 |
+
llama.cpp/build/bin/llama-export-lora filter=lfs diff=lfs merge=lfs -text
|
39 |
+
llama.cpp/build/bin/llama-imatrix filter=lfs diff=lfs merge=lfs -text
|
40 |
+
llama.cpp/build/bin/llama-llava-cli filter=lfs diff=lfs merge=lfs -text
|
41 |
+
llama.cpp/build/bin/llama-minicpmv-cli filter=lfs diff=lfs merge=lfs -text
|
42 |
+
llama.cpp/build/bin/llama-perplexity filter=lfs diff=lfs merge=lfs -text
|
43 |
+
llama.cpp/build/bin/llama-server filter=lfs diff=lfs merge=lfs -text
|
44 |
+
llama.cpp/build/common/libcommon.a filter=lfs diff=lfs merge=lfs -text
|
45 |
+
llama.cpp/build/examples/server/CMakeFiles/llama-server.dir/server.cpp.o filter=lfs diff=lfs merge=lfs -text
|
46 |
+
llama.cpp/build/src/CMakeFiles/llama.dir/llama.cpp.o filter=lfs diff=lfs merge=lfs -text
|
47 |
+
llama.cpp/build/src/libllama.so filter=lfs diff=lfs merge=lfs -text
|
48 |
+
llama.cpp/models/ggml-vocab-aquila.gguf filter=lfs diff=lfs merge=lfs -text
|
49 |
+
llama.cpp/models/ggml-vocab-baichuan.gguf filter=lfs diff=lfs merge=lfs -text
|
50 |
+
llama.cpp/models/ggml-vocab-command-r.gguf filter=lfs diff=lfs merge=lfs -text
|
51 |
+
llama.cpp/models/ggml-vocab-deepseek-coder.gguf filter=lfs diff=lfs merge=lfs -text
|
52 |
+
llama.cpp/models/ggml-vocab-deepseek-llm.gguf filter=lfs diff=lfs merge=lfs -text
|
53 |
+
llama.cpp/models/ggml-vocab-falcon.gguf filter=lfs diff=lfs merge=lfs -text
|
54 |
+
llama.cpp/models/ggml-vocab-gpt-2.gguf filter=lfs diff=lfs merge=lfs -text
|
55 |
+
llama.cpp/models/ggml-vocab-gpt-neox.gguf filter=lfs diff=lfs merge=lfs -text
|
56 |
+
llama.cpp/models/ggml-vocab-llama-bpe.gguf filter=lfs diff=lfs merge=lfs -text
|
57 |
+
llama.cpp/models/ggml-vocab-mpt.gguf filter=lfs diff=lfs merge=lfs -text
|
58 |
+
llama.cpp/models/ggml-vocab-qwen2.gguf filter=lfs diff=lfs merge=lfs -text
|
59 |
+
llama.cpp/models/ggml-vocab-refact.gguf filter=lfs diff=lfs merge=lfs -text
|
60 |
+
llama.cpp/models/ggml-vocab-starcoder.gguf filter=lfs diff=lfs merge=lfs -text
|
llama.cpp/.clang-format
ADDED
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
Language: Cpp
|
3 |
+
AlignAfterOpenBracket: Align
|
4 |
+
AlignArrayOfStructures: Left
|
5 |
+
AlignConsecutiveAssignments: AcrossComments
|
6 |
+
AlignConsecutiveBitFields: AcrossComments
|
7 |
+
AlignConsecutiveDeclarations: AcrossComments
|
8 |
+
AlignConsecutiveMacros: AcrossComments
|
9 |
+
# AlignConsecutiveShortCaseStatements: AcrossComments
|
10 |
+
AlignEscapedNewlines: Left # LeftWithLastLine
|
11 |
+
AlignOperands: Align
|
12 |
+
AlignTrailingComments:
|
13 |
+
Kind: Always
|
14 |
+
OverEmptyLines: 1
|
15 |
+
AllowAllArgumentsOnNextLine: true
|
16 |
+
AllowAllParametersOfDeclarationOnNextLine: false
|
17 |
+
# AllowBreakBeforeNoexceptSpecifier: OnlyWithParen
|
18 |
+
AllowShortBlocksOnASingleLine: Never
|
19 |
+
AllowShortCaseLabelsOnASingleLine: false
|
20 |
+
AllowShortFunctionsOnASingleLine: Inline
|
21 |
+
AllowShortIfStatementsOnASingleLine: Never
|
22 |
+
AllowShortLambdasOnASingleLine: Inline
|
23 |
+
AllowShortLoopsOnASingleLine: false
|
24 |
+
AlwaysBreakBeforeMultilineStrings: true
|
25 |
+
BinPackArguments: true
|
26 |
+
BinPackParameters: true # OnePerLine
|
27 |
+
BitFieldColonSpacing: Both
|
28 |
+
BreakBeforeBraces: Custom # Attach
|
29 |
+
BraceWrapping:
|
30 |
+
AfterCaseLabel: true
|
31 |
+
AfterClass: false
|
32 |
+
AfterControlStatement: false
|
33 |
+
AfterEnum: false
|
34 |
+
AfterFunction: false
|
35 |
+
AfterNamespace: false
|
36 |
+
AfterObjCDeclaration: false
|
37 |
+
AfterStruct: false
|
38 |
+
AfterUnion: false
|
39 |
+
AfterExternBlock: false
|
40 |
+
BeforeCatch: false
|
41 |
+
BeforeElse: false
|
42 |
+
BeforeLambdaBody: false
|
43 |
+
BeforeWhile: false
|
44 |
+
IndentBraces: false
|
45 |
+
SplitEmptyFunction: false
|
46 |
+
SplitEmptyRecord: false
|
47 |
+
SplitEmptyNamespace: false
|
48 |
+
# BreakAdjacentStringLiterals: true
|
49 |
+
BreakAfterAttributes: Never
|
50 |
+
BreakBeforeBinaryOperators: None
|
51 |
+
BreakBeforeInlineASMColon: OnlyMultiline
|
52 |
+
BreakBeforeTernaryOperators: false
|
53 |
+
# BreakBinaryOperations: Never
|
54 |
+
BreakConstructorInitializers: AfterColon
|
55 |
+
# BreakFunctionDefinitionParameters: false
|
56 |
+
BreakInheritanceList: AfterComma
|
57 |
+
BreakStringLiterals: true
|
58 |
+
# BreakTemplateDeclarations: Yes
|
59 |
+
ColumnLimit: 120
|
60 |
+
CommentPragmas: '^ IWYU pragma:'
|
61 |
+
CompactNamespaces: false
|
62 |
+
ConstructorInitializerIndentWidth: 4
|
63 |
+
ContinuationIndentWidth: 4
|
64 |
+
Cpp11BracedListStyle: false
|
65 |
+
DerivePointerAlignment: false
|
66 |
+
DisableFormat: false
|
67 |
+
EmptyLineBeforeAccessModifier: Leave
|
68 |
+
EmptyLineAfterAccessModifier: Never
|
69 |
+
ExperimentalAutoDetectBinPacking: false
|
70 |
+
FixNamespaceComments: true
|
71 |
+
IncludeBlocks: Regroup
|
72 |
+
IncludeCategories:
|
73 |
+
- Regex: '^<.*\.h>'
|
74 |
+
Priority: 1
|
75 |
+
SortPriority: 0
|
76 |
+
- Regex: '^<.*'
|
77 |
+
Priority: 2
|
78 |
+
SortPriority: 0
|
79 |
+
- Regex: '.*'
|
80 |
+
Priority: 3
|
81 |
+
SortPriority: 0
|
82 |
+
IncludeIsMainRegex: '([-_](test|unittest))?$'
|
83 |
+
IncludeIsMainSourceRegex: ''
|
84 |
+
IndentAccessModifiers: false
|
85 |
+
IndentCaseBlocks: true
|
86 |
+
IndentCaseLabels: true
|
87 |
+
IndentExternBlock: NoIndent
|
88 |
+
IndentGotoLabels: false
|
89 |
+
IndentPPDirectives: AfterHash
|
90 |
+
IndentWidth: 4
|
91 |
+
IndentWrappedFunctionNames: false
|
92 |
+
InsertBraces: true # NOTE: may lead to incorrect formatting
|
93 |
+
InsertNewlineAtEOF: true
|
94 |
+
JavaScriptQuotes: Leave
|
95 |
+
JavaScriptWrapImports: true
|
96 |
+
KeepEmptyLinesAtTheStartOfBlocks: false
|
97 |
+
LambdaBodyIndentation: Signature
|
98 |
+
LineEnding: LF
|
99 |
+
MacroBlockBegin: ''
|
100 |
+
MacroBlockEnd: ''
|
101 |
+
MaxEmptyLinesToKeep: 1
|
102 |
+
NamespaceIndentation: None
|
103 |
+
ObjCBinPackProtocolList: Auto
|
104 |
+
ObjCBlockIndentWidth: 4
|
105 |
+
ObjCSpaceAfterProperty: true
|
106 |
+
ObjCSpaceBeforeProtocolList: true
|
107 |
+
PPIndentWidth: -1
|
108 |
+
PackConstructorInitializers: CurrentLine
|
109 |
+
PenaltyBreakAssignment: 2
|
110 |
+
PenaltyBreakBeforeFirstCallParameter: 1
|
111 |
+
PenaltyBreakComment: 300
|
112 |
+
PenaltyBreakFirstLessLess: 120
|
113 |
+
PenaltyBreakString: 1000
|
114 |
+
PenaltyBreakTemplateDeclaration: 10
|
115 |
+
PenaltyExcessCharacter: 1000000
|
116 |
+
PenaltyReturnTypeOnItsOwnLine: 200
|
117 |
+
PointerAlignment: Middle
|
118 |
+
QualifierAlignment: Left
|
119 |
+
#QualifierOrder: ['static', 'inline', 'friend', 'constexpr', 'const', 'volatile', 'type', 'restrict']
|
120 |
+
RawStringFormats:
|
121 |
+
- Language: Cpp
|
122 |
+
Delimiters:
|
123 |
+
- cc
|
124 |
+
- CC
|
125 |
+
- cpp
|
126 |
+
- Cpp
|
127 |
+
- CPP
|
128 |
+
- 'c++'
|
129 |
+
- 'C++'
|
130 |
+
CanonicalDelimiter: ''
|
131 |
+
ReferenceAlignment: Middle
|
132 |
+
ReflowComments: false # IndentOnly
|
133 |
+
SeparateDefinitionBlocks: Always
|
134 |
+
SortIncludes: CaseInsensitive
|
135 |
+
SortUsingDeclarations: LexicographicNumeric
|
136 |
+
SpaceAfterCStyleCast: true
|
137 |
+
SpaceAfterLogicalNot: false
|
138 |
+
SpaceAfterTemplateKeyword: true
|
139 |
+
SpaceBeforeAssignmentOperators: true
|
140 |
+
SpaceBeforeCpp11BracedList: false
|
141 |
+
SpaceBeforeCtorInitializerColon: true
|
142 |
+
SpaceBeforeInheritanceColon: true
|
143 |
+
SpaceBeforeParens: ControlStatements
|
144 |
+
SpaceBeforeRangeBasedForLoopColon: true
|
145 |
+
SpaceInEmptyBlock: false
|
146 |
+
SpaceInEmptyParentheses: false
|
147 |
+
SpacesBeforeTrailingComments: 2
|
148 |
+
SpacesInAngles: Never
|
149 |
+
SpacesInContainerLiterals: true
|
150 |
+
SpacesInLineCommentPrefix:
|
151 |
+
Minimum: 1
|
152 |
+
Maximum: -1
|
153 |
+
SpacesInParentheses: false
|
154 |
+
SpacesInSquareBrackets: false
|
155 |
+
SpaceBeforeSquareBrackets: false
|
156 |
+
Standard: c++17
|
157 |
+
TabWidth: 4
|
158 |
+
UseTab: Never
|
159 |
+
WhitespaceSensitiveMacros: ['STRINGIZE']
|
160 |
+
...
|
161 |
+
|
llama.cpp/.clang-tidy
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
Checks: >
|
3 |
+
bugprone-*,
|
4 |
+
-bugprone-easily-swappable-parameters,
|
5 |
+
-bugprone-implicit-widening-of-multiplication-result,
|
6 |
+
-bugprone-misplaced-widening-cast,
|
7 |
+
-bugprone-narrowing-conversions,
|
8 |
+
readability-*,
|
9 |
+
-readability-avoid-unconditional-preprocessor-if,
|
10 |
+
-readability-function-cognitive-complexity,
|
11 |
+
-readability-identifier-length,
|
12 |
+
-readability-implicit-bool-conversion,
|
13 |
+
-readability-magic-numbers,
|
14 |
+
-readability-uppercase-literal-suffix,
|
15 |
+
-readability-simplify-boolean-expr,
|
16 |
+
clang-analyzer-*,
|
17 |
+
-clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
|
18 |
+
performance-*,
|
19 |
+
portability-*,
|
20 |
+
-portability-simd-intrinsics,
|
21 |
+
misc-*,
|
22 |
+
-misc-const-correctness,
|
23 |
+
-misc-non-private-member-variables-in-classes,
|
24 |
+
-misc-no-recursion,
|
25 |
+
-misc-use-anonymous-namespace,
|
26 |
+
FormatStyle: none
|
llama.cpp/.devops/cloud-v-pipeline
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
node('x86_runner1'){ // Running on x86 runner containing latest vector qemu, latest vector gcc and all the necessary libraries
|
2 |
+
stage('Cleanup'){
|
3 |
+
cleanWs() // Cleaning previous CI build in workspace
|
4 |
+
}
|
5 |
+
stage('checkout repo'){
|
6 |
+
retry(5){ // Retry if the cloning fails due to some reason
|
7 |
+
checkout scm // Clone the repo on Runner
|
8 |
+
}
|
9 |
+
}
|
10 |
+
stage('Compiling llama.cpp'){
|
11 |
+
sh'''#!/bin/bash
|
12 |
+
make RISCV=1 RISCV_CROSS_COMPILE=1 # Compiling llama for RISC-V
|
13 |
+
'''
|
14 |
+
}
|
15 |
+
stage('Running llama.cpp'){
|
16 |
+
sh'''#!/bin/bash
|
17 |
+
module load gnu-bin2/0.1 # loading latest versions of vector qemu and vector gcc
|
18 |
+
qemu-riscv64 -L /softwares/gnu-bin2/sysroot -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./llama-cli -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt # Running llama.cpp on vector qemu-riscv64
|
19 |
+
cat llama_log.txt # Printing results
|
20 |
+
'''
|
21 |
+
}
|
22 |
+
}
|
llama.cpp/.devops/full-cuda.Dockerfile
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ARG UBUNTU_VERSION=22.04
|
2 |
+
# This needs to generally match the container host's environment.
|
3 |
+
ARG CUDA_VERSION=12.6.0
|
4 |
+
# Target the CUDA build image
|
5 |
+
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
|
6 |
+
|
7 |
+
FROM ${BASE_CUDA_DEV_CONTAINER} AS build
|
8 |
+
|
9 |
+
# CUDA architecture to build for (defaults to all supported archs)
|
10 |
+
ARG CUDA_DOCKER_ARCH=default
|
11 |
+
|
12 |
+
RUN apt-get update && \
|
13 |
+
apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1
|
14 |
+
|
15 |
+
COPY requirements.txt requirements.txt
|
16 |
+
COPY requirements requirements
|
17 |
+
|
18 |
+
RUN pip install --upgrade pip setuptools wheel \
|
19 |
+
&& pip install -r requirements.txt
|
20 |
+
|
21 |
+
WORKDIR /app
|
22 |
+
|
23 |
+
COPY . .
|
24 |
+
|
25 |
+
# Use the default CUDA archs if not specified
|
26 |
+
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
|
27 |
+
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
|
28 |
+
fi && \
|
29 |
+
cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
|
30 |
+
cmake --build build --config Release -j$(nproc) && \
|
31 |
+
cp build/bin/* .
|
32 |
+
|
33 |
+
ENTRYPOINT ["/app/.devops/tools.sh"]
|
llama.cpp/.devops/full-musa.Dockerfile
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ARG UBUNTU_VERSION=22.04
|
2 |
+
# This needs to generally match the container host's environment.
|
3 |
+
ARG MUSA_VERSION=rc3.1.0
|
4 |
+
# Target the MUSA build image
|
5 |
+
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
|
6 |
+
|
7 |
+
FROM ${BASE_MUSA_DEV_CONTAINER} AS build
|
8 |
+
|
9 |
+
# MUSA architecture to build for (defaults to all supported archs)
|
10 |
+
ARG MUSA_DOCKER_ARCH=default
|
11 |
+
|
12 |
+
RUN apt-get update && \
|
13 |
+
apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1
|
14 |
+
|
15 |
+
COPY requirements.txt requirements.txt
|
16 |
+
COPY requirements requirements
|
17 |
+
|
18 |
+
RUN pip install --upgrade pip setuptools wheel \
|
19 |
+
&& pip install -r requirements.txt
|
20 |
+
|
21 |
+
WORKDIR /app
|
22 |
+
|
23 |
+
COPY . .
|
24 |
+
|
25 |
+
# Use the default MUSA archs if not specified
|
26 |
+
RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
|
27 |
+
export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
|
28 |
+
fi && \
|
29 |
+
cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
|
30 |
+
cmake --build build --config Release -j$(nproc) && \
|
31 |
+
cp build/bin/* .
|
32 |
+
|
33 |
+
ENTRYPOINT ["/app/.devops/tools.sh"]
|
llama.cpp/.devops/full-rocm.Dockerfile
ADDED
@@ -0,0 +1,50 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ARG UBUNTU_VERSION=22.04
|
2 |
+
|
3 |
+
# This needs to generally match the container host's environment.
|
4 |
+
ARG ROCM_VERSION=5.6
|
5 |
+
|
6 |
+
# Target the ROCm build image
|
7 |
+
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
|
8 |
+
|
9 |
+
FROM ${BASE_ROCM_DEV_CONTAINER} AS build
|
10 |
+
|
11 |
+
# Unless otherwise specified, we make a fat build.
|
12 |
+
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
|
13 |
+
# This is mostly tied to rocBLAS supported archs.
|
14 |
+
ARG ROCM_DOCKER_ARCH="\
|
15 |
+
gfx803 \
|
16 |
+
gfx900 \
|
17 |
+
gfx906 \
|
18 |
+
gfx908 \
|
19 |
+
gfx90a \
|
20 |
+
gfx1010 \
|
21 |
+
gfx1030 \
|
22 |
+
gfx1100 \
|
23 |
+
gfx1101 \
|
24 |
+
gfx1102"
|
25 |
+
|
26 |
+
COPY requirements.txt requirements.txt
|
27 |
+
COPY requirements requirements
|
28 |
+
|
29 |
+
RUN pip install --upgrade pip setuptools wheel \
|
30 |
+
&& pip install -r requirements.txt
|
31 |
+
|
32 |
+
WORKDIR /app
|
33 |
+
|
34 |
+
COPY . .
|
35 |
+
|
36 |
+
# Set the AMD GPU target architectures
|
37 |
+
ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
|
38 |
+
# Enable ROCm
|
39 |
+
ENV GGML_HIPBLAS=1
|
40 |
+
ENV CC=/opt/rocm/llvm/bin/clang
|
41 |
+
ENV CXX=/opt/rocm/llvm/bin/clang++
|
42 |
+
|
43 |
+
# Enable cURL
|
44 |
+
ENV LLAMA_CURL=1
|
45 |
+
RUN apt-get update && \
|
46 |
+
apt-get install -y libcurl4-openssl-dev
|
47 |
+
|
48 |
+
RUN make -j$(nproc)
|
49 |
+
|
50 |
+
ENTRYPOINT ["/app/.devops/tools.sh"]
|
llama.cpp/.devops/full.Dockerfile
ADDED
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ARG UBUNTU_VERSION=22.04
|
2 |
+
|
3 |
+
FROM ubuntu:$UBUNTU_VERSION AS build
|
4 |
+
|
5 |
+
RUN apt-get update && \
|
6 |
+
apt-get install -y build-essential python3 python3-pip git libcurl4-openssl-dev libgomp1
|
7 |
+
|
8 |
+
COPY requirements.txt requirements.txt
|
9 |
+
COPY requirements requirements
|
10 |
+
|
11 |
+
RUN pip install --upgrade pip setuptools wheel \
|
12 |
+
&& pip install -r requirements.txt
|
13 |
+
|
14 |
+
WORKDIR /app
|
15 |
+
|
16 |
+
COPY . .
|
17 |
+
|
18 |
+
ENV LLAMA_CURL=1
|
19 |
+
|
20 |
+
|
21 |
+
RUN make -j$(nproc)
|
22 |
+
|
23 |
+
ENV LC_ALL=C.utf8
|
24 |
+
|
25 |
+
ENTRYPOINT ["/app/.devops/tools.sh"]
|
llama.cpp/.devops/llama-cli-cann.Dockerfile
ADDED
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ARG ASCEND_VERSION=8.0.rc2.alpha003-910b-openeuler22.03-py3.8
|
2 |
+
|
3 |
+
FROM ascendai/cann:$ASCEND_VERSION AS build
|
4 |
+
|
5 |
+
WORKDIR /app
|
6 |
+
|
7 |
+
COPY . .
|
8 |
+
|
9 |
+
RUN yum install -y gcc g++ cmake make
|
10 |
+
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
|
11 |
+
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
|
12 |
+
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
|
13 |
+
ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
|
14 |
+
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
|
15 |
+
ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
|
16 |
+
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
|
17 |
+
ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
|
18 |
+
ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}
|
19 |
+
|
20 |
+
# Find libascend_hal.so through the stub library directory, because the driver hasn't been mounted.
|
21 |
+
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
|
22 |
+
|
23 |
+
RUN echo "Building with static libs" && \
|
24 |
+
source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
|
25 |
+
cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF && \
|
26 |
+
cmake --build build --config Release --target llama-cli
|
27 |
+
|
28 |
+
# TODO: use image with NNRT
|
29 |
+
FROM ascendai/cann:$ASCEND_VERSION AS runtime
|
30 |
+
COPY --from=build /app/build/bin/llama-cli /llama-cli
|
31 |
+
|
32 |
+
ENV LC_ALL=C.utf8
|
33 |
+
|
34 |
+
ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
|
35 |
+
ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
|
36 |
+
ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
|
37 |
+
ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
|
38 |
+
ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
|
39 |
+
ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
|
40 |
+
ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
|
41 |
+
ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
|
42 |
+
ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}
|
43 |
+
|
44 |
+
ENTRYPOINT ["/llama-cli" ]
|
llama.cpp/.devops/llama-cli-cuda.Dockerfile
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ARG UBUNTU_VERSION=22.04
|
2 |
+
# This needs to generally match the container host's environment.
|
3 |
+
ARG CUDA_VERSION=12.6.0
|
4 |
+
# Target the CUDA build image
|
5 |
+
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
|
6 |
+
# Target the CUDA runtime image
|
7 |
+
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
|
8 |
+
|
9 |
+
FROM ${BASE_CUDA_DEV_CONTAINER} AS build
|
10 |
+
|
11 |
+
# CUDA architecture to build for (defaults to all supported archs)
|
12 |
+
ARG CUDA_DOCKER_ARCH=default
|
13 |
+
|
14 |
+
RUN apt-get update && \
|
15 |
+
apt-get install -y build-essential git cmake
|
16 |
+
|
17 |
+
WORKDIR /app
|
18 |
+
|
19 |
+
COPY . .
|
20 |
+
|
21 |
+
# Use the default CUDA archs if not specified
|
22 |
+
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
|
23 |
+
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
|
24 |
+
fi && \
|
25 |
+
cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
|
26 |
+
cmake --build build --config Release --target llama-cli -j$(nproc) && \
|
27 |
+
mkdir -p /app/lib && \
|
28 |
+
find build -name "*.so" -exec cp {} /app/lib \;
|
29 |
+
|
30 |
+
FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
|
31 |
+
|
32 |
+
RUN apt-get update && \
|
33 |
+
apt-get install -y libgomp1
|
34 |
+
|
35 |
+
COPY --from=build /app/lib/ /
|
36 |
+
COPY --from=build /app/build/bin/llama-cli /
|
37 |
+
|
38 |
+
ENTRYPOINT [ "/llama-cli" ]
|
llama.cpp/.devops/llama-cli-intel.Dockerfile
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ARG ONEAPI_VERSION=2025.0.0-0-devel-ubuntu22.04
|
2 |
+
|
3 |
+
FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build
|
4 |
+
|
5 |
+
ARG GGML_SYCL_F16=OFF
|
6 |
+
RUN apt-get update && \
|
7 |
+
apt-get install -y git
|
8 |
+
|
9 |
+
WORKDIR /app
|
10 |
+
|
11 |
+
COPY . .
|
12 |
+
|
13 |
+
RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
|
14 |
+
echo "GGML_SYCL_F16 is set" && \
|
15 |
+
export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
|
16 |
+
fi && \
|
17 |
+
echo "Building with static libs" && \
|
18 |
+
cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx \
|
19 |
+
${OPT_SYCL_F16} -DBUILD_SHARED_LIBS=OFF && \
|
20 |
+
cmake --build build --config Release --target llama-cli
|
21 |
+
|
22 |
+
FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime
|
23 |
+
|
24 |
+
COPY --from=build /app/build/bin/llama-cli /llama-cli
|
25 |
+
|
26 |
+
ENV LC_ALL=C.utf8
|
27 |
+
|
28 |
+
ENTRYPOINT [ "/llama-cli" ]
|
llama.cpp/.devops/llama-cli-musa.Dockerfile
ADDED
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ARG UBUNTU_VERSION=22.04
|
2 |
+
# This needs to generally match the container host's environment.
|
3 |
+
ARG MUSA_VERSION=rc3.1.0
|
4 |
+
# Target the MUSA build image
|
5 |
+
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
|
6 |
+
# Target the MUSA runtime image
|
7 |
+
ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
|
8 |
+
|
9 |
+
FROM ${BASE_MUSA_DEV_CONTAINER} AS build
|
10 |
+
|
11 |
+
# MUSA architecture to build for (defaults to all supported archs)
|
12 |
+
ARG MUSA_DOCKER_ARCH=default
|
13 |
+
|
14 |
+
RUN apt-get update && \
|
15 |
+
apt-get install -y build-essential git cmake
|
16 |
+
|
17 |
+
WORKDIR /app
|
18 |
+
|
19 |
+
COPY . .
|
20 |
+
|
21 |
+
# Use the default MUSA archs if not specified
|
22 |
+
RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
|
23 |
+
export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
|
24 |
+
fi && \
|
25 |
+
cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
|
26 |
+
cmake --build build --config Release --target llama-cli -j$(nproc) && \
|
27 |
+
mkdir -p /app/lib && \
|
28 |
+
find build -name "*.so" -exec cp {} /app/lib \;
|
29 |
+
|
30 |
+
FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
|
31 |
+
|
32 |
+
RUN apt-get update && \
|
33 |
+
apt-get install -y libgomp1
|
34 |
+
|
35 |
+
COPY --from=build /app/lib/ /
|
36 |
+
COPY --from=build /app/build/bin/llama-cli /llama-cli
|
37 |
+
|
38 |
+
ENTRYPOINT [ "/llama-cli" ]
|
llama.cpp/.devops/llama-cli-rocm.Dockerfile
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ARG UBUNTU_VERSION=22.04
|
2 |
+
|
3 |
+
# This needs to generally match the container host's environment.
|
4 |
+
ARG ROCM_VERSION=5.6
|
5 |
+
|
6 |
+
# Target the ROCm build image
|
7 |
+
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
|
8 |
+
|
9 |
+
FROM ${BASE_ROCM_DEV_CONTAINER} AS build
|
10 |
+
|
11 |
+
# Unless otherwise specified, we make a fat build.
|
12 |
+
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
|
13 |
+
# This is mostly tied to rocBLAS supported archs.
|
14 |
+
ARG ROCM_DOCKER_ARCH="\
|
15 |
+
gfx803 \
|
16 |
+
gfx900 \
|
17 |
+
gfx906 \
|
18 |
+
gfx908 \
|
19 |
+
gfx90a \
|
20 |
+
gfx1010 \
|
21 |
+
gfx1030 \
|
22 |
+
gfx1100 \
|
23 |
+
gfx1101 \
|
24 |
+
gfx1102"
|
25 |
+
|
26 |
+
COPY requirements.txt requirements.txt
|
27 |
+
COPY requirements requirements
|
28 |
+
|
29 |
+
RUN pip install --upgrade pip setuptools wheel \
|
30 |
+
&& pip install -r requirements.txt
|
31 |
+
|
32 |
+
WORKDIR /app
|
33 |
+
|
34 |
+
COPY . .
|
35 |
+
|
36 |
+
# Set the AMD GPU target architectures
|
37 |
+
ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
|
38 |
+
# Enable ROCm
|
39 |
+
ENV GGML_HIPBLAS=1
|
40 |
+
ENV CC=/opt/rocm/llvm/bin/clang
|
41 |
+
ENV CXX=/opt/rocm/llvm/bin/clang++
|
42 |
+
|
43 |
+
RUN make -j$(nproc) llama-cli
|
44 |
+
|
45 |
+
ENTRYPOINT [ "/app/llama-cli" ]
|
llama.cpp/.devops/llama-cli-vulkan.Dockerfile
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ARG UBUNTU_VERSION=jammy
|
2 |
+
|
3 |
+
FROM ubuntu:$UBUNTU_VERSION AS build
|
4 |
+
|
5 |
+
# Install build tools
|
6 |
+
RUN apt update && apt install -y git build-essential cmake wget libgomp1
|
7 |
+
|
8 |
+
# Install Vulkan SDK
|
9 |
+
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
|
10 |
+
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
|
11 |
+
apt update -y && \
|
12 |
+
apt-get install -y vulkan-sdk
|
13 |
+
|
14 |
+
# Build it
|
15 |
+
WORKDIR /app
|
16 |
+
COPY . .
|
17 |
+
RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 && \
|
18 |
+
cmake --build build --config Release --target llama-cli
|
19 |
+
|
20 |
+
# Clean up
|
21 |
+
WORKDIR /
|
22 |
+
RUN cp /app/build/bin/llama-cli /llama-cli && \
|
23 |
+
rm -rf /app
|
24 |
+
|
25 |
+
ENV LC_ALL=C.utf8
|
26 |
+
|
27 |
+
ENTRYPOINT [ "/llama-cli" ]
|
llama.cpp/.devops/llama-cli.Dockerfile
ADDED
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ARG UBUNTU_VERSION=22.04
|
2 |
+
|
3 |
+
FROM ubuntu:$UBUNTU_VERSION AS build
|
4 |
+
|
5 |
+
RUN apt-get update && \
|
6 |
+
apt-get install -y build-essential git
|
7 |
+
|
8 |
+
WORKDIR /app
|
9 |
+
|
10 |
+
COPY . .
|
11 |
+
|
12 |
+
RUN make -j$(nproc) llama-cli
|
13 |
+
|
14 |
+
FROM ubuntu:$UBUNTU_VERSION AS runtime
|
15 |
+
|
16 |
+
RUN apt-get update && \
|
17 |
+
apt-get install -y libgomp1
|
18 |
+
|
19 |
+
COPY --from=build /app/llama-cli /llama-cli
|
20 |
+
|
21 |
+
ENV LC_ALL=C.utf8
|
22 |
+
|
23 |
+
ENTRYPOINT [ "/llama-cli" ]
|
llama.cpp/.devops/llama-cpp-cuda.srpm.spec
ADDED
@@ -0,0 +1,83 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# SRPM for building from source and packaging an RPM for RPM-based distros.
|
2 |
+
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
|
3 |
+
# Built and maintained by John Boero - [email protected]
|
4 |
+
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
|
5 |
+
|
6 |
+
# Notes for llama.cpp:
|
7 |
+
# 1. Tags are currently based on hash - which will not sort asciibetically.
|
8 |
+
# We need to declare standard versioning if people want to sort latest releases.
|
9 |
+
# 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
|
10 |
+
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
|
11 |
+
# Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
|
12 |
+
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
|
13 |
+
# It is up to the user to install the correct vendor-specific support.
|
14 |
+
|
15 |
+
Name: llama.cpp-cuda
|
16 |
+
Version: %( date "+%%Y%%m%%d" )
|
17 |
+
Release: 1%{?dist}
|
18 |
+
Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
|
19 |
+
License: MIT
|
20 |
+
Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
|
21 |
+
BuildRequires: coreutils make gcc-c++ git cuda-toolkit
|
22 |
+
Requires: cuda-toolkit
|
23 |
+
URL: https://github.com/ggerganov/llama.cpp
|
24 |
+
|
25 |
+
%define debug_package %{nil}
|
26 |
+
%define source_date_epoch_from_changelog 0
|
27 |
+
|
28 |
+
%description
|
29 |
+
CPU inference for Meta's Lllama2 models using default options.
|
30 |
+
|
31 |
+
%prep
|
32 |
+
%setup -n llama.cpp-master
|
33 |
+
|
34 |
+
%build
|
35 |
+
make -j GGML_CUDA=1
|
36 |
+
|
37 |
+
%install
|
38 |
+
mkdir -p %{buildroot}%{_bindir}/
|
39 |
+
cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli
|
40 |
+
cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server
|
41 |
+
cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple
|
42 |
+
|
43 |
+
mkdir -p %{buildroot}/usr/lib/systemd/system
|
44 |
+
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacuda.service
|
45 |
+
[Unit]
|
46 |
+
Description=Llama.cpp server, CPU only (no GPU support in this build).
|
47 |
+
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
|
48 |
+
|
49 |
+
[Service]
|
50 |
+
Type=simple
|
51 |
+
EnvironmentFile=/etc/sysconfig/llama
|
52 |
+
ExecStart=/usr/bin/llama-cuda-server $LLAMA_ARGS
|
53 |
+
ExecReload=/bin/kill -s HUP $MAINPID
|
54 |
+
Restart=never
|
55 |
+
|
56 |
+
[Install]
|
57 |
+
WantedBy=default.target
|
58 |
+
EOF
|
59 |
+
|
60 |
+
mkdir -p %{buildroot}/etc/sysconfig
|
61 |
+
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
|
62 |
+
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
|
63 |
+
EOF
|
64 |
+
|
65 |
+
%clean
|
66 |
+
rm -rf %{buildroot}
|
67 |
+
rm -rf %{_builddir}/*
|
68 |
+
|
69 |
+
%files
|
70 |
+
%{_bindir}/llama-cuda-cli
|
71 |
+
%{_bindir}/llama-cuda-server
|
72 |
+
%{_bindir}/llama-cuda-simple
|
73 |
+
/usr/lib/systemd/system/llamacuda.service
|
74 |
+
%config /etc/sysconfig/llama
|
75 |
+
|
76 |
+
%pre
|
77 |
+
|
78 |
+
%post
|
79 |
+
|
80 |
+
%preun
|
81 |
+
%postun
|
82 |
+
|
83 |
+
%changelog
|
llama.cpp/.devops/llama-cpp.srpm.spec
ADDED
@@ -0,0 +1,85 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# SRPM for building from source and packaging an RPM for RPM-based distros.
|
2 |
+
# https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
|
3 |
+
# Built and maintained by John Boero - [email protected]
|
4 |
+
# In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
|
5 |
+
|
6 |
+
# Notes for llama.cpp:
|
7 |
+
# 1. Tags are currently based on hash - which will not sort asciibetically.
|
8 |
+
# We need to declare standard versioning if people want to sort latest releases.
|
9 |
+
# In the meantime, YYYYMMDD format will be used.
|
10 |
+
# 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
|
11 |
+
# 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
|
12 |
+
# Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
|
13 |
+
# 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
|
14 |
+
# It is up to the user to install the correct vendor-specific support.
|
15 |
+
|
16 |
+
Name: llama.cpp
|
17 |
+
Version: %( date "+%%Y%%m%%d" )
|
18 |
+
Release: 1%{?dist}
|
19 |
+
Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
|
20 |
+
License: MIT
|
21 |
+
Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
|
22 |
+
BuildRequires: coreutils make gcc-c++ git libstdc++-devel
|
23 |
+
Requires: libstdc++
|
24 |
+
URL: https://github.com/ggerganov/llama.cpp
|
25 |
+
|
26 |
+
%define debug_package %{nil}
|
27 |
+
%define source_date_epoch_from_changelog 0
|
28 |
+
|
29 |
+
%description
|
30 |
+
CPU inference for Meta's Lllama2 models using default options.
|
31 |
+
Models are not included in this package and must be downloaded separately.
|
32 |
+
|
33 |
+
%prep
|
34 |
+
%setup -n llama.cpp-master
|
35 |
+
|
36 |
+
%build
|
37 |
+
make -j
|
38 |
+
|
39 |
+
%install
|
40 |
+
mkdir -p %{buildroot}%{_bindir}/
|
41 |
+
cp -p llama-cli %{buildroot}%{_bindir}/llama-cli
|
42 |
+
cp -p llama-server %{buildroot}%{_bindir}/llama-server
|
43 |
+
cp -p llama-simple %{buildroot}%{_bindir}/llama-simple
|
44 |
+
|
45 |
+
mkdir -p %{buildroot}/usr/lib/systemd/system
|
46 |
+
%{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llama.service
|
47 |
+
[Unit]
|
48 |
+
Description=Llama.cpp server, CPU only (no GPU support in this build).
|
49 |
+
After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
|
50 |
+
|
51 |
+
[Service]
|
52 |
+
Type=simple
|
53 |
+
EnvironmentFile=/etc/sysconfig/llama
|
54 |
+
ExecStart=/usr/bin/llama-server $LLAMA_ARGS
|
55 |
+
ExecReload=/bin/kill -s HUP $MAINPID
|
56 |
+
Restart=never
|
57 |
+
|
58 |
+
[Install]
|
59 |
+
WantedBy=default.target
|
60 |
+
EOF
|
61 |
+
|
62 |
+
mkdir -p %{buildroot}/etc/sysconfig
|
63 |
+
%{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
|
64 |
+
LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
|
65 |
+
EOF
|
66 |
+
|
67 |
+
%clean
|
68 |
+
rm -rf %{buildroot}
|
69 |
+
rm -rf %{_builddir}/*
|
70 |
+
|
71 |
+
%files
|
72 |
+
%{_bindir}/llama-cli
|
73 |
+
%{_bindir}/llama-server
|
74 |
+
%{_bindir}/llama-simple
|
75 |
+
/usr/lib/systemd/system/llama.service
|
76 |
+
%config /etc/sysconfig/llama
|
77 |
+
|
78 |
+
%pre
|
79 |
+
|
80 |
+
%post
|
81 |
+
|
82 |
+
%preun
|
83 |
+
%postun
|
84 |
+
|
85 |
+
%changelog
|
llama.cpp/.devops/llama-server-cuda.Dockerfile
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ARG UBUNTU_VERSION=22.04
|
2 |
+
# This needs to generally match the container host's environment.
|
3 |
+
ARG CUDA_VERSION=12.6.0
|
4 |
+
# Target the CUDA build image
|
5 |
+
ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
|
6 |
+
# Target the CUDA runtime image
|
7 |
+
ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
|
8 |
+
|
9 |
+
FROM ${BASE_CUDA_DEV_CONTAINER} AS build
|
10 |
+
|
11 |
+
# CUDA architecture to build for (defaults to all supported archs)
|
12 |
+
ARG CUDA_DOCKER_ARCH=default
|
13 |
+
|
14 |
+
RUN apt-get update && \
|
15 |
+
apt-get install -y build-essential git cmake libcurl4-openssl-dev
|
16 |
+
|
17 |
+
WORKDIR /app
|
18 |
+
|
19 |
+
COPY . .
|
20 |
+
|
21 |
+
# Use the default CUDA archs if not specified
|
22 |
+
RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
|
23 |
+
export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
|
24 |
+
fi && \
|
25 |
+
cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
|
26 |
+
cmake --build build --config Release --target llama-server -j$(nproc) && \
|
27 |
+
mkdir -p /app/lib && \
|
28 |
+
find build -name "*.so" -exec cp {} /app/lib \;
|
29 |
+
|
30 |
+
FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
|
31 |
+
|
32 |
+
RUN apt-get update && \
|
33 |
+
apt-get install -y libcurl4-openssl-dev libgomp1 curl
|
34 |
+
|
35 |
+
COPY --from=build /app/lib/ /
|
36 |
+
COPY --from=build /app/build/bin/llama-server /llama-server
|
37 |
+
|
38 |
+
# Must be set to 0.0.0.0 so it can listen to requests from host machine
|
39 |
+
ENV LLAMA_ARG_HOST=0.0.0.0
|
40 |
+
|
41 |
+
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
42 |
+
|
43 |
+
ENTRYPOINT [ "/llama-server" ]
|
llama.cpp/.devops/llama-server-intel.Dockerfile
ADDED
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ARG ONEAPI_VERSION=2025.0.0-0-devel-ubuntu22.04
|
2 |
+
|
3 |
+
FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build
|
4 |
+
|
5 |
+
ARG GGML_SYCL_F16=OFF
|
6 |
+
RUN apt-get update && \
|
7 |
+
apt-get install -y git libcurl4-openssl-dev
|
8 |
+
|
9 |
+
WORKDIR /app
|
10 |
+
|
11 |
+
COPY . .
|
12 |
+
|
13 |
+
RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
|
14 |
+
echo "GGML_SYCL_F16 is set" && \
|
15 |
+
export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
|
16 |
+
fi && \
|
17 |
+
echo "Building with dynamic libs" && \
|
18 |
+
cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
|
19 |
+
cmake --build build --config Release --target llama-server
|
20 |
+
|
21 |
+
FROM intel/oneapi-basekit:$ONEAPI_VERSION AS runtime
|
22 |
+
|
23 |
+
RUN apt-get update && \
|
24 |
+
apt-get install -y libcurl4-openssl-dev curl
|
25 |
+
|
26 |
+
COPY --from=build /app/build/bin/llama-server /llama-server
|
27 |
+
|
28 |
+
ENV LC_ALL=C.utf8
|
29 |
+
# Must be set to 0.0.0.0 so it can listen to requests from host machine
|
30 |
+
ENV LLAMA_ARG_HOST=0.0.0.0
|
31 |
+
|
32 |
+
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
33 |
+
|
34 |
+
ENTRYPOINT [ "/llama-server" ]
|
llama.cpp/.devops/llama-server-musa.Dockerfile
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ARG UBUNTU_VERSION=22.04
|
2 |
+
# This needs to generally match the container host's environment.
|
3 |
+
ARG MUSA_VERSION=rc3.1.0
|
4 |
+
# Target the MUSA build image
|
5 |
+
ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
|
6 |
+
# Target the MUSA runtime image
|
7 |
+
ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
|
8 |
+
|
9 |
+
FROM ${BASE_MUSA_DEV_CONTAINER} AS build
|
10 |
+
|
11 |
+
# MUSA architecture to build for (defaults to all supported archs)
|
12 |
+
ARG MUSA_DOCKER_ARCH=default
|
13 |
+
|
14 |
+
RUN apt-get update && \
|
15 |
+
apt-get install -y build-essential git cmake libcurl4-openssl-dev
|
16 |
+
|
17 |
+
WORKDIR /app
|
18 |
+
|
19 |
+
COPY . .
|
20 |
+
|
21 |
+
# Use the default MUSA archs if not specified
|
22 |
+
RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
|
23 |
+
export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
|
24 |
+
fi && \
|
25 |
+
cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
|
26 |
+
cmake --build build --config Release --target llama-server -j$(nproc) && \
|
27 |
+
mkdir -p /app/lib && \
|
28 |
+
find build -name "*.so" -exec cp {} /app/lib \;
|
29 |
+
|
30 |
+
FROM ${BASE_MUSA_RUN_CONTAINER} AS runtime
|
31 |
+
|
32 |
+
RUN apt-get update && \
|
33 |
+
apt-get install -y libcurl4-openssl-dev libgomp1 curl
|
34 |
+
|
35 |
+
COPY --from=build /app/lib/ /
|
36 |
+
COPY --from=build /app/build/bin/llama-server /llama-server
|
37 |
+
|
38 |
+
# Must be set to 0.0.0.0 so it can listen to requests from host machine
|
39 |
+
ENV LLAMA_ARG_HOST=0.0.0.0
|
40 |
+
|
41 |
+
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
42 |
+
|
43 |
+
ENTRYPOINT [ "/llama-server" ]
|
llama.cpp/.devops/llama-server-rocm.Dockerfile
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ARG UBUNTU_VERSION=22.04
|
2 |
+
|
3 |
+
# This needs to generally match the container host's environment.
|
4 |
+
ARG ROCM_VERSION=5.6
|
5 |
+
|
6 |
+
# Target the ROCm build image
|
7 |
+
ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
|
8 |
+
|
9 |
+
FROM ${BASE_ROCM_DEV_CONTAINER} AS build
|
10 |
+
|
11 |
+
# Unless otherwise specified, we make a fat build.
|
12 |
+
# List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
|
13 |
+
# This is mostly tied to rocBLAS supported archs.
|
14 |
+
ARG ROCM_DOCKER_ARCH="\
|
15 |
+
gfx803 \
|
16 |
+
gfx900 \
|
17 |
+
gfx906 \
|
18 |
+
gfx908 \
|
19 |
+
gfx90a \
|
20 |
+
gfx1010 \
|
21 |
+
gfx1030 \
|
22 |
+
gfx1100 \
|
23 |
+
gfx1101 \
|
24 |
+
gfx1102"
|
25 |
+
|
26 |
+
COPY requirements.txt requirements.txt
|
27 |
+
COPY requirements requirements
|
28 |
+
|
29 |
+
RUN pip install --upgrade pip setuptools wheel \
|
30 |
+
&& pip install -r requirements.txt
|
31 |
+
|
32 |
+
WORKDIR /app
|
33 |
+
|
34 |
+
COPY . .
|
35 |
+
|
36 |
+
# Set the AMD GPU target architectures
|
37 |
+
ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
|
38 |
+
# Enable ROCm
|
39 |
+
ENV GGML_HIPBLAS=1
|
40 |
+
ENV CC=/opt/rocm/llvm/bin/clang
|
41 |
+
ENV CXX=/opt/rocm/llvm/bin/clang++
|
42 |
+
# Must be set to 0.0.0.0 so it can listen to requests from host machine
|
43 |
+
ENV LLAMA_ARG_HOST=0.0.0.0
|
44 |
+
|
45 |
+
# Enable cURL
|
46 |
+
ENV LLAMA_CURL=1
|
47 |
+
RUN apt-get update && \
|
48 |
+
apt-get install -y libcurl4-openssl-dev curl
|
49 |
+
|
50 |
+
RUN make -j$(nproc) llama-server
|
51 |
+
|
52 |
+
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
53 |
+
|
54 |
+
ENTRYPOINT [ "/app/llama-server" ]
|
llama.cpp/.devops/llama-server-vulkan.Dockerfile
ADDED
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ARG UBUNTU_VERSION=jammy
|
2 |
+
|
3 |
+
FROM ubuntu:$UBUNTU_VERSION AS build
|
4 |
+
|
5 |
+
# Install build tools
|
6 |
+
RUN apt update && apt install -y git build-essential cmake wget
|
7 |
+
|
8 |
+
# Install Vulkan SDK and cURL
|
9 |
+
RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
|
10 |
+
wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list && \
|
11 |
+
apt update -y && \
|
12 |
+
apt-get install -y vulkan-sdk libcurl4-openssl-dev curl
|
13 |
+
|
14 |
+
# Build it
|
15 |
+
WORKDIR /app
|
16 |
+
COPY . .
|
17 |
+
RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \
|
18 |
+
cmake --build build --config Release --target llama-server
|
19 |
+
|
20 |
+
# Clean up
|
21 |
+
WORKDIR /
|
22 |
+
RUN cp /app/build/bin/llama-server /llama-server && \
|
23 |
+
rm -rf /app
|
24 |
+
|
25 |
+
ENV LC_ALL=C.utf8
|
26 |
+
# Must be set to 0.0.0.0 so it can listen to requests from host machine
|
27 |
+
ENV LLAMA_ARG_HOST=0.0.0.0
|
28 |
+
|
29 |
+
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
30 |
+
|
31 |
+
ENTRYPOINT [ "/llama-server" ]
|
llama.cpp/.devops/llama-server.Dockerfile
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
ARG UBUNTU_VERSION=22.04
|
2 |
+
|
3 |
+
FROM ubuntu:$UBUNTU_VERSION AS build
|
4 |
+
|
5 |
+
RUN apt-get update && \
|
6 |
+
apt-get install -y build-essential git cmake libcurl4-openssl-dev
|
7 |
+
|
8 |
+
WORKDIR /app
|
9 |
+
|
10 |
+
COPY . .
|
11 |
+
|
12 |
+
|
13 |
+
RUN \
|
14 |
+
# Build multiple versions of the CPU backend
|
15 |
+
scripts/build-cpu.sh avx -DGGML_AVX=ON -DGGML_AVX2=OFF && \
|
16 |
+
scripts/build-cpu.sh avx2 -DGGML_AVX=ON -DGGML_AVX2=ON && \
|
17 |
+
scripts/build-cpu.sh avx512 -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON && \
|
18 |
+
scripts/build-cpu.sh amx -DGGML_AVX=ON -DGGML_AVX2=ON -DGGML_AVX512=ON -DGGML_AVX_VNNI=ON -DGGML_AVX512_VNNI=ON -DGGML_AMX_TILE=ON -DGGML_AMX_INT8=ON && \
|
19 |
+
# Build llama-server
|
20 |
+
cmake -S . -B build -DGGML_BACKEND_DL=ON -DGGML_NATIVE=OFF -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release && \
|
21 |
+
cmake --build build --target llama-server -j $(nproc) && \
|
22 |
+
# Copy the built libraries to /app/lib
|
23 |
+
mkdir -p /app/lib && \
|
24 |
+
mv libggml-cpu* /app/lib/ && \
|
25 |
+
find build -name "*.so" -exec cp {} /app/lib/ \;
|
26 |
+
|
27 |
+
FROM ubuntu:$UBUNTU_VERSION AS runtime
|
28 |
+
|
29 |
+
RUN apt-get update && \
|
30 |
+
apt-get install -y libcurl4-openssl-dev libgomp1 curl
|
31 |
+
|
32 |
+
COPY --from=build /app/build/bin/llama-server /llama-server
|
33 |
+
COPY --from=build /app/lib/ /
|
34 |
+
|
35 |
+
ENV LC_ALL=C.utf8
|
36 |
+
# Must be set to 0.0.0.0 so it can listen to requests from host machine
|
37 |
+
ENV LLAMA_ARG_HOST=0.0.0.0
|
38 |
+
|
39 |
+
HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
|
40 |
+
|
41 |
+
ENTRYPOINT [ "/llama-server" ]
|
llama.cpp/.devops/nix/apps.nix
ADDED
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
perSystem =
|
3 |
+
{ config, lib, ... }:
|
4 |
+
{
|
5 |
+
apps =
|
6 |
+
let
|
7 |
+
inherit (config.packages) default;
|
8 |
+
binaries = [
|
9 |
+
"llama-cli"
|
10 |
+
"llama-embedding"
|
11 |
+
"llama-server"
|
12 |
+
"llama-quantize"
|
13 |
+
];
|
14 |
+
mkApp = name: {
|
15 |
+
type = "app";
|
16 |
+
program = "${default}/bin/${name}";
|
17 |
+
};
|
18 |
+
in
|
19 |
+
lib.genAttrs binaries mkApp;
|
20 |
+
};
|
21 |
+
}
|
llama.cpp/.devops/nix/devshells.nix
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{ inputs, ... }:
|
2 |
+
|
3 |
+
{
|
4 |
+
perSystem =
|
5 |
+
{
|
6 |
+
config,
|
7 |
+
lib,
|
8 |
+
system,
|
9 |
+
...
|
10 |
+
}:
|
11 |
+
{
|
12 |
+
devShells =
|
13 |
+
let
|
14 |
+
pkgs = import inputs.nixpkgs { inherit system; };
|
15 |
+
stdenv = pkgs.stdenv;
|
16 |
+
scripts = config.packages.python-scripts;
|
17 |
+
in
|
18 |
+
lib.pipe (config.packages) [
|
19 |
+
(lib.concatMapAttrs (
|
20 |
+
name: package: {
|
21 |
+
${name} = pkgs.mkShell {
|
22 |
+
name = "${name}";
|
23 |
+
inputsFrom = [ package ];
|
24 |
+
shellHook = ''
|
25 |
+
echo "Entering ${name} devShell"
|
26 |
+
'';
|
27 |
+
};
|
28 |
+
"${name}-extra" =
|
29 |
+
if (name == "python-scripts") then
|
30 |
+
null
|
31 |
+
else
|
32 |
+
pkgs.mkShell {
|
33 |
+
name = "${name}-extra";
|
34 |
+
inputsFrom = [
|
35 |
+
package
|
36 |
+
scripts
|
37 |
+
];
|
38 |
+
# Extra packages that *may* be used by some scripts
|
39 |
+
packages = [
|
40 |
+
pkgs.python3Packages.tiktoken
|
41 |
+
];
|
42 |
+
shellHook = ''
|
43 |
+
echo "Entering ${name} devShell"
|
44 |
+
addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib"
|
45 |
+
'';
|
46 |
+
};
|
47 |
+
}
|
48 |
+
))
|
49 |
+
(lib.filterAttrs (name: value: value != null))
|
50 |
+
];
|
51 |
+
};
|
52 |
+
}
|
llama.cpp/.devops/nix/docker.nix
ADDED
@@ -0,0 +1,37 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
lib,
|
3 |
+
dockerTools,
|
4 |
+
buildEnv,
|
5 |
+
llama-cpp,
|
6 |
+
interactive ? true,
|
7 |
+
coreutils,
|
8 |
+
}:
|
9 |
+
|
10 |
+
# A tar that can be fed into `docker load`:
|
11 |
+
#
|
12 |
+
# $ nix build .#llamaPackages.docker
|
13 |
+
# $ docker load < result
|
14 |
+
|
15 |
+
# For details and variations cf.
|
16 |
+
# - https://nixos.org/manual/nixpkgs/unstable/#ssec-pkgs-dockerTools-buildLayeredImage
|
17 |
+
# - https://discourse.nixos.org/t/a-faster-dockertools-buildimage-prototype/16922
|
18 |
+
# - https://nixery.dev/
|
19 |
+
|
20 |
+
# Approximate (compressed) sizes, at the time of writing, are:
|
21 |
+
#
|
22 |
+
# .#llamaPackages.docker: 125M;
|
23 |
+
# .#llamaPackagesCuda.docker: 537M;
|
24 |
+
# .#legacyPackages.aarch64-linux.llamaPackagesXavier.docker: 415M.
|
25 |
+
|
26 |
+
dockerTools.buildLayeredImage {
|
27 |
+
name = llama-cpp.pname;
|
28 |
+
tag = "latest";
|
29 |
+
|
30 |
+
contents =
|
31 |
+
[ llama-cpp ]
|
32 |
+
++ lib.optionals interactive [
|
33 |
+
coreutils
|
34 |
+
dockerTools.binSh
|
35 |
+
dockerTools.caCertificates
|
36 |
+
];
|
37 |
+
}
|
llama.cpp/.devops/nix/jetson-support.nix
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{ inputs, ... }:
|
2 |
+
{
|
3 |
+
perSystem =
|
4 |
+
{
|
5 |
+
config,
|
6 |
+
system,
|
7 |
+
lib,
|
8 |
+
pkgsCuda,
|
9 |
+
...
|
10 |
+
}:
|
11 |
+
{
|
12 |
+
legacyPackages =
|
13 |
+
let
|
14 |
+
caps.llamaPackagesXavier = "7.2";
|
15 |
+
caps.llamaPackagesOrin = "8.7";
|
16 |
+
caps.llamaPackagesTX2 = "6.2";
|
17 |
+
caps.llamaPackagesNano = "5.3";
|
18 |
+
|
19 |
+
pkgsFor =
|
20 |
+
cap:
|
21 |
+
import inputs.nixpkgs {
|
22 |
+
inherit system;
|
23 |
+
config = {
|
24 |
+
cudaSupport = true;
|
25 |
+
cudaCapabilities = [ cap ];
|
26 |
+
cudaEnableForwardCompat = false;
|
27 |
+
inherit (pkgsCuda.config) allowUnfreePredicate;
|
28 |
+
};
|
29 |
+
};
|
30 |
+
in
|
31 |
+
builtins.mapAttrs (name: cap: (pkgsFor cap).callPackage ./scope.nix { }) caps;
|
32 |
+
|
33 |
+
packages = lib.optionalAttrs (system == "aarch64-linux") {
|
34 |
+
jetson-xavier = config.legacyPackages.llamaPackagesXavier.llama-cpp;
|
35 |
+
jetson-orin = config.legacyPackages.llamaPackagesOrin.llama-cpp;
|
36 |
+
jetson-nano = config.legacyPackages.llamaPackagesNano.llama-cpp;
|
37 |
+
};
|
38 |
+
};
|
39 |
+
}
|
llama.cpp/.devops/nix/nixpkgs-instances.nix
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{ inputs, ... }:
|
2 |
+
{
|
3 |
+
# The _module.args definitions are passed on to modules as arguments. E.g.
|
4 |
+
# the module `{ pkgs ... }: { /* config */ }` implicitly uses
|
5 |
+
# `_module.args.pkgs` (defined in this case by flake-parts).
|
6 |
+
perSystem =
|
7 |
+
{ system, ... }:
|
8 |
+
{
|
9 |
+
_module.args = {
|
10 |
+
# Note: bringing up https://zimbatm.com/notes/1000-instances-of-nixpkgs
|
11 |
+
# again, the below creates several nixpkgs instances which the
|
12 |
+
# flake-centric CLI will be forced to evaluate e.g. on `nix flake show`.
|
13 |
+
#
|
14 |
+
# This is currently "slow" and "expensive", on a certain scale.
|
15 |
+
# This also isn't "right" in that this hinders dependency injection at
|
16 |
+
# the level of flake inputs. This might get removed in the foreseeable
|
17 |
+
# future.
|
18 |
+
#
|
19 |
+
# Note that you can use these expressions without Nix
|
20 |
+
# (`pkgs.callPackage ./devops/nix/scope.nix { }` is the entry point).
|
21 |
+
|
22 |
+
pkgsCuda = import inputs.nixpkgs {
|
23 |
+
inherit system;
|
24 |
+
# Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc,
|
25 |
+
# and ucx are built with CUDA support)
|
26 |
+
config.cudaSupport = true;
|
27 |
+
config.allowUnfreePredicate =
|
28 |
+
p:
|
29 |
+
builtins.all (
|
30 |
+
license:
|
31 |
+
license.free
|
32 |
+
|| builtins.elem license.shortName [
|
33 |
+
"CUDA EULA"
|
34 |
+
"cuDNN EULA"
|
35 |
+
]
|
36 |
+
) (p.meta.licenses or [ p.meta.license ]);
|
37 |
+
};
|
38 |
+
# Ensure dependencies use ROCm consistently
|
39 |
+
pkgsRocm = import inputs.nixpkgs {
|
40 |
+
inherit system;
|
41 |
+
config.rocmSupport = true;
|
42 |
+
};
|
43 |
+
};
|
44 |
+
};
|
45 |
+
}
|
llama.cpp/.devops/nix/package-gguf-py.nix
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
lib,
|
3 |
+
llamaVersion,
|
4 |
+
numpy,
|
5 |
+
tqdm,
|
6 |
+
sentencepiece,
|
7 |
+
pyyaml,
|
8 |
+
poetry-core,
|
9 |
+
buildPythonPackage,
|
10 |
+
pytestCheckHook,
|
11 |
+
}:
|
12 |
+
|
13 |
+
buildPythonPackage {
|
14 |
+
pname = "gguf";
|
15 |
+
version = llamaVersion;
|
16 |
+
pyproject = true;
|
17 |
+
nativeBuildInputs = [ poetry-core ];
|
18 |
+
propagatedBuildInputs = [
|
19 |
+
numpy
|
20 |
+
tqdm
|
21 |
+
sentencepiece
|
22 |
+
pyyaml
|
23 |
+
];
|
24 |
+
src = lib.cleanSource ../../gguf-py;
|
25 |
+
pythonImportsCheck = [
|
26 |
+
"numpy"
|
27 |
+
"gguf"
|
28 |
+
];
|
29 |
+
nativeCheckInputs = [ pytestCheckHook ];
|
30 |
+
doCheck = true;
|
31 |
+
meta = with lib; {
|
32 |
+
description = "Python package for writing binary files in the GGUF format";
|
33 |
+
license = licenses.mit;
|
34 |
+
maintainers = [ maintainers.ditsuke ];
|
35 |
+
};
|
36 |
+
}
|
llama.cpp/.devops/nix/package.nix
ADDED
@@ -0,0 +1,246 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
lib,
|
3 |
+
glibc,
|
4 |
+
config,
|
5 |
+
stdenv,
|
6 |
+
runCommand,
|
7 |
+
cmake,
|
8 |
+
ninja,
|
9 |
+
pkg-config,
|
10 |
+
git,
|
11 |
+
mpi,
|
12 |
+
blas,
|
13 |
+
cudaPackages,
|
14 |
+
autoAddDriverRunpath,
|
15 |
+
darwin,
|
16 |
+
rocmPackages,
|
17 |
+
vulkan-headers,
|
18 |
+
vulkan-loader,
|
19 |
+
curl,
|
20 |
+
shaderc,
|
21 |
+
useBlas ?
|
22 |
+
builtins.all (x: !x) [
|
23 |
+
useCuda
|
24 |
+
useMetalKit
|
25 |
+
useRocm
|
26 |
+
useVulkan
|
27 |
+
]
|
28 |
+
&& blas.meta.available,
|
29 |
+
useCuda ? config.cudaSupport,
|
30 |
+
useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
|
31 |
+
# Increases the runtime closure size by ~700M
|
32 |
+
useMpi ? false,
|
33 |
+
useRocm ? config.rocmSupport,
|
34 |
+
enableCurl ? true,
|
35 |
+
useVulkan ? false,
|
36 |
+
llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
|
37 |
+
|
38 |
+
# It's necessary to consistently use backendStdenv when building with CUDA support,
|
39 |
+
# otherwise we get libstdc++ errors downstream.
|
40 |
+
effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
|
41 |
+
enableStatic ? effectiveStdenv.hostPlatform.isStatic,
|
42 |
+
precompileMetalShaders ? false,
|
43 |
+
}:
|
44 |
+
|
45 |
+
let
|
46 |
+
inherit (lib)
|
47 |
+
cmakeBool
|
48 |
+
cmakeFeature
|
49 |
+
optionals
|
50 |
+
strings
|
51 |
+
;
|
52 |
+
|
53 |
+
stdenv = throw "Use effectiveStdenv instead";
|
54 |
+
|
55 |
+
suffices =
|
56 |
+
lib.optionals useBlas [ "BLAS" ]
|
57 |
+
++ lib.optionals useCuda [ "CUDA" ]
|
58 |
+
++ lib.optionals useMetalKit [ "MetalKit" ]
|
59 |
+
++ lib.optionals useMpi [ "MPI" ]
|
60 |
+
++ lib.optionals useRocm [ "ROCm" ]
|
61 |
+
++ lib.optionals useVulkan [ "Vulkan" ];
|
62 |
+
|
63 |
+
pnameSuffix =
|
64 |
+
strings.optionalString (suffices != [ ])
|
65 |
+
"-${strings.concatMapStringsSep "-" strings.toLower suffices}";
|
66 |
+
descriptionSuffix = strings.optionalString (
|
67 |
+
suffices != [ ]
|
68 |
+
) ", accelerated with ${strings.concatStringsSep ", " suffices}";
|
69 |
+
|
70 |
+
xcrunHost = runCommand "xcrunHost" { } ''
|
71 |
+
mkdir -p $out/bin
|
72 |
+
ln -s /usr/bin/xcrun $out/bin
|
73 |
+
'';
|
74 |
+
|
75 |
+
# apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
|
76 |
+
# separately
|
77 |
+
darwinBuildInputs =
|
78 |
+
with darwin.apple_sdk.frameworks;
|
79 |
+
[
|
80 |
+
Accelerate
|
81 |
+
CoreVideo
|
82 |
+
CoreGraphics
|
83 |
+
]
|
84 |
+
++ optionals useMetalKit [ MetalKit ];
|
85 |
+
|
86 |
+
cudaBuildInputs = with cudaPackages; [
|
87 |
+
cuda_cudart
|
88 |
+
cuda_cccl # <nv/target>
|
89 |
+
libcublas
|
90 |
+
];
|
91 |
+
|
92 |
+
rocmBuildInputs = with rocmPackages; [
|
93 |
+
clr
|
94 |
+
hipblas
|
95 |
+
rocblas
|
96 |
+
];
|
97 |
+
|
98 |
+
vulkanBuildInputs = [
|
99 |
+
vulkan-headers
|
100 |
+
vulkan-loader
|
101 |
+
shaderc
|
102 |
+
];
|
103 |
+
in
|
104 |
+
|
105 |
+
effectiveStdenv.mkDerivation (finalAttrs: {
|
106 |
+
pname = "llama-cpp${pnameSuffix}";
|
107 |
+
version = llamaVersion;
|
108 |
+
|
109 |
+
# Note: none of the files discarded here are visible in the sandbox or
|
110 |
+
# affect the output hash. This also means they can be modified without
|
111 |
+
# triggering a rebuild.
|
112 |
+
src = lib.cleanSourceWith {
|
113 |
+
filter =
|
114 |
+
name: type:
|
115 |
+
let
|
116 |
+
noneOf = builtins.all (x: !x);
|
117 |
+
baseName = baseNameOf name;
|
118 |
+
in
|
119 |
+
noneOf [
|
120 |
+
(lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
|
121 |
+
(lib.hasSuffix ".md" name) # Ignore *.md changes when computing outPaths
|
122 |
+
(lib.hasPrefix "." baseName) # Skip hidden files and directories
|
123 |
+
(baseName == "flake.lock")
|
124 |
+
];
|
125 |
+
src = lib.cleanSource ../../.;
|
126 |
+
};
|
127 |
+
|
128 |
+
postPatch = ''
|
129 |
+
substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
|
130 |
+
--replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
|
131 |
+
substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
|
132 |
+
--replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
|
133 |
+
'';
|
134 |
+
|
135 |
+
# With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
|
136 |
+
# `default.metallib` may be compiled with Metal compiler from XCode
|
137 |
+
# and we need to escape sandbox on MacOS to access Metal compiler.
|
138 |
+
# `xcrun` is used to find the path of the Metal compiler, which is variable
|
139 |
+
# and not on $PATH
|
140 |
+
# see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion
|
141 |
+
__noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;
|
142 |
+
|
143 |
+
nativeBuildInputs =
|
144 |
+
[
|
145 |
+
cmake
|
146 |
+
ninja
|
147 |
+
pkg-config
|
148 |
+
git
|
149 |
+
]
|
150 |
+
++ optionals useCuda [
|
151 |
+
cudaPackages.cuda_nvcc
|
152 |
+
|
153 |
+
autoAddDriverRunpath
|
154 |
+
]
|
155 |
+
++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
|
156 |
+
++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];
|
157 |
+
|
158 |
+
buildInputs =
|
159 |
+
optionals effectiveStdenv.isDarwin darwinBuildInputs
|
160 |
+
++ optionals useCuda cudaBuildInputs
|
161 |
+
++ optionals useMpi [ mpi ]
|
162 |
+
++ optionals useRocm rocmBuildInputs
|
163 |
+
++ optionals useBlas [ blas ]
|
164 |
+
++ optionals useVulkan vulkanBuildInputs
|
165 |
+
++ optionals enableCurl [ curl ];
|
166 |
+
|
167 |
+
cmakeFlags =
|
168 |
+
[
|
169 |
+
(cmakeBool "LLAMA_BUILD_SERVER" true)
|
170 |
+
(cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
|
171 |
+
(cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
|
172 |
+
(cmakeBool "LLAMA_CURL" enableCurl)
|
173 |
+
(cmakeBool "GGML_NATIVE" false)
|
174 |
+
(cmakeBool "GGML_BLAS" useBlas)
|
175 |
+
(cmakeBool "GGML_CUDA" useCuda)
|
176 |
+
(cmakeBool "GGML_HIP" useRocm)
|
177 |
+
(cmakeBool "GGML_METAL" useMetalKit)
|
178 |
+
(cmakeBool "GGML_VULKAN" useVulkan)
|
179 |
+
(cmakeBool "GGML_STATIC" enableStatic)
|
180 |
+
]
|
181 |
+
++ optionals useCuda [
|
182 |
+
(
|
183 |
+
with cudaPackages.flags;
|
184 |
+
cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
|
185 |
+
builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
|
186 |
+
)
|
187 |
+
)
|
188 |
+
]
|
189 |
+
++ optionals useRocm [
|
190 |
+
(cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
|
191 |
+
(cmakeFeature "CMAKE_HIP_ARCHITECTURES" (builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets))
|
192 |
+
]
|
193 |
+
++ optionals useMetalKit [
|
194 |
+
(lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
|
195 |
+
(cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
|
196 |
+
];
|
197 |
+
|
198 |
+
# Environment variables needed for ROCm
|
199 |
+
env = optionals useRocm {
|
200 |
+
ROCM_PATH = "${rocmPackages.clr}";
|
201 |
+
HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
|
202 |
+
};
|
203 |
+
|
204 |
+
# TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
|
205 |
+
# if they haven't been added yet.
|
206 |
+
postInstall = ''
|
207 |
+
mkdir -p $out/include
|
208 |
+
cp $src/include/llama.h $out/include/
|
209 |
+
'';
|
210 |
+
|
211 |
+
meta = {
|
212 |
+
# Configurations we don't want even the CI to evaluate. Results in the
|
213 |
+
# "unsupported platform" messages. This is mostly a no-op, because
|
214 |
+
# cudaPackages would've refused to evaluate anyway.
|
215 |
+
badPlatforms = optionals useCuda lib.platforms.darwin;
|
216 |
+
|
217 |
+
# Configurations that are known to result in build failures. Can be
|
218 |
+
# overridden by importing Nixpkgs with `allowBroken = true`.
|
219 |
+
broken = (useMetalKit && !effectiveStdenv.isDarwin);
|
220 |
+
|
221 |
+
description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
|
222 |
+
homepage = "https://github.com/ggerganov/llama.cpp/";
|
223 |
+
license = lib.licenses.mit;
|
224 |
+
|
225 |
+
# Accommodates `nix run` and `lib.getExe`
|
226 |
+
mainProgram = "llama-cli";
|
227 |
+
|
228 |
+
# These people might respond, on a best-effort basis, if you ping them
|
229 |
+
# in case of Nix-specific regressions or for reviewing Nix-specific PRs.
|
230 |
+
# Consider adding yourself to this list if you want to ensure this flake
|
231 |
+
# stays maintained and you're willing to invest your time. Do not add
|
232 |
+
# other people without their consent. Consider removing people after
|
233 |
+
# they've been unreachable for long periods of time.
|
234 |
+
|
235 |
+
# Note that lib.maintainers is defined in Nixpkgs, but you may just add
|
236 |
+
# an attrset following the same format as in
|
237 |
+
# https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
|
238 |
+
maintainers = with lib.maintainers; [
|
239 |
+
philiptaron
|
240 |
+
SomeoneSerge
|
241 |
+
];
|
242 |
+
|
243 |
+
# Extend `badPlatforms` instead
|
244 |
+
platforms = lib.platforms.all;
|
245 |
+
};
|
246 |
+
})
|
llama.cpp/.devops/nix/python-scripts.nix
ADDED
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
lib,
|
3 |
+
stdenv,
|
4 |
+
buildPythonPackage,
|
5 |
+
poetry-core,
|
6 |
+
mkShell,
|
7 |
+
python3Packages,
|
8 |
+
gguf-py,
|
9 |
+
}@inputs:
|
10 |
+
|
11 |
+
let
|
12 |
+
llama-python-deps = with python3Packages; [
|
13 |
+
numpy
|
14 |
+
sentencepiece
|
15 |
+
transformers
|
16 |
+
protobuf
|
17 |
+
torchWithoutCuda
|
18 |
+
gguf-py
|
19 |
+
tqdm
|
20 |
+
|
21 |
+
# for scripts/compare-llama-bench.py
|
22 |
+
gitpython
|
23 |
+
tabulate
|
24 |
+
|
25 |
+
# for examples/pydantic-models-to-grammar-examples.py
|
26 |
+
docstring-parser
|
27 |
+
pydantic
|
28 |
+
|
29 |
+
];
|
30 |
+
|
31 |
+
llama-python-test-deps = with python3Packages; [
|
32 |
+
# Server bench
|
33 |
+
matplotlib
|
34 |
+
|
35 |
+
# server tests
|
36 |
+
openai
|
37 |
+
pytest
|
38 |
+
prometheus-client
|
39 |
+
];
|
40 |
+
in
|
41 |
+
|
42 |
+
buildPythonPackage ({
|
43 |
+
pname = "llama-scripts";
|
44 |
+
version = "0.0.0";
|
45 |
+
pyproject = true;
|
46 |
+
|
47 |
+
# NOTE: The files filtered out here are not visible in the build sandbox, neither
|
48 |
+
# do they affect the output hash. They can be modified without triggering a rebuild.
|
49 |
+
src = lib.cleanSourceWith {
|
50 |
+
filter =
|
51 |
+
name: type:
|
52 |
+
let
|
53 |
+
any = builtins.any (x: x);
|
54 |
+
baseName = builtins.baseNameOf name;
|
55 |
+
in
|
56 |
+
any [
|
57 |
+
(lib.hasSuffix ".py" name)
|
58 |
+
(baseName == "README.md")
|
59 |
+
(baseName == "pyproject.toml")
|
60 |
+
];
|
61 |
+
src = lib.cleanSource ../../.;
|
62 |
+
};
|
63 |
+
nativeBuildInputs = [ poetry-core ];
|
64 |
+
nativeCheckInputs = llama-python-test-deps;
|
65 |
+
dependencies = llama-python-deps;
|
66 |
+
})
|
llama.cpp/.devops/nix/scope.nix
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
lib,
|
3 |
+
newScope,
|
4 |
+
python3,
|
5 |
+
llamaVersion ? "0.0.0",
|
6 |
+
}:
|
7 |
+
|
8 |
+
let
|
9 |
+
pythonPackages = python3.pkgs;
|
10 |
+
buildPythonPackage = pythonPackages.buildPythonPackage;
|
11 |
+
numpy = pythonPackages.numpy;
|
12 |
+
tqdm = pythonPackages.tqdm;
|
13 |
+
sentencepiece = pythonPackages.sentencepiece;
|
14 |
+
pyyaml = pythonPackages.pyyaml;
|
15 |
+
poetry-core = pythonPackages.poetry-core;
|
16 |
+
pytestCheckHook = pythonPackages.pytestCheckHook;
|
17 |
+
in
|
18 |
+
|
19 |
+
# We're using `makeScope` instead of just writing out an attrset
|
20 |
+
# because it allows users to apply overlays later using `overrideScope'`.
|
21 |
+
# Cf. https://noogle.dev/f/lib/makeScope
|
22 |
+
|
23 |
+
lib.makeScope newScope (self: {
|
24 |
+
inherit llamaVersion;
|
25 |
+
gguf-py = self.callPackage ./package-gguf-py.nix {
|
26 |
+
inherit
|
27 |
+
buildPythonPackage
|
28 |
+
numpy
|
29 |
+
tqdm
|
30 |
+
sentencepiece
|
31 |
+
poetry-core
|
32 |
+
pyyaml
|
33 |
+
pytestCheckHook
|
34 |
+
;
|
35 |
+
};
|
36 |
+
python-scripts = self.callPackage ./python-scripts.nix { inherit buildPythonPackage poetry-core; };
|
37 |
+
llama-cpp = self.callPackage ./package.nix { };
|
38 |
+
docker = self.callPackage ./docker.nix { };
|
39 |
+
docker-min = self.callPackage ./docker.nix { interactive = false; };
|
40 |
+
sif = self.callPackage ./sif.nix { };
|
41 |
+
})
|
llama.cpp/.devops/nix/sif.nix
ADDED
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
lib,
|
3 |
+
singularity-tools,
|
4 |
+
llama-cpp,
|
5 |
+
bashInteractive,
|
6 |
+
interactive ? false,
|
7 |
+
}:
|
8 |
+
|
9 |
+
let
|
10 |
+
optionalInt = cond: x: if cond then x else 0;
|
11 |
+
in
|
12 |
+
singularity-tools.buildImage rec {
|
13 |
+
inherit (llama-cpp) name;
|
14 |
+
contents = [ llama-cpp ] ++ lib.optionals interactive [ bashInteractive ];
|
15 |
+
|
16 |
+
# These are excessive (but safe) for most variants. Building singularity
|
17 |
+
# images requires superuser privileges, so we build them inside a VM in a
|
18 |
+
# writable image of pre-determined size.
|
19 |
+
#
|
20 |
+
# ROCm is currently affected by https://github.com/NixOS/nixpkgs/issues/276846
|
21 |
+
#
|
22 |
+
# Expected image sizes:
|
23 |
+
# - cpu/blas: 150M,
|
24 |
+
# - cuda, all gencodes: 560M,
|
25 |
+
diskSize = 4096 + optionalInt llama-cpp.useRocm 16384;
|
26 |
+
memSize = diskSize;
|
27 |
+
}
|
llama.cpp/.devops/tools.sh
ADDED
@@ -0,0 +1,41 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
#!/bin/bash
|
2 |
+
set -e
|
3 |
+
|
4 |
+
# Read the first argument into a variable
|
5 |
+
arg1="$1"
|
6 |
+
|
7 |
+
# Shift the arguments to remove the first one
|
8 |
+
shift
|
9 |
+
|
10 |
+
if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then
|
11 |
+
python3 ./convert_hf_to_gguf.py "$@"
|
12 |
+
elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
|
13 |
+
./llama-quantize "$@"
|
14 |
+
elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
|
15 |
+
./llama-cli "$@"
|
16 |
+
elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
|
17 |
+
echo "Converting PTH to GGML..."
|
18 |
+
for i in `ls $1/$2/ggml-model-f16.bin*`; do
|
19 |
+
if [ -f "${i/f16/q4_0}" ]; then
|
20 |
+
echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
|
21 |
+
else
|
22 |
+
echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..."
|
23 |
+
./llama-quantize "$i" "${i/f16/q4_0}" q4_0
|
24 |
+
fi
|
25 |
+
done
|
26 |
+
elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
|
27 |
+
./llama-server "$@"
|
28 |
+
else
|
29 |
+
echo "Unknown command: $arg1"
|
30 |
+
echo "Available commands: "
|
31 |
+
echo " --run (-r): Run a model previously converted into ggml"
|
32 |
+
echo " ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512"
|
33 |
+
echo " --convert (-c): Convert a llama model into ggml"
|
34 |
+
echo " ex: --outtype f16 \"/models/7B/\" "
|
35 |
+
echo " --quantize (-q): Optimize with quantization process ggml"
|
36 |
+
echo " ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
|
37 |
+
echo " --all-in-one (-a): Execute --convert & --quantize"
|
38 |
+
echo " ex: \"/models/\" 7B"
|
39 |
+
echo " --server (-s): Run a model on the server"
|
40 |
+
echo " ex: -m /models/7B/ggml-model-q4_0.bin -c 2048 -ngl 43 -mg 1 --port 8080"
|
41 |
+
fi
|
llama.cpp/.dockerignore
ADDED
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
*.o
|
2 |
+
*.a
|
3 |
+
.cache/
|
4 |
+
# Do not ignore .git directory, otherwise the reported build number will always be 0
|
5 |
+
.github/
|
6 |
+
.gitignore
|
7 |
+
.vs/
|
8 |
+
.vscode/
|
9 |
+
.DS_Store
|
10 |
+
|
11 |
+
build*/
|
12 |
+
|
13 |
+
models/*
|
14 |
+
|
15 |
+
/llama-cli
|
16 |
+
/llama-quantize
|
17 |
+
|
18 |
+
arm_neon.h
|
19 |
+
compile_commands.json
|
20 |
+
Dockerfile
|
llama.cpp/.ecrc
ADDED
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"Exclude": ["^\\.gitmodules$", "stb_image\\.h"],
|
3 |
+
"Disable": {
|
4 |
+
"IndentSize": true
|
5 |
+
}
|
6 |
+
}
|
llama.cpp/.editorconfig
ADDED
@@ -0,0 +1,42 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# https://EditorConfig.org
|
2 |
+
|
3 |
+
# Top-most EditorConfig file
|
4 |
+
root = true
|
5 |
+
|
6 |
+
# Unix-style newlines with a newline ending every file, utf-8 charset
|
7 |
+
[*]
|
8 |
+
end_of_line = lf
|
9 |
+
insert_final_newline = true
|
10 |
+
trim_trailing_whitespace = true
|
11 |
+
charset = utf-8
|
12 |
+
indent_style = space
|
13 |
+
indent_size = 4
|
14 |
+
|
15 |
+
[Makefile]
|
16 |
+
indent_style = tab
|
17 |
+
|
18 |
+
[scripts/*.mk]
|
19 |
+
indent_style = tab
|
20 |
+
|
21 |
+
[prompts/*.txt]
|
22 |
+
insert_final_newline = unset
|
23 |
+
|
24 |
+
[examples/server/public/*]
|
25 |
+
indent_size = 2
|
26 |
+
|
27 |
+
[examples/server/public/deps_*]
|
28 |
+
trim_trailing_whitespace = unset
|
29 |
+
indent_style = unset
|
30 |
+
indent_size = unset
|
31 |
+
|
32 |
+
[examples/server/deps_*]
|
33 |
+
trim_trailing_whitespace = unset
|
34 |
+
indent_style = unset
|
35 |
+
indent_size = unset
|
36 |
+
|
37 |
+
[examples/llama.swiftui/llama.swiftui.xcodeproj/*]
|
38 |
+
indent_style = tab
|
39 |
+
|
40 |
+
[examples/cvector-generator/*.txt]
|
41 |
+
trim_trailing_whitespace = unset
|
42 |
+
insert_final_newline = unset
|
llama.cpp/.flake8
ADDED
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
[flake8]
|
2 |
+
max-line-length = 125
|
3 |
+
ignore = E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503
|
4 |
+
exclude =
|
5 |
+
# Do not traverse examples
|
6 |
+
examples,
|
7 |
+
# Do not include package initializers
|
8 |
+
__init__.py,
|
9 |
+
# No need to traverse our git directory
|
10 |
+
.git,
|
11 |
+
# There's no value in checking cache directories
|
12 |
+
__pycache__,
|
13 |
+
# No need to include the build path
|
14 |
+
build,
|
15 |
+
# This contains builds that we don't want to check
|
16 |
+
dist # This is generated with `python build .` for package releases
|
17 |
+
# max-complexity = 10
|
llama.cpp/.github/ISSUE_TEMPLATE/010-bug-compilation.yml
ADDED
@@ -0,0 +1,77 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Bug (compilation)
|
2 |
+
description: Something goes wrong when trying to compile llama.cpp.
|
3 |
+
title: "Compile bug: "
|
4 |
+
labels: ["bug-unconfirmed", "compilation"]
|
5 |
+
body:
|
6 |
+
- type: markdown
|
7 |
+
attributes:
|
8 |
+
value: >
|
9 |
+
Thanks for taking the time to fill out this bug report!
|
10 |
+
This issue template is intended for bug reports where the compilation of llama.cpp fails.
|
11 |
+
Before opening an issue, please confirm that the compilation still fails with `-DGGML_CCACHE=OFF`.
|
12 |
+
If the compilation succeeds with ccache disabled you should be able to permanently fix the issue
|
13 |
+
by clearing `~/.cache/ccache` (on Linux).
|
14 |
+
- type: textarea
|
15 |
+
id: commit
|
16 |
+
attributes:
|
17 |
+
label: Git commit
|
18 |
+
description: Which commit are you trying to compile?
|
19 |
+
placeholder: |
|
20 |
+
$git rev-parse HEAD
|
21 |
+
84a07a17b1b08cf2b9747c633a2372782848a27f
|
22 |
+
validations:
|
23 |
+
required: true
|
24 |
+
- type: dropdown
|
25 |
+
id: operating-system
|
26 |
+
attributes:
|
27 |
+
label: Operating systems
|
28 |
+
description: Which operating systems do you know to be affected?
|
29 |
+
multiple: true
|
30 |
+
options:
|
31 |
+
- Linux
|
32 |
+
- Mac
|
33 |
+
- Windows
|
34 |
+
- BSD
|
35 |
+
- Other? (Please let us know in description)
|
36 |
+
validations:
|
37 |
+
required: true
|
38 |
+
- type: dropdown
|
39 |
+
id: backends
|
40 |
+
attributes:
|
41 |
+
label: GGML backends
|
42 |
+
description: Which GGML backends do you know to be affected?
|
43 |
+
options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
|
44 |
+
multiple: true
|
45 |
+
validations:
|
46 |
+
required: true
|
47 |
+
- type: textarea
|
48 |
+
id: info
|
49 |
+
attributes:
|
50 |
+
label: Problem description & steps to reproduce
|
51 |
+
description: >
|
52 |
+
Please give us a summary of the problem and tell us how to reproduce it.
|
53 |
+
If you can narrow down the bug to specific compile flags, that information would be very much appreciated by us.
|
54 |
+
placeholder: >
|
55 |
+
I'm trying to compile llama.cpp with CUDA support on a fresh install of Ubuntu and get error XY.
|
56 |
+
Here are the exact commands that I used: ...
|
57 |
+
validations:
|
58 |
+
required: true
|
59 |
+
- type: textarea
|
60 |
+
id: first_bad_commit
|
61 |
+
attributes:
|
62 |
+
label: First Bad Commit
|
63 |
+
description: >
|
64 |
+
If the bug was not present on an earlier version: when did it start appearing?
|
65 |
+
If possible, please do a git bisect and identify the exact commit that introduced the bug.
|
66 |
+
validations:
|
67 |
+
required: false
|
68 |
+
- type: textarea
|
69 |
+
id: logs
|
70 |
+
attributes:
|
71 |
+
label: Relevant log output
|
72 |
+
description: >
|
73 |
+
Please copy and paste any relevant log output, including the command that you entered and any generated text.
|
74 |
+
This will be automatically formatted into code, so no need for backticks.
|
75 |
+
render: shell
|
76 |
+
validations:
|
77 |
+
required: true
|
llama.cpp/.github/ISSUE_TEMPLATE/011-bug-results.yml
ADDED
@@ -0,0 +1,101 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Bug (model use)
|
2 |
+
description: Something goes wrong when using a model (in general, not specific to a single llama.cpp module).
|
3 |
+
title: "Eval bug: "
|
4 |
+
labels: ["bug-unconfirmed", "model evaluation"]
|
5 |
+
body:
|
6 |
+
- type: markdown
|
7 |
+
attributes:
|
8 |
+
value: >
|
9 |
+
Thanks for taking the time to fill out this bug report!
|
10 |
+
This issue template is intended for bug reports where the model evaluation results
|
11 |
+
(i.e. the generated text) are incorrect or llama.cpp crashes during model evaluation.
|
12 |
+
If you encountered the issue while using an external UI (e.g. ollama),
|
13 |
+
please reproduce your issue using one of the examples/binaries in this repository.
|
14 |
+
The `llama-cli` binary can be used for simple and reproducible model inference.
|
15 |
+
- type: textarea
|
16 |
+
id: version
|
17 |
+
attributes:
|
18 |
+
label: Name and Version
|
19 |
+
description: Which version of our software are you running? (use `--version` to get a version string)
|
20 |
+
placeholder: |
|
21 |
+
$./llama-cli --version
|
22 |
+
version: 2999 (42b4109e)
|
23 |
+
built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
|
24 |
+
validations:
|
25 |
+
required: true
|
26 |
+
- type: dropdown
|
27 |
+
id: operating-system
|
28 |
+
attributes:
|
29 |
+
label: Operating systems
|
30 |
+
description: Which operating systems do you know to be affected?
|
31 |
+
multiple: true
|
32 |
+
options:
|
33 |
+
- Linux
|
34 |
+
- Mac
|
35 |
+
- Windows
|
36 |
+
- BSD
|
37 |
+
- Other? (Please let us know in description)
|
38 |
+
validations:
|
39 |
+
required: true
|
40 |
+
- type: dropdown
|
41 |
+
id: backends
|
42 |
+
attributes:
|
43 |
+
label: GGML backends
|
44 |
+
description: Which GGML backends do you know to be affected?
|
45 |
+
options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
|
46 |
+
multiple: true
|
47 |
+
validations:
|
48 |
+
required: true
|
49 |
+
- type: textarea
|
50 |
+
id: hardware
|
51 |
+
attributes:
|
52 |
+
label: Hardware
|
53 |
+
description: Which CPUs/GPUs are you using?
|
54 |
+
placeholder: >
|
55 |
+
e.g. Ryzen 5950X + 2x RTX 4090
|
56 |
+
validations:
|
57 |
+
required: true
|
58 |
+
- type: textarea
|
59 |
+
id: model
|
60 |
+
attributes:
|
61 |
+
label: Models
|
62 |
+
description: >
|
63 |
+
Which model(s) at which quantization were you using when encountering the bug?
|
64 |
+
If you downloaded a GGUF file off of Huggingface, please provide a link.
|
65 |
+
placeholder: >
|
66 |
+
e.g. Meta LLaMA 3.1 Instruct 8b q4_K_M
|
67 |
+
validations:
|
68 |
+
required: false
|
69 |
+
- type: textarea
|
70 |
+
id: info
|
71 |
+
attributes:
|
72 |
+
label: Problem description & steps to reproduce
|
73 |
+
description: >
|
74 |
+
Please give us a summary of the problem and tell us how to reproduce it.
|
75 |
+
If you can narrow down the bug to specific hardware, compile flags, or command line arguments,
|
76 |
+
that information would be very much appreciated by us.
|
77 |
+
placeholder: >
|
78 |
+
e.g. when I run llama-cli with -ngl 99 I get garbled outputs.
|
79 |
+
When I use -ngl 0 it works correctly.
|
80 |
+
Here are the exact commands that I used: ...
|
81 |
+
validations:
|
82 |
+
required: true
|
83 |
+
- type: textarea
|
84 |
+
id: first_bad_commit
|
85 |
+
attributes:
|
86 |
+
label: First Bad Commit
|
87 |
+
description: >
|
88 |
+
If the bug was not present on an earlier version: when did it start appearing?
|
89 |
+
If possible, please do a git bisect and identify the exact commit that introduced the bug.
|
90 |
+
validations:
|
91 |
+
required: false
|
92 |
+
- type: textarea
|
93 |
+
id: logs
|
94 |
+
attributes:
|
95 |
+
label: Relevant log output
|
96 |
+
description: >
|
97 |
+
Please copy and paste any relevant log output, including the command that you entered and any generated text.
|
98 |
+
This will be automatically formatted into code, so no need for backticks.
|
99 |
+
render: shell
|
100 |
+
validations:
|
101 |
+
required: true
|
llama.cpp/.github/ISSUE_TEMPLATE/019-bug-misc.yml
ADDED
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Bug (misc.)
|
2 |
+
description: Something is not working the way it should (and it's not covered by any of the above cases).
|
3 |
+
title: "Misc. bug: "
|
4 |
+
labels: ["bug-unconfirmed"]
|
5 |
+
body:
|
6 |
+
- type: markdown
|
7 |
+
attributes:
|
8 |
+
value: >
|
9 |
+
Thanks for taking the time to fill out this bug report!
|
10 |
+
This issue template is intended for miscellaneous bugs that don't fit into any other category.
|
11 |
+
If you encountered the issue while using an external UI (e.g. ollama),
|
12 |
+
please reproduce your issue using one of the examples/binaries in this repository.
|
13 |
+
- type: textarea
|
14 |
+
id: version
|
15 |
+
attributes:
|
16 |
+
label: Name and Version
|
17 |
+
description: Which version of our software is affected? (You can use `--version` to get a version string.)
|
18 |
+
placeholder: |
|
19 |
+
$./llama-cli --version
|
20 |
+
version: 2999 (42b4109e)
|
21 |
+
built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
|
22 |
+
validations:
|
23 |
+
required: true
|
24 |
+
- type: dropdown
|
25 |
+
id: operating-system
|
26 |
+
attributes:
|
27 |
+
label: Operating systems
|
28 |
+
description: Which operating systems do you know to be affected?
|
29 |
+
multiple: true
|
30 |
+
options:
|
31 |
+
- Linux
|
32 |
+
- Mac
|
33 |
+
- Windows
|
34 |
+
- BSD
|
35 |
+
- Other? (Please let us know in description)
|
36 |
+
validations:
|
37 |
+
required: false
|
38 |
+
- type: dropdown
|
39 |
+
id: module
|
40 |
+
attributes:
|
41 |
+
label: Which llama.cpp modules do you know to be affected?
|
42 |
+
multiple: true
|
43 |
+
options:
|
44 |
+
- Documentation/Github
|
45 |
+
- libllama (core library)
|
46 |
+
- llama-cli
|
47 |
+
- llama-server
|
48 |
+
- llama-bench
|
49 |
+
- llama-quantize
|
50 |
+
- Python/Bash scripts
|
51 |
+
- Test code
|
52 |
+
- Other (Please specify in the next section)
|
53 |
+
validations:
|
54 |
+
required: false
|
55 |
+
- type: textarea
|
56 |
+
id: info
|
57 |
+
attributes:
|
58 |
+
label: Problem description & steps to reproduce
|
59 |
+
description: >
|
60 |
+
Please give us a summary of the problem and tell us how to reproduce it (if applicable).
|
61 |
+
validations:
|
62 |
+
required: true
|
63 |
+
- type: textarea
|
64 |
+
id: first_bad_commit
|
65 |
+
attributes:
|
66 |
+
label: First Bad Commit
|
67 |
+
description: >
|
68 |
+
If the bug was not present on an earlier version and it's not trivial to track down: when did it start appearing?
|
69 |
+
If possible, please do a git bisect and identify the exact commit that introduced the bug.
|
70 |
+
validations:
|
71 |
+
required: false
|
72 |
+
- type: textarea
|
73 |
+
id: logs
|
74 |
+
attributes:
|
75 |
+
label: Relevant log output
|
76 |
+
description: >
|
77 |
+
If applicable, please copy and paste any relevant log output, including the command that you entered and any generated text.
|
78 |
+
This will be automatically formatted into code, so no need for backticks.
|
79 |
+
render: shell
|
80 |
+
validations:
|
81 |
+
required: false
|
llama.cpp/.github/ISSUE_TEMPLATE/020-enhancement.yml
ADDED
@@ -0,0 +1,51 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Enhancement
|
2 |
+
description: Used to request enhancements for llama.cpp.
|
3 |
+
title: "Feature Request: "
|
4 |
+
labels: ["enhancement"]
|
5 |
+
body:
|
6 |
+
- type: markdown
|
7 |
+
attributes:
|
8 |
+
value: |
|
9 |
+
[Please post your idea first in Discussion if there is not yet a consensus for this enhancement request. This will help to keep this issue tracker focused on enhancements that the community has agreed needs to be implemented.](https://github.com/ggerganov/llama.cpp/discussions/categories/ideas)
|
10 |
+
|
11 |
+
- type: checkboxes
|
12 |
+
id: prerequisites
|
13 |
+
attributes:
|
14 |
+
label: Prerequisites
|
15 |
+
description: Please confirm the following before submitting your enhancement request.
|
16 |
+
options:
|
17 |
+
- label: I am running the latest code. Mention the version if possible as well.
|
18 |
+
required: true
|
19 |
+
- label: I carefully followed the [README.md](https://github.com/ggerganov/llama.cpp/blob/master/README.md).
|
20 |
+
required: true
|
21 |
+
- label: I searched using keywords relevant to my issue to make sure that I am creating a new issue that is not already open (or closed).
|
22 |
+
required: true
|
23 |
+
- label: I reviewed the [Discussions](https://github.com/ggerganov/llama.cpp/discussions), and have a new and useful enhancement to share.
|
24 |
+
required: true
|
25 |
+
|
26 |
+
- type: textarea
|
27 |
+
id: feature-description
|
28 |
+
attributes:
|
29 |
+
label: Feature Description
|
30 |
+
description: Please provide a detailed written description of what you were trying to do, and what you expected `llama.cpp` to do as an enhancement.
|
31 |
+
placeholder: Detailed description of the enhancement
|
32 |
+
validations:
|
33 |
+
required: true
|
34 |
+
|
35 |
+
- type: textarea
|
36 |
+
id: motivation
|
37 |
+
attributes:
|
38 |
+
label: Motivation
|
39 |
+
description: Please provide a detailed written description of reasons why this feature is necessary and how it is useful to `llama.cpp` users.
|
40 |
+
placeholder: Explanation of why this feature is needed and its benefits
|
41 |
+
validations:
|
42 |
+
required: true
|
43 |
+
|
44 |
+
- type: textarea
|
45 |
+
id: possible-implementation
|
46 |
+
attributes:
|
47 |
+
label: Possible Implementation
|
48 |
+
description: If you have an idea as to how it can be implemented, please write a detailed description. Feel free to give links to external sources or share visuals that might be helpful to understand the details better.
|
49 |
+
placeholder: Detailed description of potential implementation
|
50 |
+
validations:
|
51 |
+
required: false
|
llama.cpp/.github/ISSUE_TEMPLATE/030-research.yml
ADDED
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Research
|
2 |
+
description: Track new technical research area.
|
3 |
+
title: "Research: "
|
4 |
+
labels: ["research 🔬"]
|
5 |
+
body:
|
6 |
+
- type: markdown
|
7 |
+
attributes:
|
8 |
+
value: |
|
9 |
+
Don't forget to check for any [duplicate research issue tickets](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3A%22research+%F0%9F%94%AC%22)
|
10 |
+
|
11 |
+
- type: checkboxes
|
12 |
+
id: research-stage
|
13 |
+
attributes:
|
14 |
+
label: Research Stage
|
15 |
+
description: Track general state of this research ticket
|
16 |
+
options:
|
17 |
+
- label: Background Research (Let's try to avoid reinventing the wheel)
|
18 |
+
- label: Hypothesis Formed (How do you think this will work and its effect?)
|
19 |
+
- label: Strategy / Implementation Forming
|
20 |
+
- label: Analysis of results
|
21 |
+
- label: Debrief / Documentation (So people in the future can learn from us)
|
22 |
+
|
23 |
+
- type: textarea
|
24 |
+
id: background
|
25 |
+
attributes:
|
26 |
+
label: Previous existing literature and research
|
27 |
+
description: What's the current state of the art and what's the motivation for this research?
|
28 |
+
|
29 |
+
- type: textarea
|
30 |
+
id: hypothesis
|
31 |
+
attributes:
|
32 |
+
label: Hypothesis
|
33 |
+
description: How do you think this will work and its effect?
|
34 |
+
|
35 |
+
- type: textarea
|
36 |
+
id: implementation
|
37 |
+
attributes:
|
38 |
+
label: Implementation
|
39 |
+
description: Got an approach? e.g. a PR ready to go?
|
40 |
+
|
41 |
+
- type: textarea
|
42 |
+
id: analysis
|
43 |
+
attributes:
|
44 |
+
label: Analysis
|
45 |
+
description: How does the proposed implementation behave?
|
46 |
+
|
47 |
+
- type: textarea
|
48 |
+
id: logs
|
49 |
+
attributes:
|
50 |
+
label: Relevant log output
|
51 |
+
description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
|
52 |
+
render: shell
|
llama.cpp/.github/ISSUE_TEMPLATE/040-refactor.yml
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Refactor (Maintainers)
|
2 |
+
description: Used to track refactoring opportunities.
|
3 |
+
title: "Refactor: "
|
4 |
+
labels: ["refactor"]
|
5 |
+
body:
|
6 |
+
- type: markdown
|
7 |
+
attributes:
|
8 |
+
value: |
|
9 |
+
Don't forget to [check for existing refactor issue tickets](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3Arefactoring) in case it's already covered.
|
10 |
+
You may also want to check the [pull request refactor label](https://github.com/ggerganov/llama.cpp/pulls?q=is%3Aopen+is%3Apr+label%3Arefactoring) for duplicates.
|
11 |
+
|
12 |
+
- type: textarea
|
13 |
+
id: background-description
|
14 |
+
attributes:
|
15 |
+
label: Background Description
|
16 |
+
description: Please provide a detailed written description of the pain points you are trying to solve.
|
17 |
+
placeholder: Detailed description behind your motivation to request refactor
|
18 |
+
validations:
|
19 |
+
required: true
|
20 |
+
|
21 |
+
- type: textarea
|
22 |
+
id: possible-approaches
|
23 |
+
attributes:
|
24 |
+
label: Possible Refactor Approaches
|
25 |
+
description: If you have some idea of possible approaches to solve this problem, please describe them here. You may want to make it a todo list.
|
26 |
+
placeholder: Your idea of possible refactoring opportunity/approaches
|
27 |
+
validations:
|
28 |
+
required: false
|
llama.cpp/.github/ISSUE_TEMPLATE/config.yml
ADDED
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
blank_issues_enabled: true
|
2 |
+
contact_links:
|
3 |
+
- name: Got an idea?
|
4 |
+
url: https://github.com/ggerganov/llama.cpp/discussions/categories/ideas
|
5 |
+
about: Pop it there. It may then become an enhancement ticket.
|
6 |
+
- name: Got a question?
|
7 |
+
url: https://github.com/ggerganov/llama.cpp/discussions/categories/q-a
|
8 |
+
about: Ask a question there!
|
9 |
+
- name: Want to contribute?
|
10 |
+
url: https://github.com/ggerganov/llama.cpp/wiki/contribute
|
11 |
+
about: Head to the contribution guide page of the wiki for areas you can help with
|
llama.cpp/.github/labeler.yml
ADDED
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# https://github.com/actions/labeler
|
2 |
+
Kompute:
|
3 |
+
- changed-files:
|
4 |
+
- any-glob-to-any-file:
|
5 |
+
- ggml/include/ggml-kompute.h
|
6 |
+
- ggml/src/ggml-kompute/**
|
7 |
+
- README-kompute.md
|
8 |
+
Apple Metal:
|
9 |
+
- changed-files:
|
10 |
+
- any-glob-to-any-file:
|
11 |
+
- ggml/include/ggml-metal.h
|
12 |
+
- ggml/src/ggml-metal/**
|
13 |
+
- README-metal.md
|
14 |
+
SYCL:
|
15 |
+
- changed-files:
|
16 |
+
- any-glob-to-any-file:
|
17 |
+
- ggml/include/ggml-sycl.h
|
18 |
+
- ggml/src/ggml-sycl/**
|
19 |
+
- docs/backend/SYCL.md
|
20 |
+
- examples/sycl/**
|
21 |
+
Nvidia GPU:
|
22 |
+
- changed-files:
|
23 |
+
- any-glob-to-any-file:
|
24 |
+
- ggml/include/ggml-cuda.h
|
25 |
+
- ggml/src/ggml-cuda/**
|
26 |
+
Vulkan:
|
27 |
+
- changed-files:
|
28 |
+
- any-glob-to-any-file:
|
29 |
+
- ggml/include/ggml-vulkan.h
|
30 |
+
- ggml/src/ggml-vulkan/**
|
31 |
+
documentation:
|
32 |
+
- changed-files:
|
33 |
+
- any-glob-to-any-file:
|
34 |
+
- docs/**
|
35 |
+
- media/**
|
36 |
+
testing:
|
37 |
+
- changed-files:
|
38 |
+
- any-glob-to-any-file:
|
39 |
+
- tests/**
|
40 |
+
build:
|
41 |
+
- changed-files:
|
42 |
+
- any-glob-to-any-file:
|
43 |
+
- cmake/**
|
44 |
+
- CMakeLists.txt
|
45 |
+
- CMakePresets.json
|
46 |
+
examples:
|
47 |
+
- changed-files:
|
48 |
+
- any-glob-to-any-file: examples/**
|
49 |
+
devops:
|
50 |
+
- changed-files:
|
51 |
+
- any-glob-to-any-file:
|
52 |
+
- .devops/**
|
53 |
+
- .github/**
|
54 |
+
- ci/**
|
55 |
+
python:
|
56 |
+
- changed-files:
|
57 |
+
- any-glob-to-any-file:
|
58 |
+
- "**/*.py"
|
59 |
+
- requirements/**
|
60 |
+
- gguf-py/**
|
61 |
+
- .flake8
|
62 |
+
script:
|
63 |
+
- changed-files:
|
64 |
+
- any-glob-to-any-file:
|
65 |
+
- scripts/**
|
66 |
+
android:
|
67 |
+
- changed-files:
|
68 |
+
- any-glob-to-any-file:
|
69 |
+
- examples/llama.android/**
|
70 |
+
server:
|
71 |
+
- changed-files:
|
72 |
+
- any-glob-to-any-file:
|
73 |
+
- examples/server/**
|
74 |
+
ggml:
|
75 |
+
- changed-files:
|
76 |
+
- any-glob-to-any-file:
|
77 |
+
- ggml/**
|
78 |
+
nix:
|
79 |
+
- changed-files:
|
80 |
+
- any-glob-to-any-file:
|
81 |
+
- "**/*.nix"
|
82 |
+
- .github/workflows/nix-*.yml
|
83 |
+
- .devops/nix/nixpkgs-instances.nix
|
84 |
+
embedding:
|
85 |
+
- changed-files:
|
86 |
+
- any-glob-to-any-file: examples/embedding/
|
llama.cpp/.github/pull_request_template.md
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
*Make sure to read the [contributing guidelines](https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md) before submitting a PR*
|
llama.cpp/.github/workflows/bench.yml.disabled
ADDED
@@ -0,0 +1,315 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# TODO: there have been some issues with the workflow, so disabling for now
|
2 |
+
# https://github.com/ggerganov/llama.cpp/issues/7893
|
3 |
+
#
|
4 |
+
# Benchmark
|
5 |
+
name: Benchmark
|
6 |
+
|
7 |
+
on:
|
8 |
+
workflow_dispatch:
|
9 |
+
inputs:
|
10 |
+
gpu-series:
|
11 |
+
description: 'Azure GPU series to run with'
|
12 |
+
required: true
|
13 |
+
type: choice
|
14 |
+
options:
|
15 |
+
- Standard_NC4as_T4_v3
|
16 |
+
- Standard_NC24ads_A100_v4
|
17 |
+
- Standard_NC80adis_H100_v5
|
18 |
+
sha:
|
19 |
+
description: 'Commit SHA1 to build'
|
20 |
+
required: false
|
21 |
+
type: string
|
22 |
+
duration:
|
23 |
+
description: 'Duration of the bench'
|
24 |
+
type: string
|
25 |
+
default: 10m
|
26 |
+
|
27 |
+
push:
|
28 |
+
branches:
|
29 |
+
- master
|
30 |
+
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
|
31 |
+
pull_request_target:
|
32 |
+
types: [opened, synchronize, reopened]
|
33 |
+
paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
|
34 |
+
schedule:
|
35 |
+
- cron: '04 2 * * *'
|
36 |
+
|
37 |
+
concurrency:
|
38 |
+
group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}-${{ github.event.inputs.sha }}
|
39 |
+
cancel-in-progress: true
|
40 |
+
|
41 |
+
jobs:
|
42 |
+
bench-server-baseline:
|
43 |
+
runs-on: Standard_NC4as_T4_v3
|
44 |
+
env:
|
45 |
+
RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME: duplicates the runs-on value above; no way found to avoid the duplication
|
46 |
+
N_USERS: 8
|
47 |
+
DURATION: 10m
|
48 |
+
|
49 |
+
strategy:
|
50 |
+
matrix:
|
51 |
+
model: [phi-2]
|
52 |
+
ftype: [q4_0, q8_0, f16]
|
53 |
+
include:
|
54 |
+
- model: phi-2
|
55 |
+
ftype: q4_0
|
56 |
+
pr_comment_enabled: "true"
|
57 |
+
|
58 |
+
if: |
|
59 |
+
inputs.gpu-series == 'Standard_NC4as_T4_v3'
|
60 |
+
|| (
|
61 |
+
github.event_name == 'schedule'
|
62 |
+
&& github.ref_name == 'master'
|
63 |
+
&& github.repository_owner == 'ggerganov'
|
64 |
+
)
|
65 |
+
|| github.event_name == 'pull_request_target'
|
66 |
+
|| (
|
67 |
+
github.event_name == 'push'
|
68 |
+
&& github.event.ref == 'refs/heads/master'
|
69 |
+
&& github.repository_owner == 'ggerganov'
|
70 |
+
)
|
71 |
+
steps:
|
72 |
+
- name: Clone
|
73 |
+
id: checkout
|
74 |
+
uses: actions/checkout@v4
|
75 |
+
with:
|
76 |
+
fetch-depth: 0
|
77 |
+
ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
|
78 |
+
|
79 |
+
- name: Install python env
|
80 |
+
id: pipenv
|
81 |
+
run: |
|
82 |
+
cd examples/server/bench
|
83 |
+
python3 -m venv venv
|
84 |
+
source venv/bin/activate
|
85 |
+
pip install -r requirements.txt
|
86 |
+
|
87 |
+
- name: Prometheus
|
88 |
+
id: install_prometheus
|
89 |
+
run: |
|
90 |
+
wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz
|
91 |
+
tar xzf prometheus*.tar.gz --strip-components=1
|
92 |
+
./prometheus --config.file=examples/server/bench/prometheus.yml &
|
93 |
+
while ! nc -z localhost 9090; do
|
94 |
+
sleep 0.1
|
95 |
+
done
|
96 |
+
|
97 |
+
- name: Set up Go
|
98 |
+
uses: actions/setup-go@v5
|
99 |
+
with:
|
100 |
+
go-version: '1.21'
|
101 |
+
|
102 |
+
- name: Install k6 and xk6-sse
|
103 |
+
id: k6_installation
|
104 |
+
run: |
|
105 |
+
cd examples/server/bench
|
106 |
+
go install go.k6.io/xk6/cmd/xk6@latest
|
107 |
+
xk6 build master \
|
108 |
+
--with github.com/phymbert/xk6-sse
|
109 |
+
|
110 |
+
- name: Build
|
111 |
+
id: cmake_build
|
112 |
+
run: |
|
113 |
+
set -eux
|
114 |
+
cmake -B build \
|
115 |
+
-DGGML_NATIVE=OFF \
|
116 |
+
-DLLAMA_BUILD_SERVER=ON \
|
117 |
+
-DLLAMA_CURL=ON \
|
118 |
+
-DLLAMA_CUBLAS=ON \
|
119 |
+
-DCUDAToolkit_ROOT=/usr/local/cuda \
|
120 |
+
-DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
|
121 |
+
-DCMAKE_CUDA_ARCHITECTURES=75 \
|
122 |
+
-DLLAMA_FATAL_WARNINGS=OFF \
|
123 |
+
-DLLAMA_ALL_WARNINGS=OFF \
|
124 |
+
-DCMAKE_BUILD_TYPE=Release;
|
125 |
+
cmake --build build --config Release -j $(nproc) --target llama-server
|
126 |
+
|
127 |
+
- name: Download the dataset
|
128 |
+
id: download_dataset
|
129 |
+
run: |
|
130 |
+
cd examples/server/bench
|
131 |
+
wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
|
132 |
+
|
133 |
+
- name: Server bench
|
134 |
+
id: server_bench
|
135 |
+
env:
|
136 |
+
HEAD_REF: ${{ github.head_ref || github.ref_name }}
|
137 |
+
run: |
|
138 |
+
set -eux
|
139 |
+
|
140 |
+
cd examples/server/bench
|
141 |
+
source venv/bin/activate
|
142 |
+
python bench.py \
|
143 |
+
--runner-label ${{ env.RUNNER_LABEL }} \
|
144 |
+
--name ${{ github.job }} \
|
145 |
+
--branch $HEAD_REF \
|
146 |
+
--commit ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha }} \
|
147 |
+
--scenario script.js \
|
148 |
+
--duration ${{ github.event.inputs.duration || env.DURATION }} \
|
149 |
+
--hf-repo ggml-org/models \
|
150 |
+
--hf-file ${{ matrix.model }}/ggml-model-${{ matrix.ftype }}.gguf \
|
151 |
+
--model-path-prefix /models \
|
152 |
+
--parallel ${{ env.N_USERS }} \
|
153 |
+
-ngl 33 \
|
154 |
+
--batch-size 2048 \
|
155 |
+
--ubatch-size 256 \
|
156 |
+
--ctx-size 16384 \
|
157 |
+
--n-prompts 1000 \
|
158 |
+
--max-prompt-tokens 1024 \
|
159 |
+
--max-tokens 2048
|
160 |
+
|
161 |
+
cat results.github.env >> $GITHUB_ENV
|
162 |
+
|
163 |
+
# Remove dataset as we do not want it in the artefact
|
164 |
+
rm ShareGPT_V3_unfiltered_cleaned_split.json
|
165 |
+
|
166 |
+
- uses: actions/upload-artifact@v4
|
167 |
+
with:
|
168 |
+
name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
|
169 |
+
compression-level: 9
|
170 |
+
path: |
|
171 |
+
examples/server/bench/*.jpg
|
172 |
+
examples/server/bench/*.json
|
173 |
+
examples/server/bench/*.log
|
174 |
+
|
175 |
+
- name: Commit status
|
176 |
+
uses: Sibz/github-status-action@v1
|
177 |
+
with:
|
178 |
+
authToken: ${{secrets.GITHUB_TOKEN}}
|
179 |
+
sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }}
|
180 |
+
context: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
|
181 |
+
description: |
|
182 |
+
${{ env.BENCH_RESULTS }}
|
183 |
+
state: 'success'
|
184 |
+
|
185 |
+
- name: Upload benchmark images
|
186 |
+
uses: devicons/[email protected]
|
187 |
+
continue-on-error: true # Important as it looks unstable: 503
|
188 |
+
id: imgur_step
|
189 |
+
with:
|
190 |
+
client_id: ${{secrets.IMGUR_CLIENT_ID}}
|
191 |
+
path: |
|
192 |
+
examples/server/bench/prompt_tokens_seconds.jpg
|
193 |
+
examples/server/bench/predicted_tokens_seconds.jpg
|
194 |
+
examples/server/bench/kv_cache_usage_ratio.jpg
|
195 |
+
examples/server/bench/requests_processing.jpg
|
196 |
+
|
197 |
+
- name: Extract mermaid
|
198 |
+
id: set_mermaid
|
199 |
+
run: |
|
200 |
+
set -eux
|
201 |
+
|
202 |
+
cd examples/server/bench
|
203 |
+
PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid)
|
204 |
+
echo "PROMPT_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
|
205 |
+
echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV
|
206 |
+
echo "EOF" >> $GITHUB_ENV
|
207 |
+
|
208 |
+
PREDICTED_TOKENS_SECONDS=$(cat predicted_tokens_seconds.mermaid)
|
209 |
+
echo "PREDICTED_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
|
210 |
+
echo "$PREDICTED_TOKENS_SECONDS" >> $GITHUB_ENV
|
211 |
+
echo "EOF" >> $GITHUB_ENV
|
212 |
+
|
213 |
+
KV_CACHE_USAGE_RATIO=$(cat kv_cache_usage_ratio.mermaid)
|
214 |
+
echo "KV_CACHE_USAGE_RATIO<<EOF" >> $GITHUB_ENV
|
215 |
+
echo "$KV_CACHE_USAGE_RATIO" >> $GITHUB_ENV
|
216 |
+
echo "EOF" >> $GITHUB_ENV
|
217 |
+
|
218 |
+
REQUESTS_PROCESSING=$(cat requests_processing.mermaid)
|
219 |
+
echo "REQUESTS_PROCESSING<<EOF" >> $GITHUB_ENV
|
220 |
+
echo "$REQUESTS_PROCESSING" >> $GITHUB_ENV
|
221 |
+
echo "EOF" >> $GITHUB_ENV
|
222 |
+
|
223 |
+
- name: Extract image url
|
224 |
+
id: extract_image_url
|
225 |
+
continue-on-error: true
|
226 |
+
run: |
|
227 |
+
set -eux
|
228 |
+
|
229 |
+
echo "IMAGE_O=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" >> $GITHUB_ENV
|
230 |
+
echo "IMAGE_1=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" >> $GITHUB_ENV
|
231 |
+
echo "IMAGE_2=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" >> $GITHUB_ENV
|
232 |
+
echo "IMAGE_3=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" >> $GITHUB_ENV
|
233 |
+
|
234 |
+
- name: Comment PR
|
235 |
+
uses: mshick/add-pr-comment@v2
|
236 |
+
id: comment_pr
|
237 |
+
if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }}
|
238 |
+
with:
|
239 |
+
message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
|
240 |
+
message: |
|
241 |
+
<p align="center">
|
242 |
+
|
243 |
+
📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
|
244 |
+
|
245 |
+
</p>
|
246 |
+
|
247 |
+
<details>
|
248 |
+
|
249 |
+
<summary>Expand details for performance related PR only</summary>
|
250 |
+
|
251 |
+
- Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
|
252 |
+
- HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
|
253 |
+
- Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s
|
254 |
+
- Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s
|
255 |
+
- ${{ env.BENCH_GRAPH_XLABEL }}
|
256 |
+
|
257 |
+
|
258 |
+
<p align="center">
|
259 |
+
|
260 |
+
<img width="100%" height="100%" src="${{ env.IMAGE_O }}" alt="prompt_tokens_seconds" />
|
261 |
+
|
262 |
+
<details>
|
263 |
+
|
264 |
+
<summary>More</summary>
|
265 |
+
|
266 |
+
```mermaid
|
267 |
+
${{ env.PROMPT_TOKENS_SECONDS }}
|
268 |
+
```
|
269 |
+
|
270 |
+
</details>
|
271 |
+
|
272 |
+
<img width="100%" height="100%" src="${{ env.IMAGE_1 }}" alt="predicted_tokens_seconds"/>
|
273 |
+
|
274 |
+
<details>
|
275 |
+
<summary>More</summary>
|
276 |
+
|
277 |
+
```mermaid
|
278 |
+
${{ env.PREDICTED_TOKENS_SECONDS }}
|
279 |
+
```
|
280 |
+
|
281 |
+
</details>
|
282 |
+
|
283 |
+
</p>
|
284 |
+
|
285 |
+
<details>
|
286 |
+
|
287 |
+
<summary>Details</summary>
|
288 |
+
|
289 |
+
<p align="center">
|
290 |
+
|
291 |
+
<img width="100%" height="100%" src="${{ env.IMAGE_2 }}" alt="kv_cache_usage_ratio" />
|
292 |
+
|
293 |
+
<details>
|
294 |
+
<summary>More</summary>
|
295 |
+
|
296 |
+
```mermaid
|
297 |
+
${{ env.KV_CACHE_USAGE_RATIO }}
|
298 |
+
```
|
299 |
+
|
300 |
+
</details>
|
301 |
+
|
302 |
+
<img width="100%" height="100%" src="${{ env.IMAGE_3 }}" alt="requests_processing"/>
|
303 |
+
|
304 |
+
<details>
|
305 |
+
<summary>More</summary>
|
306 |
+
|
307 |
+
```mermaid
|
308 |
+
${{ env.REQUESTS_PROCESSING }}
|
309 |
+
```
|
310 |
+
|
311 |
+
</details>
|
312 |
+
|
313 |
+
</p>
|
314 |
+
</details>
|
315 |
+
</details>
|
llama.cpp/.github/workflows/build.yml
ADDED
@@ -0,0 +1,1416 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: CI
|
2 |
+
|
3 |
+
on:
|
4 |
+
workflow_dispatch: # allows manual triggering
|
5 |
+
inputs:
|
6 |
+
create_release:
|
7 |
+
description: 'Create new release'
|
8 |
+
required: true
|
9 |
+
type: boolean
|
10 |
+
push:
|
11 |
+
branches:
|
12 |
+
- master
|
13 |
+
paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
|
14 |
+
pull_request:
|
15 |
+
types: [opened, synchronize, reopened]
|
16 |
+
paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal']
|
17 |
+
|
18 |
+
concurrency:
|
19 |
+
group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
|
20 |
+
cancel-in-progress: true
|
21 |
+
|
22 |
+
# Fine-grained permissions
|
23 |
+
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
|
24 |
+
permissions:
|
25 |
+
contents: write # for creating release
|
26 |
+
|
27 |
+
env:
|
28 |
+
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
|
29 |
+
GGML_NLOOP: 3
|
30 |
+
GGML_N_THREADS: 1
|
31 |
+
LLAMA_LOG_COLORS: 1
|
32 |
+
LLAMA_LOG_PREFIX: 1
|
33 |
+
LLAMA_LOG_TIMESTAMPS: 1
|
34 |
+
|
35 |
+
jobs:
|
36 |
+
macOS-latest-cmake-arm64:
|
37 |
+
runs-on: macos-14
|
38 |
+
|
39 |
+
steps:
|
40 |
+
- name: Clone
|
41 |
+
id: checkout
|
42 |
+
uses: actions/checkout@v4
|
43 |
+
with:
|
44 |
+
fetch-depth: 0
|
45 |
+
|
46 |
+
- name: Dependencies
|
47 |
+
id: depends
|
48 |
+
continue-on-error: true
|
49 |
+
run: |
|
50 |
+
brew update
|
51 |
+
|
52 |
+
- name: Build
|
53 |
+
id: cmake_build
|
54 |
+
run: |
|
55 |
+
sysctl -a
|
56 |
+
mkdir build
|
57 |
+
cd build
|
58 |
+
cmake .. \
|
59 |
+
-DLLAMA_FATAL_WARNINGS=ON \
|
60 |
+
-DLLAMA_CURL=ON \
|
61 |
+
-DGGML_METAL_USE_BF16=ON \
|
62 |
+
-DGGML_METAL_EMBED_LIBRARY=ON \
|
63 |
+
-DGGML_RPC=ON \
|
64 |
+
-DBUILD_SHARED_LIBS=OFF
|
65 |
+
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
|
66 |
+
|
67 |
+
- name: Test
|
68 |
+
id: cmake_test
|
69 |
+
run: |
|
70 |
+
cd build
|
71 |
+
ctest -L 'main|curl' --verbose --timeout 900
|
72 |
+
|
73 |
+
- name: Determine tag name
|
74 |
+
id: tag
|
75 |
+
shell: bash
|
76 |
+
run: |
|
77 |
+
BUILD_NUMBER="$(git rev-list --count HEAD)"
|
78 |
+
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
|
79 |
+
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
|
80 |
+
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
|
81 |
+
else
|
82 |
+
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
|
83 |
+
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
|
84 |
+
fi
|
85 |
+
|
86 |
+
- name: Pack artifacts
|
87 |
+
id: pack_artifacts
|
88 |
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
89 |
+
run: |
|
90 |
+
cp LICENSE ./build/bin/
|
91 |
+
zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/*
|
92 |
+
|
93 |
+
- name: Upload artifacts
|
94 |
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
95 |
+
uses: actions/upload-artifact@v4
|
96 |
+
with:
|
97 |
+
path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip
|
98 |
+
name: llama-bin-macos-arm64.zip
|
99 |
+
|
100 |
+
macOS-latest-cmake-x64:
|
101 |
+
runs-on: macos-13
|
102 |
+
|
103 |
+
steps:
|
104 |
+
- name: Clone
|
105 |
+
id: checkout
|
106 |
+
uses: actions/checkout@v4
|
107 |
+
with:
|
108 |
+
fetch-depth: 0
|
109 |
+
|
110 |
+
- name: Dependencies
|
111 |
+
id: depends
|
112 |
+
continue-on-error: true
|
113 |
+
run: |
|
114 |
+
brew update
|
115 |
+
|
116 |
+
- name: Build
|
117 |
+
id: cmake_build
|
118 |
+
run: |
|
119 |
+
sysctl -a
|
120 |
+
# Metal is disabled due to intermittent failures with GitHub runners not having a GPU:
|
121 |
+
# https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
|
122 |
+
cmake -B build \
|
123 |
+
-DLLAMA_FATAL_WARNINGS=ON \
|
124 |
+
-DLLAMA_CURL=ON \
|
125 |
+
-DGGML_METAL=OFF \
|
126 |
+
-DGGML_RPC=ON \
|
127 |
+
-DBUILD_SHARED_LIBS=OFF
|
128 |
+
cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
|
129 |
+
|
130 |
+
- name: Test
|
131 |
+
id: cmake_test
|
132 |
+
run: |
|
133 |
+
cd build
|
134 |
+
ctest -L main --verbose --timeout 900
|
135 |
+
|
136 |
+
- name: Determine tag name
|
137 |
+
id: tag
|
138 |
+
shell: bash
|
139 |
+
run: |
|
140 |
+
BUILD_NUMBER="$(git rev-list --count HEAD)"
|
141 |
+
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
|
142 |
+
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
|
143 |
+
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
|
144 |
+
else
|
145 |
+
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
|
146 |
+
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
|
147 |
+
fi
|
148 |
+
|
149 |
+
- name: Pack artifacts
|
150 |
+
id: pack_artifacts
|
151 |
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
152 |
+
run: |
|
153 |
+
cp LICENSE ./build/bin/
|
154 |
+
zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/*
|
155 |
+
|
156 |
+
- name: Upload artifacts
|
157 |
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
158 |
+
uses: actions/upload-artifact@v4
|
159 |
+
with:
|
160 |
+
path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip
|
161 |
+
name: llama-bin-macos-x64.zip
|
162 |
+
|
163 |
+
ubuntu-latest-cmake:
|
164 |
+
runs-on: ubuntu-latest
|
165 |
+
|
166 |
+
steps:
|
167 |
+
- name: Clone
|
168 |
+
id: checkout
|
169 |
+
uses: actions/checkout@v4
|
170 |
+
with:
|
171 |
+
fetch-depth: 0
|
172 |
+
|
173 |
+
- name: Dependencies
|
174 |
+
id: depends
|
175 |
+
run: |
|
176 |
+
sudo apt-get update
|
177 |
+
sudo apt-get install build-essential libcurl4-openssl-dev
|
178 |
+
|
179 |
+
- name: Build
|
180 |
+
id: cmake_build
|
181 |
+
run: |
|
182 |
+
mkdir build
|
183 |
+
cd build
|
184 |
+
cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_CURL=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=OFF
|
185 |
+
cmake --build . --config Release -j $(nproc)
|
186 |
+
|
187 |
+
- name: Test
|
188 |
+
id: cmake_test
|
189 |
+
run: |
|
190 |
+
cd build
|
191 |
+
ctest -L 'main|curl' --verbose --timeout 900
|
192 |
+
|
193 |
+
- name: Test llama2c conversion
|
194 |
+
id: llama2c_test
|
195 |
+
run: |
|
196 |
+
cd build
|
197 |
+
echo "Fetch tokenizer"
|
198 |
+
wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin
|
199 |
+
echo "Fetch llama2c model"
|
200 |
+
wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin
|
201 |
+
./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf
|
202 |
+
./bin/llama-cli -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
|
203 |
+
|
204 |
+
- name: Determine tag name
|
205 |
+
id: tag
|
206 |
+
shell: bash
|
207 |
+
run: |
|
208 |
+
BUILD_NUMBER="$(git rev-list --count HEAD)"
|
209 |
+
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
|
210 |
+
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
|
211 |
+
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
|
212 |
+
else
|
213 |
+
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
|
214 |
+
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
|
215 |
+
fi
|
216 |
+
|
217 |
+
- name: Pack artifacts
|
218 |
+
id: pack_artifacts
|
219 |
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
220 |
+
run: |
|
221 |
+
cp LICENSE ./build/bin/
|
222 |
+
zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip ./build/bin/*
|
223 |
+
|
224 |
+
- name: Upload artifacts
|
225 |
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
226 |
+
uses: actions/upload-artifact@v4
|
227 |
+
with:
|
228 |
+
path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip
|
229 |
+
name: llama-bin-ubuntu-x64.zip
|
230 |
+
|
231 |
+
ubuntu-latest-cmake-sanitizer:
|
232 |
+
runs-on: ubuntu-latest
|
233 |
+
|
234 |
+
continue-on-error: true
|
235 |
+
|
236 |
+
strategy:
|
237 |
+
matrix:
|
238 |
+
sanitizer: [ADDRESS, THREAD, UNDEFINED]
|
239 |
+
build_type: [Debug, Release]
|
240 |
+
|
241 |
+
steps:
|
242 |
+
- name: Clone
|
243 |
+
id: checkout
|
244 |
+
uses: actions/checkout@v4
|
245 |
+
|
246 |
+
- name: Dependencies
|
247 |
+
id: depends
|
248 |
+
run: |
|
249 |
+
sudo apt-get update
|
250 |
+
sudo apt-get install build-essential
|
251 |
+
|
252 |
+
- name: Build
|
253 |
+
id: cmake_build
|
254 |
+
if: ${{ matrix.sanitizer != 'THREAD' }}
|
255 |
+
run: |
|
256 |
+
mkdir build
|
257 |
+
cd build
|
258 |
+
cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
|
259 |
+
cmake --build . --config ${{ matrix.build_type }} -j $(nproc)
|
260 |
+
|
261 |
+
- name: Build (no OpenMP)
|
262 |
+
id: cmake_build_no_openmp
|
263 |
+
if: ${{ matrix.sanitizer == 'THREAD' }}
|
264 |
+
run: |
|
265 |
+
mkdir build
|
266 |
+
cd build
|
267 |
+
cmake .. -DLLAMA_FATAL_WARNINGS=ON -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} -DGGML_OPENMP=OFF
|
268 |
+
cmake --build . --config ${{ matrix.build_type }} -j $(nproc)
|
269 |
+
|
270 |
+
- name: Test
|
271 |
+
id: cmake_test
|
272 |
+
run: |
|
273 |
+
cd build
|
274 |
+
ctest -L main --verbose --timeout 900
|
275 |
+
|
276 |
+
ubuntu-latest-cmake-rpc:
|
277 |
+
runs-on: ubuntu-latest
|
278 |
+
|
279 |
+
continue-on-error: true
|
280 |
+
|
281 |
+
steps:
|
282 |
+
- name: Clone
|
283 |
+
id: checkout
|
284 |
+
uses: actions/checkout@v4
|
285 |
+
|
286 |
+
- name: Dependencies
|
287 |
+
id: depends
|
288 |
+
run: |
|
289 |
+
sudo apt-get update
|
290 |
+
sudo apt-get install build-essential
|
291 |
+
|
292 |
+
- name: Build
|
293 |
+
id: cmake_build
|
294 |
+
run: |
|
295 |
+
mkdir build
|
296 |
+
cd build
|
297 |
+
cmake -DGGML_RPC=ON ..
|
298 |
+
cmake --build . --config Release -j $(nproc)
|
299 |
+
|
300 |
+
- name: Test
|
301 |
+
id: cmake_test
|
302 |
+
run: |
|
303 |
+
cd build
|
304 |
+
ctest -L main --verbose
|
305 |
+
|
306 |
+
ubuntu-22-cmake-vulkan:
|
307 |
+
runs-on: ubuntu-22.04
|
308 |
+
|
309 |
+
steps:
|
310 |
+
- name: Clone
|
311 |
+
id: checkout
|
312 |
+
uses: actions/checkout@v4
|
313 |
+
|
314 |
+
- name: Dependencies
|
315 |
+
id: depends
|
316 |
+
run: |
|
317 |
+
wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
|
318 |
+
sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
|
319 |
+
sudo apt-get update -y
|
320 |
+
sudo apt-get install -y build-essential vulkan-sdk
|
321 |
+
|
322 |
+
- name: Build
|
323 |
+
id: cmake_build
|
324 |
+
run: |
|
325 |
+
mkdir build
|
326 |
+
cd build
|
327 |
+
cmake -DGGML_VULKAN=ON ..
|
328 |
+
cmake --build . --config Release -j $(nproc)
|
329 |
+
|
330 |
+
ubuntu-22-cmake-hip:
|
331 |
+
runs-on: ubuntu-22.04
|
332 |
+
container: rocm/dev-ubuntu-22.04:6.0.2
|
333 |
+
|
334 |
+
steps:
|
335 |
+
- name: Clone
|
336 |
+
id: checkout
|
337 |
+
uses: actions/checkout@v4
|
338 |
+
|
339 |
+
- name: Dependencies
|
340 |
+
id: depends
|
341 |
+
run: |
|
342 |
+
sudo apt-get update
|
343 |
+
sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev
|
344 |
+
|
345 |
+
- name: Build with native CMake HIP support
|
346 |
+
id: cmake_build
|
347 |
+
run: |
|
348 |
+
cmake -B build -S . -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" -DGGML_HIP=ON
|
349 |
+
cmake --build build --config Release -j $(nproc)
|
350 |
+
|
351 |
+
- name: Build with legacy HIP support
|
352 |
+
id: cmake_build_legacy_hip
|
353 |
+
run: |
|
354 |
+
cmake -B build2 -S . -DCMAKE_C_COMPILER=hipcc -DCMAKE_CXX_COMPILER=hipcc -DGGML_HIP=ON
|
355 |
+
cmake --build build2 --config Release -j $(nproc)
|
356 |
+
|
357 |
+
ubuntu-22-cmake-musa:
|
358 |
+
runs-on: ubuntu-22.04
|
359 |
+
container: mthreads/musa:rc3.1.0-devel-ubuntu22.04
|
360 |
+
|
361 |
+
steps:
|
362 |
+
- name: Clone
|
363 |
+
id: checkout
|
364 |
+
uses: actions/checkout@v4
|
365 |
+
|
366 |
+
- name: Dependencies
|
367 |
+
id: depends
|
368 |
+
run: |
|
369 |
+
apt-get update
|
370 |
+
apt-get install -y build-essential git cmake libcurl4-openssl-dev
|
371 |
+
|
372 |
+
- name: Build with native CMake MUSA support
|
373 |
+
id: cmake_build
|
374 |
+
run: |
|
375 |
+
cmake -B build -S . -DGGML_MUSA=ON
|
376 |
+
cmake --build build --config Release -j $(nproc)
|
377 |
+
|
378 |
+
ubuntu-22-cmake-sycl:
|
379 |
+
runs-on: ubuntu-22.04
|
380 |
+
|
381 |
+
continue-on-error: true
|
382 |
+
|
383 |
+
steps:
|
384 |
+
- uses: actions/checkout@v4
|
385 |
+
|
386 |
+
- name: add oneAPI to apt
|
387 |
+
shell: bash
|
388 |
+
run: |
|
389 |
+
cd /tmp
|
390 |
+
wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
391 |
+
sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
392 |
+
rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
393 |
+
sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
|
394 |
+
|
395 |
+
- name: install oneAPI dpcpp compiler
|
396 |
+
shell: bash
|
397 |
+
run: |
|
398 |
+
sudo apt update
|
399 |
+
sudo apt install intel-oneapi-compiler-dpcpp-cpp
|
400 |
+
|
401 |
+
- name: install oneAPI MKL library
|
402 |
+
shell: bash
|
403 |
+
run: |
|
404 |
+
sudo apt install intel-oneapi-mkl-devel
|
405 |
+
|
406 |
+
- name: Clone
|
407 |
+
id: checkout
|
408 |
+
uses: actions/checkout@v4
|
409 |
+
|
410 |
+
- name: Build
|
411 |
+
id: cmake_build
|
412 |
+
run: |
|
413 |
+
source /opt/intel/oneapi/setvars.sh
|
414 |
+
mkdir build
|
415 |
+
cd build
|
416 |
+
cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
|
417 |
+
cmake --build . --config Release -j $(nproc)
|
418 |
+
|
419 |
+
ubuntu-22-cmake-sycl-fp16:
|
420 |
+
runs-on: ubuntu-22.04
|
421 |
+
|
422 |
+
continue-on-error: true
|
423 |
+
|
424 |
+
steps:
|
425 |
+
- uses: actions/checkout@v4
|
426 |
+
|
427 |
+
- name: add oneAPI to apt
|
428 |
+
shell: bash
|
429 |
+
run: |
|
430 |
+
cd /tmp
|
431 |
+
wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
432 |
+
sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
433 |
+
rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
|
434 |
+
sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
|
435 |
+
|
436 |
+
- name: install oneAPI dpcpp compiler
|
437 |
+
shell: bash
|
438 |
+
run: |
|
439 |
+
sudo apt update
|
440 |
+
sudo apt install intel-oneapi-compiler-dpcpp-cpp
|
441 |
+
|
442 |
+
- name: install oneAPI MKL library
|
443 |
+
shell: bash
|
444 |
+
run: |
|
445 |
+
sudo apt install intel-oneapi-mkl-devel
|
446 |
+
|
447 |
+
- name: Clone
|
448 |
+
id: checkout
|
449 |
+
uses: actions/checkout@v4
|
450 |
+
|
451 |
+
- name: Build
|
452 |
+
id: cmake_build
|
453 |
+
run: |
|
454 |
+
source /opt/intel/oneapi/setvars.sh
|
455 |
+
mkdir build
|
456 |
+
cd build
|
457 |
+
cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DGGML_SYCL_F16=ON ..
|
458 |
+
cmake --build . --config Release -j $(nproc)
|
459 |
+
|
460 |
+
# TODO: build with GGML_METAL=OFF because test-backend-ops fail on "Apple Paravirtual device" and I don't know
|
461 |
+
# how to debug it.
|
462 |
+
# ref: https://github.com/ggerganov/llama.cpp/actions/runs/7132125951/job/19422043567?pr=4359#step:5:6584
|
463 |
+
# would be great if we fix these
|
464 |
+
macOS-latest-cmake:
|
465 |
+
runs-on: macos-latest
|
466 |
+
|
467 |
+
steps:
|
468 |
+
- name: Clone
|
469 |
+
id: checkout
|
470 |
+
uses: actions/checkout@v4
|
471 |
+
|
472 |
+
- name: Dependencies
|
473 |
+
id: depends
|
474 |
+
continue-on-error: true
|
475 |
+
run: |
|
476 |
+
brew update
|
477 |
+
|
478 |
+
- name: Build
|
479 |
+
id: cmake_build
|
480 |
+
run: |
|
481 |
+
sysctl -a
|
482 |
+
mkdir build
|
483 |
+
cd build
|
484 |
+
cmake -DLLAMA_FATAL_WARNINGS=ON -DGGML_METAL=OFF ..
|
485 |
+
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu)
|
486 |
+
|
487 |
+
- name: Test
|
488 |
+
id: cmake_test
|
489 |
+
run: |
|
490 |
+
cd build
|
491 |
+
ctest -L main --verbose --timeout 900
|
492 |
+
|
493 |
+
macOS-latest-cmake-ios:
|
494 |
+
runs-on: macos-latest
|
495 |
+
|
496 |
+
steps:
|
497 |
+
- name: Clone
|
498 |
+
id: checkout
|
499 |
+
uses: actions/checkout@v4
|
500 |
+
|
501 |
+
- name: Dependencies
|
502 |
+
id: depends
|
503 |
+
continue-on-error: true
|
504 |
+
run: |
|
505 |
+
brew update
|
506 |
+
|
507 |
+
- name: Build
|
508 |
+
id: cmake_build
|
509 |
+
run: |
|
510 |
+
sysctl -a
|
511 |
+
mkdir build
|
512 |
+
cd build
|
513 |
+
cmake -G Xcode .. \
|
514 |
+
-DGGML_METAL_USE_BF16=ON \
|
515 |
+
-DGGML_METAL_EMBED_LIBRARY=ON \
|
516 |
+
-DLLAMA_BUILD_EXAMPLES=OFF \
|
517 |
+
-DLLAMA_BUILD_TESTS=OFF \
|
518 |
+
-DLLAMA_BUILD_SERVER=OFF \
|
519 |
+
-DCMAKE_SYSTEM_NAME=iOS \
|
520 |
+
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
|
521 |
+
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
|
522 |
+
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
|
523 |
+
|
524 |
+
macOS-latest-cmake-tvos:
|
525 |
+
runs-on: macos-latest
|
526 |
+
|
527 |
+
steps:
|
528 |
+
- name: Clone
|
529 |
+
id: checkout
|
530 |
+
uses: actions/checkout@v4
|
531 |
+
|
532 |
+
- name: Dependencies
|
533 |
+
id: depends
|
534 |
+
continue-on-error: true
|
535 |
+
run: |
|
536 |
+
brew update
|
537 |
+
|
538 |
+
- name: Build
|
539 |
+
id: cmake_build
|
540 |
+
run: |
|
541 |
+
sysctl -a
|
542 |
+
mkdir build
|
543 |
+
cd build
|
544 |
+
cmake -G Xcode .. \
|
545 |
+
-DGGML_METAL_USE_BF16=ON \
|
546 |
+
-DGGML_METAL_EMBED_LIBRARY=ON \
|
547 |
+
-DLLAMA_BUILD_EXAMPLES=OFF \
|
548 |
+
-DLLAMA_BUILD_TESTS=OFF \
|
549 |
+
-DLLAMA_BUILD_SERVER=OFF \
|
550 |
+
-DCMAKE_SYSTEM_NAME=tvOS \
|
551 |
+
-DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
|
552 |
+
-DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
|
553 |
+
cmake --build . --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
|
554 |
+
|
555 |
+
# TODO: tmp disabled. see for possible re-enable:
|
556 |
+
# https://github.com/ggerganov/llama.cpp/pull/10525
|
557 |
+
# macOS-latest-swift:
|
558 |
+
# runs-on: macos-latest
|
559 |
+
#
|
560 |
+
# strategy:
|
561 |
+
# matrix:
|
562 |
+
# destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
|
563 |
+
#
|
564 |
+
# steps:
|
565 |
+
# - name: Clone
|
566 |
+
# id: checkout
|
567 |
+
# uses: actions/checkout@v4
|
568 |
+
#
|
569 |
+
# - name: Dependencies
|
570 |
+
# id: depends
|
571 |
+
# continue-on-error: true
|
572 |
+
# run: |
|
573 |
+
# brew update
|
574 |
+
#
|
575 |
+
# - name: xcodebuild for swift package
|
576 |
+
# id: xcodebuild
|
577 |
+
# run: |
|
578 |
+
# xcodebuild -scheme llama -destination "${{ matrix.destination }}"
|
579 |
+
#
|
580 |
+
# - name: Build Swift Example
|
581 |
+
# id: make_build_swift_example
|
582 |
+
# run: |
|
583 |
+
# make swift
|
584 |
+
|
585 |
+
windows-msys2:
|
586 |
+
runs-on: windows-latest
|
587 |
+
|
588 |
+
strategy:
|
589 |
+
fail-fast: false
|
590 |
+
matrix:
|
591 |
+
include:
|
592 |
+
- { sys: UCRT64, env: ucrt-x86_64, build: Release }
|
593 |
+
- { sys: CLANG64, env: clang-x86_64, build: Release }
|
594 |
+
|
595 |
+
steps:
|
596 |
+
- name: Clone
|
597 |
+
uses: actions/checkout@v4
|
598 |
+
|
599 |
+
- name: Setup ${{ matrix.sys }}
|
600 |
+
uses: msys2/setup-msys2@v2
|
601 |
+
with:
|
602 |
+
update: true
|
603 |
+
msystem: ${{matrix.sys}}
|
604 |
+
install: >-
|
605 |
+
base-devel
|
606 |
+
mingw-w64-${{matrix.env}}-toolchain
|
607 |
+
mingw-w64-${{matrix.env}}-cmake
|
608 |
+
mingw-w64-${{matrix.env}}-openblas
|
609 |
+
|
610 |
+
- name: Build using CMake
|
611 |
+
shell: msys2 {0}
|
612 |
+
run: |
|
613 |
+
cmake -B build
|
614 |
+
cmake --build build --config ${{ matrix.build }} -j $(nproc)
|
615 |
+
|
616 |
+
- name: Clean after building using CMake
|
617 |
+
shell: msys2 {0}
|
618 |
+
run: |
|
619 |
+
rm -rf build
|
620 |
+
|
621 |
+
- name: Build using CMake w/ OpenBLAS
|
622 |
+
shell: msys2 {0}
|
623 |
+
run: |
|
624 |
+
cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
|
625 |
+
cmake --build build --config ${{ matrix.build }} -j $(nproc)
|
626 |
+
|
627 |
+
windows-latest-cmake:
|
628 |
+
runs-on: windows-latest
|
629 |
+
|
630 |
+
env:
|
631 |
+
OPENBLAS_VERSION: 0.3.23
|
632 |
+
SDE_VERSION: 9.33.0-2024-01-07
|
633 |
+
VULKAN_VERSION: 1.3.261.1
|
634 |
+
|
635 |
+
strategy:
|
636 |
+
matrix:
|
637 |
+
include:
|
638 |
+
- build: 'noavx-x64'
|
639 |
+
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF -DBUILD_SHARED_LIBS=ON'
|
640 |
+
- build: 'avx2-x64'
|
641 |
+
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DBUILD_SHARED_LIBS=ON'
|
642 |
+
- build: 'avx-x64'
|
643 |
+
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF -DBUILD_SHARED_LIBS=ON'
|
644 |
+
- build: 'avx512-x64'
|
645 |
+
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON -DBUILD_SHARED_LIBS=ON'
|
646 |
+
- build: 'openblas-x64'
|
647 |
+
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -DBUILD_SHARED_LIBS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
|
648 |
+
- build: 'kompute-x64'
|
649 |
+
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON -DBUILD_SHARED_LIBS=ON'
|
650 |
+
- build: 'vulkan-x64'
|
651 |
+
defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON -DBUILD_SHARED_LIBS=ON'
|
652 |
+
- build: 'llvm-arm64'
|
653 |
+
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
|
654 |
+
- build: 'msvc-arm64'
|
655 |
+
defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DBUILD_SHARED_LIBS=ON'
|
656 |
+
|
657 |
+
steps:
|
658 |
+
- name: Clone
|
659 |
+
id: checkout
|
660 |
+
uses: actions/checkout@v4
|
661 |
+
with:
|
662 |
+
fetch-depth: 0
|
663 |
+
|
664 |
+
- name: Clone Kompute submodule
|
665 |
+
id: clone_kompute
|
666 |
+
if: ${{ matrix.build == 'kompute-x64' }}
|
667 |
+
run: |
|
668 |
+
git submodule update --init ggml/src/ggml-kompute/kompute
|
669 |
+
|
670 |
+
- name: Download OpenBLAS
|
671 |
+
id: get_openblas
|
672 |
+
if: ${{ matrix.build == 'openblas-x64' }}
|
673 |
+
run: |
|
674 |
+
curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip"
|
675 |
+
curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE"
|
676 |
+
mkdir $env:RUNNER_TEMP/openblas
|
677 |
+
tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas
|
678 |
+
$vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
|
679 |
+
$msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
|
680 |
+
$lib = $(join-path $msvc 'bin\Hostx64\x64\lib.exe')
|
681 |
+
& $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll
|
682 |
+
|
683 |
+
- name: Install Vulkan SDK
|
684 |
+
id: get_vulkan
|
685 |
+
if: ${{ matrix.build == 'kompute-x64' || matrix.build == 'vulkan-x64' }}
|
686 |
+
run: |
|
687 |
+
curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
|
688 |
+
& "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
|
689 |
+
Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
|
690 |
+
Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
|
691 |
+
|
692 |
+
- name: Install Ninja
|
693 |
+
id: install_ninja
|
694 |
+
run: |
|
695 |
+
choco install ninja
|
696 |
+
|
697 |
+
- name: Build
|
698 |
+
id: cmake_build
|
699 |
+
run: |
|
700 |
+
cmake -S . -B build ${{ matrix.defines }}
|
701 |
+
cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS}
|
702 |
+
|
703 |
+
- name: Add libopenblas.dll
|
704 |
+
id: add_libopenblas_dll
|
705 |
+
if: ${{ matrix.build == 'openblas-x64' }}
|
706 |
+
run: |
|
707 |
+
cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll
|
708 |
+
cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt
|
709 |
+
|
710 |
+
- name: Check AVX512F support
|
711 |
+
id: check_avx512f
|
712 |
+
if: ${{ matrix.build == 'avx512-x64' }}
|
713 |
+
continue-on-error: true
|
714 |
+
run: |
|
715 |
+
cd build
|
716 |
+
$vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
|
717 |
+
$msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
|
718 |
+
$cl = $(join-path $msvc 'bin\Hostx64\x64\cl.exe')
|
719 |
+
echo 'int main(void){unsigned int a[4];__cpuid(a,7);return !(a[1]&65536);}' >> avx512f.c
|
720 |
+
& $cl /O2 /GS- /kernel avx512f.c /link /nodefaultlib /entry:main
|
721 |
+
.\avx512f.exe && echo "AVX512F: YES" && ( echo HAS_AVX512F=1 >> $env:GITHUB_ENV ) || echo "AVX512F: NO"
|
722 |
+
|
723 |
+
- name: Test
|
724 |
+
id: cmake_test
|
725 |
+
# not all machines have native AVX-512
|
726 |
+
if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }}
|
727 |
+
run: |
|
728 |
+
cd build
|
729 |
+
ctest -L main -C Release --verbose --timeout 900
|
730 |
+
|
731 |
+
- name: Test (Intel SDE)
|
732 |
+
id: cmake_test_sde
|
733 |
+
if: ${{ matrix.build == 'avx512-x64' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation
|
734 |
+
run: |
|
735 |
+
curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz"
|
736 |
+
# for some weird reason windows tar doesn't like sde tar.xz
|
737 |
+
7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz
|
738 |
+
7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
|
739 |
+
$sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
|
740 |
+
cd build
|
741 |
+
$env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1
|
742 |
+
& $sde -future -- ctest -L main -C Release --verbose --timeout 900
|
743 |
+
|
744 |
+
- name: Determine tag name
|
745 |
+
id: tag
|
746 |
+
shell: bash
|
747 |
+
run: |
|
748 |
+
BUILD_NUMBER="$(git rev-list --count HEAD)"
|
749 |
+
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
|
750 |
+
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
|
751 |
+
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
|
752 |
+
else
|
753 |
+
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
|
754 |
+
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
|
755 |
+
fi
|
756 |
+
|
757 |
+
- name: Pack artifacts
|
758 |
+
id: pack_artifacts
|
759 |
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
760 |
+
run: |
|
761 |
+
Copy-Item LICENSE .\build\bin\Release\llama.cpp.txt
|
762 |
+
7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip .\build\bin\Release\*
|
763 |
+
|
764 |
+
- name: Upload artifacts
|
765 |
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
766 |
+
uses: actions/upload-artifact@v4
|
767 |
+
with:
|
768 |
+
path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip
|
769 |
+
name: llama-bin-win-${{ matrix.build }}.zip
|
770 |
+
|
771 |
+
ubuntu-latest-cmake-cuda:
|
772 |
+
runs-on: ubuntu-latest
|
773 |
+
container: nvidia/cuda:12.6.2-devel-ubuntu24.04
|
774 |
+
|
775 |
+
steps:
|
776 |
+
- name: Clone
|
777 |
+
id: checkout
|
778 |
+
uses: actions/checkout@v4
|
779 |
+
|
780 |
+
- name: Install dependencies
|
781 |
+
env:
|
782 |
+
DEBIAN_FRONTEND: noninteractive
|
783 |
+
run: |
|
784 |
+
apt update
|
785 |
+
apt install -y cmake build-essential ninja-build libgomp1 git
|
786 |
+
|
787 |
+
- name: Build with CMake
|
788 |
+
run: |
|
789 |
+
cmake -S . -B build -G Ninja -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DCMAKE_CUDA_ARCHITECTURES=89-real -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined -DLLAMA_FATAL_WARNINGS=ON
|
790 |
+
cmake --build build
|
791 |
+
|
792 |
+
windows-2019-cmake-cuda:
|
793 |
+
runs-on: windows-2019
|
794 |
+
|
795 |
+
strategy:
|
796 |
+
matrix:
|
797 |
+
cuda: ['12.4', '11.7']
|
798 |
+
build: ['cuda']
|
799 |
+
|
800 |
+
steps:
|
801 |
+
- name: Clone
|
802 |
+
id: checkout
|
803 |
+
uses: actions/checkout@v4
|
804 |
+
with:
|
805 |
+
fetch-depth: 0
|
806 |
+
|
807 |
+
- name: Install Cuda Toolkit 11.7
|
808 |
+
if: ${{ matrix.cuda == '11.7' }}
|
809 |
+
run: |
|
810 |
+
mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
|
811 |
+
choco install unzip -y
|
812 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-11.7.99-archive.zip"
|
813 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-11.7.99-archive.zip"
|
814 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-11.7.99-archive.zip"
|
815 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-11.7.4.6-archive.zip"
|
816 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-11.7.91-archive.zip"
|
817 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-11.7.91-archive.zip"
|
818 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-11.7.101-archive.zip"
|
819 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-11.7.91-archive.zip"
|
820 |
+
unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
|
821 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cudart-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
|
822 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvcc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
|
823 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvrtc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
|
824 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libcublas-windows-x86_64-11.7.4.6-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
|
825 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvtx-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
|
826 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\visual_studio_integration-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
|
827 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvprof-windows-x86_64-11.7.101-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
|
828 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cccl-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
|
829 |
+
echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
830 |
+
echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
831 |
+
echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
|
832 |
+
echo "CUDA_PATH_V11_7=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
|
833 |
+
|
834 |
+
- name: Install Cuda Toolkit 12.4
|
835 |
+
if: ${{ matrix.cuda == '12.4' }}
|
836 |
+
run: |
|
837 |
+
mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
|
838 |
+
choco install unzip -y
|
839 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.4.127-archive.zip"
|
840 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.4.131-archive.zip"
|
841 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-12.4.127-archive.zip"
|
842 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-12.4.5.8-archive.zip"
|
843 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.4.127-archive.zip"
|
844 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-12.4.127-archive.zip"
|
845 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.4.127-archive.zip"
|
846 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-12.4.127-archive.zip"
|
847 |
+
curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-12.4.127-archive.zip"
|
848 |
+
unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
|
849 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cudart-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
|
850 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvcc-windows-x86_64-12.4.131-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
|
851 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvrtc-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
|
852 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libcublas-windows-x86_64-12.4.5.8-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
|
853 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvtx-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
|
854 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_profiler_api-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
|
855 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\visual_studio_integration-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
|
856 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvprof-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
|
857 |
+
xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cccl-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
|
858 |
+
echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
859 |
+
echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
|
860 |
+
echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
|
861 |
+
echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
|
862 |
+
|
863 |
+
- name: Install ccache
|
864 |
+
uses: hendrikmuhs/[email protected]
|
865 |
+
with:
|
866 |
+
key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }}
|
867 |
+
|
868 |
+
- name: Install Ninja
|
869 |
+
id: install_ninja
|
870 |
+
run: |
|
871 |
+
choco install ninja
|
872 |
+
|
873 |
+
- name: Build
|
874 |
+
id: cmake_build
|
875 |
+
shell: cmd
|
876 |
+
run: |
|
877 |
+
call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
|
878 |
+
cmake -S . -B build -G "Ninja Multi-Config" -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_CUDA=ON -DBUILD_SHARED_LIBS=ON -DGGML_RPC=ON
|
879 |
+
set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
|
880 |
+
cmake --build build --config Release -j %NINJA_JOBS% -t ggml
|
881 |
+
cmake --build build --config Release
|
882 |
+
|
883 |
+
- name: Determine tag name
|
884 |
+
id: tag
|
885 |
+
shell: bash
|
886 |
+
run: |
|
887 |
+
BUILD_NUMBER="$(git rev-list --count HEAD)"
|
888 |
+
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
|
889 |
+
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
|
890 |
+
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
|
891 |
+
else
|
892 |
+
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
|
893 |
+
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
|
894 |
+
fi
|
895 |
+
|
896 |
+
- name: Pack artifacts
|
897 |
+
id: pack_artifacts
|
898 |
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
899 |
+
run: |
|
900 |
+
7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip .\build\bin\Release\*
|
901 |
+
|
902 |
+
- name: Upload artifacts
|
903 |
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
904 |
+
uses: actions/upload-artifact@v4
|
905 |
+
with:
|
906 |
+
path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip
|
907 |
+
name: llama-bin-win-cu${{ matrix.cuda }}-x64.zip
|
908 |
+
|
909 |
+
- name: Copy and pack Cuda runtime
|
910 |
+
if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
|
911 |
+
run: |
|
912 |
+
echo "Cuda install location: ${{ env.CUDA_PATH }}"
|
913 |
+
$dst='.\build\bin\cudart\'
|
914 |
+
robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
|
915 |
+
robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
|
916 |
+
7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip $dst\*
|
917 |
+
|
918 |
+
- name: Upload Cuda runtime
|
919 |
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
920 |
+
uses: actions/upload-artifact@v4
|
921 |
+
with:
|
922 |
+
path: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
|
923 |
+
name: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
|
924 |
+
|
925 |
+
windows-latest-cmake-sycl:
|
926 |
+
runs-on: windows-latest
|
927 |
+
|
928 |
+
defaults:
|
929 |
+
run:
|
930 |
+
shell: bash
|
931 |
+
|
932 |
+
env:
|
933 |
+
WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe
|
934 |
+
WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
|
935 |
+
ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
|
936 |
+
steps:
|
937 |
+
- name: Clone
|
938 |
+
id: checkout
|
939 |
+
uses: actions/checkout@v4
|
940 |
+
with:
|
941 |
+
fetch-depth: 0
|
942 |
+
|
943 |
+
- name: Install
|
944 |
+
run: |
|
945 |
+
scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL
|
946 |
+
|
947 |
+
- name: Build
|
948 |
+
id: cmake_build
|
949 |
+
run: examples/sycl/win-build-sycl.bat
|
950 |
+
|
951 |
+
- name: Determine tag name
|
952 |
+
id: tag
|
953 |
+
shell: bash
|
954 |
+
run: |
|
955 |
+
BUILD_NUMBER="$(git rev-list --count HEAD)"
|
956 |
+
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
|
957 |
+
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
|
958 |
+
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
|
959 |
+
else
|
960 |
+
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
|
961 |
+
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
|
962 |
+
fi
|
963 |
+
|
964 |
+
- name: Build the release package
|
965 |
+
id: pack_artifacts
|
966 |
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
967 |
+
run: |
|
968 |
+
echo "cp oneAPI running time dll files in ${{ env.ONEAPI_ROOT }} to ./build/bin"
|
969 |
+
|
970 |
+
cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.5.dll" ./build/bin
|
971 |
+
cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_core.2.dll" ./build/bin
|
972 |
+
cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_tbb_thread.2.dll" ./build/bin
|
973 |
+
|
974 |
+
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero.dll" ./build/bin
|
975 |
+
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_opencl.dll" ./build/bin
|
976 |
+
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_loader.dll" ./build/bin
|
977 |
+
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_win_proxy_loader.dll" ./build/bin
|
978 |
+
|
979 |
+
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl8.dll" ./build/bin
|
980 |
+
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin
|
981 |
+
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin
|
982 |
+
cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin
|
983 |
+
|
984 |
+
cp "${{ env.ONEAPI_ROOT }}/dnnl/latest/bin/dnnl.dll" ./build/bin
|
985 |
+
cp "${{ env.ONEAPI_ROOT }}/tbb/latest/bin/tbb12.dll" ./build/bin
|
986 |
+
|
987 |
+
echo "cp oneAPI running time dll files to ./build/bin done"
|
988 |
+
7z a llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip ./build/bin/*
|
989 |
+
|
990 |
+
- name: Upload the release package
|
991 |
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
992 |
+
uses: actions/upload-artifact@v4
|
993 |
+
with:
|
994 |
+
path: llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip
|
995 |
+
name: llama-bin-win-sycl-x64.zip
|
996 |
+
|
997 |
+
windows-latest-cmake-hip:
|
998 |
+
if: ${{ github.event.inputs.create_release != 'true' }}
|
999 |
+
runs-on: windows-latest
|
1000 |
+
|
1001 |
+
steps:
|
1002 |
+
- name: Clone
|
1003 |
+
id: checkout
|
1004 |
+
uses: actions/checkout@v4
|
1005 |
+
|
1006 |
+
- name: Install
|
1007 |
+
id: depends
|
1008 |
+
run: |
|
1009 |
+
$ErrorActionPreference = "Stop"
|
1010 |
+
write-host "Downloading AMD HIP SDK Installer"
|
1011 |
+
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
|
1012 |
+
write-host "Installing AMD HIP SDK"
|
1013 |
+
Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
|
1014 |
+
write-host "Completed AMD HIP SDK installation"
|
1015 |
+
|
1016 |
+
- name: Verify ROCm
|
1017 |
+
id: verify
|
1018 |
+
run: |
|
1019 |
+
& 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
|
1020 |
+
|
1021 |
+
- name: Install ccache
|
1022 |
+
uses: hendrikmuhs/ccache-action@v1.2.16
|
1023 |
+
with:
|
1024 |
+
key: ${{ github.job }}
|
1025 |
+
|
1026 |
+
- name: Build
|
1027 |
+
id: cmake_build
|
1028 |
+
run: |
|
1029 |
+
$env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
|
1030 |
+
$env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
|
1031 |
+
cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release -DGGML_RPC=ON
|
1032 |
+
cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
|
1033 |
+
|
1034 |
+
windows-latest-cmake-hip-release:
|
1035 |
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
1036 |
+
runs-on: windows-latest
|
1037 |
+
|
1038 |
+
strategy:
|
1039 |
+
matrix:
|
1040 |
+
gpu_target: [gfx1100, gfx1101, gfx1030]
|
1041 |
+
|
1042 |
+
steps:
|
1043 |
+
- name: Clone
|
1044 |
+
id: checkout
|
1045 |
+
uses: actions/checkout@v4
|
1046 |
+
with:
|
1047 |
+
fetch-depth: 0
|
1048 |
+
|
1049 |
+
- name: Install
|
1050 |
+
id: depends
|
1051 |
+
run: |
|
1052 |
+
$ErrorActionPreference = "Stop"
|
1053 |
+
write-host "Downloading AMD HIP SDK Installer"
|
1054 |
+
Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
|
1055 |
+
write-host "Installing AMD HIP SDK"
|
1056 |
+
Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
|
1057 |
+
write-host "Completed AMD HIP SDK installation"
|
1058 |
+
|
1059 |
+
- name: Verify ROCm
|
1060 |
+
id: verify
|
1061 |
+
run: |
|
1062 |
+
& 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
|
1063 |
+
|
1064 |
+
- name: Build
|
1065 |
+
id: cmake_build
|
1066 |
+
run: |
|
1067 |
+
$env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
|
1068 |
+
$env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
|
1069 |
+
cmake -G "Unix Makefiles" -B build -S . -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" -DGGML_HIP=ON -DCMAKE_BUILD_TYPE=Release -DAMDGPU_TARGETS=${{ matrix.gpu_target }} -DGGML_RPC=ON
|
1070 |
+
cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
|
1071 |
+
md "build\bin\rocblas\library\"
|
1072 |
+
cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
|
1073 |
+
cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\"
|
1074 |
+
cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\"
|
1075 |
+
|
1076 |
+
- name: Determine tag name
|
1077 |
+
id: tag
|
1078 |
+
shell: bash
|
1079 |
+
run: |
|
1080 |
+
BUILD_NUMBER="$(git rev-list --count HEAD)"
|
1081 |
+
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
|
1082 |
+
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
|
1083 |
+
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
|
1084 |
+
else
|
1085 |
+
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
|
1086 |
+
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
|
1087 |
+
fi
|
1088 |
+
|
1089 |
+
- name: Pack artifacts
|
1090 |
+
id: pack_artifacts
|
1091 |
+
run: |
|
1092 |
+
7z a llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip .\build\bin\*
|
1093 |
+
|
1094 |
+
- name: Upload artifacts
|
1095 |
+
uses: actions/upload-artifact@v4
|
1096 |
+
with:
|
1097 |
+
path: llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
|
1098 |
+
name: llama-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
|
1099 |
+
|
1100 |
+
ios-xcode-build:
|
1101 |
+
runs-on: macos-latest
|
1102 |
+
|
1103 |
+
steps:
|
1104 |
+
- name: Checkout code
|
1105 |
+
uses: actions/checkout@v4
|
1106 |
+
|
1107 |
+
- name: Build Xcode project
|
1108 |
+
run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build
|
1109 |
+
|
1110 |
+
android-build:
|
1111 |
+
runs-on: ubuntu-latest
|
1112 |
+
|
1113 |
+
steps:
|
1114 |
+
- name: Clone
|
1115 |
+
uses: actions/checkout@v4
|
1116 |
+
|
1117 |
+
- name: Set up JDK
|
1118 |
+
uses: actions/setup-java@v3
|
1119 |
+
with:
|
1120 |
+
java-version: 17
|
1121 |
+
distribution: zulu
|
1122 |
+
|
1123 |
+
- name: Setup Android SDK
|
1124 |
+
uses: android-actions/setup-android@v3
|
1125 |
+
with:
|
1126 |
+
log-accepted-android-sdk-licenses: false
|
1127 |
+
|
1128 |
+
- name: Build
|
1129 |
+
run: |
|
1130 |
+
cd examples/llama.android
|
1131 |
+
|
1132 |
+
./gradlew build --no-daemon
|
1133 |
+
|
1134 |
+
# freeBSD-latest:
|
1135 |
+
# runs-on: macos-12
|
1136 |
+
# steps:
|
1137 |
+
# - name: Clone
|
1138 |
+
# uses: actions/checkout@v4
|
1139 |
+
#
|
1140 |
+
# - name: Build
|
1141 |
+
# uses: cross-platform-actions/action@v0.19.0
|
1142 |
+
# with:
|
1143 |
+
# operating_system: freebsd
|
1144 |
+
# version: '13.2'
|
1145 |
+
# hypervisor: 'qemu'
|
1146 |
+
# run: |
|
1147 |
+
# sudo pkg update
|
1148 |
+
# sudo pkg install -y gmake automake autoconf pkgconf llvm15 openblas
|
1149 |
+
# gmake CC=/usr/local/bin/clang15 CXX=/usr/local/bin/clang++15 -j `sysctl -n hw.ncpu`
|
1150 |
+
|
1151 |
+
release:
|
1152 |
+
if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
|
1153 |
+
|
1154 |
+
runs-on: ubuntu-latest
|
1155 |
+
|
1156 |
+
needs:
|
1157 |
+
- ubuntu-latest-cmake
|
1158 |
+
- macOS-latest-cmake
|
1159 |
+
- windows-latest-cmake
|
1160 |
+
- windows-2019-cmake-cuda
|
1161 |
+
- windows-latest-cmake-hip-release
|
1162 |
+
- macOS-latest-cmake-arm64
|
1163 |
+
- macOS-latest-cmake-x64
|
1164 |
+
|
1165 |
+
steps:
|
1166 |
+
- name: Clone
|
1167 |
+
id: checkout
|
1168 |
+
uses: actions/checkout@v4
|
1169 |
+
with:
|
1170 |
+
fetch-depth: 0
|
1171 |
+
|
1172 |
+
- name: Determine tag name
|
1173 |
+
id: tag
|
1174 |
+
shell: bash
|
1175 |
+
run: |
|
1176 |
+
BUILD_NUMBER="$(git rev-list --count HEAD)"
|
1177 |
+
SHORT_HASH="$(git rev-parse --short=7 HEAD)"
|
1178 |
+
if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
|
1179 |
+
echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
|
1180 |
+
else
|
1181 |
+
SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
|
1182 |
+
echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
|
1183 |
+
fi
|
1184 |
+
|
1185 |
+
- name: Download artifacts
|
1186 |
+
id: download-artifact
|
1187 |
+
uses: actions/download-artifact@v4
|
1188 |
+
with:
|
1189 |
+
path: ./artifact
|
1190 |
+
|
1191 |
+
- name: Move artifacts
|
1192 |
+
id: move_artifacts
|
1193 |
+
run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release
|
1194 |
+
|
1195 |
+
- name: Create release
|
1196 |
+
id: create_release
|
1197 |
+
uses: anzz1/action-create-release@v1
|
1198 |
+
env:
|
1199 |
+
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
1200 |
+
with:
|
1201 |
+
tag_name: ${{ steps.tag.outputs.name }}
|
1202 |
+
|
1203 |
+
- name: Upload release
|
1204 |
+
id: upload_release
|
1205 |
+
uses: actions/github-script@v3
|
1206 |
+
with:
|
1207 |
+
github-token: ${{secrets.GITHUB_TOKEN}}
|
1208 |
+
script: |
|
1209 |
+
const path = require('path');
|
1210 |
+
const fs = require('fs');
|
1211 |
+
const release_id = '${{ steps.create_release.outputs.id }}';
|
1212 |
+
for (let file of await fs.readdirSync('./artifact/release')) {
|
1213 |
+
if (path.extname(file) === '.zip') {
|
1214 |
+
console.log('uploadReleaseAsset', file);
|
1215 |
+
await github.repos.uploadReleaseAsset({
|
1216 |
+
owner: context.repo.owner,
|
1217 |
+
repo: context.repo.repo,
|
1218 |
+
release_id: release_id,
|
1219 |
+
name: file,
|
1220 |
+
data: await fs.readFileSync(`./artifact/release/${file}`)
|
1221 |
+
});
|
1222 |
+
}
|
1223 |
+
}
|
1224 |
+
|
1225 |
+
# ubuntu-latest-gcc:
|
1226 |
+
# runs-on: ubuntu-latest
|
1227 |
+
#
|
1228 |
+
# strategy:
|
1229 |
+
# matrix:
|
1230 |
+
# build: [Debug, Release]
|
1231 |
+
#
|
1232 |
+
# steps:
|
1233 |
+
# - name: Clone
|
1234 |
+
# uses: actions/checkout@v4
|
1235 |
+
#
|
1236 |
+
# - name: Dependencies
|
1237 |
+
# run: |
|
1238 |
+
# sudo apt-get update
|
1239 |
+
# sudo apt-get install build-essential
|
1240 |
+
# sudo apt-get install cmake
|
1241 |
+
#
|
1242 |
+
# - name: Configure
|
1243 |
+
# run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
1244 |
+
#
|
1245 |
+
# - name: Build
|
1246 |
+
# run: |
|
1247 |
+
# make
|
1248 |
+
#
|
1249 |
+
# ubuntu-latest-clang:
|
1250 |
+
# runs-on: ubuntu-latest
|
1251 |
+
#
|
1252 |
+
# strategy:
|
1253 |
+
# matrix:
|
1254 |
+
# build: [Debug, Release]
|
1255 |
+
#
|
1256 |
+
# steps:
|
1257 |
+
# - name: Clone
|
1258 |
+
# uses: actions/checkout@v4
|
1259 |
+
#
|
1260 |
+
# - name: Dependencies
|
1261 |
+
# run: |
|
1262 |
+
# sudo apt-get update
|
1263 |
+
# sudo apt-get install build-essential
|
1264 |
+
# sudo apt-get install cmake
|
1265 |
+
#
|
1266 |
+
# - name: Configure
|
1267 |
+
# run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
|
1268 |
+
#
|
1269 |
+
# - name: Build
|
1270 |
+
# run: |
|
1271 |
+
# make
|
1272 |
+
#
|
1273 |
+
# ubuntu-latest-gcc-sanitized:
|
1274 |
+
# runs-on: ubuntu-latest
|
1275 |
+
#
|
1276 |
+
# strategy:
|
1277 |
+
# matrix:
|
1278 |
+
# sanitizer: [ADDRESS, THREAD, UNDEFINED]
|
1279 |
+
#
|
1280 |
+
# steps:
|
1281 |
+
# - name: Clone
|
1282 |
+
# uses: actions/checkout@v4
|
1283 |
+
#
|
1284 |
+
# - name: Dependencies
|
1285 |
+
# run: |
|
1286 |
+
# sudo apt-get update
|
1287 |
+
# sudo apt-get install build-essential
|
1288 |
+
# sudo apt-get install cmake
|
1289 |
+
#
|
1290 |
+
# - name: Configure
|
1291 |
+
# run: cmake . -DCMAKE_BUILD_TYPE=Debug -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON
|
1292 |
+
#
|
1293 |
+
# - name: Build
|
1294 |
+
# run: |
|
1295 |
+
# make
|
1296 |
+
#
|
1297 |
+
# windows:
|
1298 |
+
# runs-on: windows-latest
|
1299 |
+
#
|
1300 |
+
# strategy:
|
1301 |
+
# matrix:
|
1302 |
+
# build: [Release]
|
1303 |
+
# arch: [Win32, x64]
|
1304 |
+
# include:
|
1305 |
+
# - arch: Win32
|
1306 |
+
# s2arc: x86
|
1307 |
+
# - arch: x64
|
1308 |
+
# s2arc: x64
|
1309 |
+
#
|
1310 |
+
# steps:
|
1311 |
+
# - name: Clone
|
1312 |
+
# uses: actions/checkout@v4
|
1313 |
+
#
|
1314 |
+
# - name: Add msbuild to PATH
|
1315 |
+
# uses: microsoft/setup-msbuild@v1
|
1316 |
+
#
|
1317 |
+
# - name: Configure
|
1318 |
+
# run: >
|
1319 |
+
# cmake -S . -B ./build -A ${{ matrix.arch }}
|
1320 |
+
# -DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
1321 |
+
#
|
1322 |
+
# - name: Build
|
1323 |
+
# run: |
|
1324 |
+
# cd ./build
|
1325 |
+
# msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
|
1326 |
+
#
|
1327 |
+
# - name: Upload binaries
|
1328 |
+
# uses: actions/upload-artifact@v4
|
1329 |
+
# with:
|
1330 |
+
# name: llama-bin-${{ matrix.arch }}
|
1331 |
+
# path: build/bin/${{ matrix.build }}
|
1332 |
+
#
|
1333 |
+
# windows-blas:
|
1334 |
+
# runs-on: windows-latest
|
1335 |
+
#
|
1336 |
+
# strategy:
|
1337 |
+
# matrix:
|
1338 |
+
# build: [Release]
|
1339 |
+
# arch: [Win32, x64]
|
1340 |
+
# blas: [ON]
|
1341 |
+
# include:
|
1342 |
+
# - arch: Win32
|
1343 |
+
# obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x86.zip
|
1344 |
+
# s2arc: x86
|
1345 |
+
# - arch: x64
|
1346 |
+
# obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip
|
1347 |
+
# s2arc: x64
|
1348 |
+
#
|
1349 |
+
# steps:
|
1350 |
+
# - name: Clone
|
1351 |
+
# uses: actions/checkout@v4
|
1352 |
+
#
|
1353 |
+
# - name: Add msbuild to PATH
|
1354 |
+
# uses: microsoft/setup-msbuild@v1
|
1355 |
+
#
|
1356 |
+
# - name: Fetch OpenBLAS
|
1357 |
+
# if: matrix.blas == 'ON'
|
1358 |
+
# run: |
|
1359 |
+
# C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }}
|
1360 |
+
# 7z x blas.zip -oblas -y
|
1361 |
+
# copy blas/include/cblas.h .
|
1362 |
+
# copy blas/include/openblas_config.h .
|
1363 |
+
# echo "blasdir=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
|
1364 |
+
#
|
1365 |
+
# - name: Configure
|
1366 |
+
# run: >
|
1367 |
+
# cmake -S . -B ./build -A ${{ matrix.arch }}
|
1368 |
+
# -DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
1369 |
+
# -DLLAMA_SUPPORT_OPENBLAS=${{ matrix.blas }}
|
1370 |
+
# -DCMAKE_LIBRARY_PATH="$env:blasdir/lib"
|
1371 |
+
#
|
1372 |
+
# - name: Build
|
1373 |
+
# run: |
|
1374 |
+
# cd ./build
|
1375 |
+
# msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
|
1376 |
+
#
|
1377 |
+
# - name: Copy libopenblas.dll
|
1378 |
+
# if: matrix.blas == 'ON'
|
1379 |
+
# run: copy "$env:blasdir/bin/libopenblas.dll" build/bin/${{ matrix.build }}
|
1380 |
+
#
|
1381 |
+
# - name: Upload binaries
|
1382 |
+
# if: matrix.blas == 'ON'
|
1383 |
+
# uses: actions/upload-artifact@v4
|
1384 |
+
# with:
|
1385 |
+
# name: llama-blas-bin-${{ matrix.arch }}
|
1386 |
+
# path: build/bin/${{ matrix.build }}
|
1387 |
+
#
|
1388 |
+
# emscripten:
|
1389 |
+
# runs-on: ubuntu-latest
|
1390 |
+
#
|
1391 |
+
# strategy:
|
1392 |
+
# matrix:
|
1393 |
+
# build: [Release]
|
1394 |
+
#
|
1395 |
+
# steps:
|
1396 |
+
# - name: Clone
|
1397 |
+
# uses: actions/checkout@v4
|
1398 |
+
#
|
1399 |
+
# - name: Dependencies
|
1400 |
+
# run: |
|
1401 |
+
# wget -q https://github.com/emscripten-core/emsdk/archive/master.tar.gz
|
1402 |
+
# tar -xvf master.tar.gz
|
1403 |
+
# emsdk-master/emsdk update
|
1404 |
+
# emsdk-master/emsdk install latest
|
1405 |
+
# emsdk-master/emsdk activate latest
|
1406 |
+
#
|
1407 |
+
# - name: Configure
|
1408 |
+
# run: echo "tmp"
|
1409 |
+
#
|
1410 |
+
# - name: Build
|
1411 |
+
# run: |
|
1412 |
+
# pushd emsdk-master
|
1413 |
+
# source ./emsdk_env.sh
|
1414 |
+
# popd
|
1415 |
+
# emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
|
1416 |
+
# make
|
llama.cpp/.github/workflows/close-issue.yml
ADDED
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
name: Close inactive issues
|
2 |
+
on:
|
3 |
+
schedule:
|
4 |
+
- cron: "42 0 * * *"
|
5 |
+
|
6 |
+
# Fine-grained permissions
|
7 |
+
# https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
|
8 |
+
permissions:
|
9 |
+
issues: write
|
10 |
+
|
11 |
+
jobs:
|
12 |
+
close-issues:
|
13 |
+
runs-on: ubuntu-latest
|
14 |
+
permissions:
|
15 |
+
issues: write
|
16 |
+
pull-requests: write
|
17 |
+
steps:
|
18 |
+
- uses: actions/stale@v5
|
19 |
+
with:
|
20 |
+
exempt-issue-labels: "refactor,help wanted,good first issue,research,bug"
|
21 |
+
days-before-issue-stale: 30
|
22 |
+
days-before-issue-close: 14
|
23 |
+
stale-issue-label: "stale"
|
24 |
+
close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale."
|
25 |
+
days-before-pr-stale: -1
|
26 |
+
days-before-pr-close: -1
|
27 |
+
operations-per-run: 10000
|
28 |
+
repo-token: ${{ secrets.GITHUB_TOKEN }}
|