KBaba7 committed
Commit 2a8cee5 · verified · 1 parent: eeccc3d

Upload 2637 files

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the full list.
Files changed (50)
  1. .gitattributes +102 -0
  2. llama.cpp/.DS_Store +0 -0
  3. llama.cpp/.clang-format +161 -0
  4. llama.cpp/.clang-tidy +26 -0
  5. llama.cpp/.devops/cloud-v-pipeline +22 -0
  6. llama.cpp/.devops/cpu.Dockerfile +92 -0
  7. llama.cpp/.devops/cuda.Dockerfile +94 -0
  8. llama.cpp/.devops/intel.Dockerfile +91 -0
  9. llama.cpp/.devops/llama-cli-cann.Dockerfile +44 -0
  10. llama.cpp/.devops/llama-cpp-cuda.srpm.spec +83 -0
  11. llama.cpp/.devops/llama-cpp.srpm.spec +85 -0
  12. llama.cpp/.devops/musa.Dockerfile +108 -0
  13. llama.cpp/.devops/nix/apps.nix +21 -0
  14. llama.cpp/.devops/nix/devshells.nix +52 -0
  15. llama.cpp/.devops/nix/docker.nix +37 -0
  16. llama.cpp/.devops/nix/jetson-support.nix +39 -0
  17. llama.cpp/.devops/nix/nixpkgs-instances.nix +45 -0
  18. llama.cpp/.devops/nix/package-gguf-py.nix +36 -0
  19. llama.cpp/.devops/nix/package.nix +247 -0
  20. llama.cpp/.devops/nix/python-scripts.nix +66 -0
  21. llama.cpp/.devops/nix/scope.nix +41 -0
  22. llama.cpp/.devops/nix/sif.nix +27 -0
  23. llama.cpp/.devops/rocm.Dockerfile +113 -0
  24. llama.cpp/.devops/tools.sh +49 -0
  25. llama.cpp/.devops/vulkan.Dockerfile +89 -0
  26. llama.cpp/.dockerignore +20 -0
  27. llama.cpp/.ecrc +6 -0
  28. llama.cpp/.editorconfig +50 -0
  29. llama.cpp/.flake8 +17 -0
  30. llama.cpp/.github/.DS_Store +0 -0
  31. llama.cpp/.github/ISSUE_TEMPLATE/010-bug-compilation.yml +87 -0
  32. llama.cpp/.github/ISSUE_TEMPLATE/011-bug-results.yml +101 -0
  33. llama.cpp/.github/ISSUE_TEMPLATE/019-bug-misc.yml +91 -0
  34. llama.cpp/.github/ISSUE_TEMPLATE/020-enhancement.yml +51 -0
  35. llama.cpp/.github/ISSUE_TEMPLATE/030-research.yml +52 -0
  36. llama.cpp/.github/ISSUE_TEMPLATE/040-refactor.yml +28 -0
  37. llama.cpp/.github/ISSUE_TEMPLATE/config.yml +11 -0
  38. llama.cpp/.github/labeler.yml +86 -0
  39. llama.cpp/.github/pull_request_template.md +1 -0
  40. llama.cpp/.github/workflows/bench.yml.disabled +315 -0
  41. llama.cpp/.github/workflows/build.yml +1645 -0
  42. llama.cpp/.github/workflows/close-issue.yml +28 -0
  43. llama.cpp/.github/workflows/docker.yml +173 -0
  44. llama.cpp/.github/workflows/editorconfig.yml +29 -0
  45. llama.cpp/.github/workflows/gguf-publish.yml +44 -0
  46. llama.cpp/.github/workflows/labeler.yml +17 -0
  47. llama.cpp/.github/workflows/python-check-requirements.yml +33 -0
  48. llama.cpp/.github/workflows/python-lint.yml +30 -0
  49. llama.cpp/.github/workflows/python-type-check.yml +40 -0
  50. llama.cpp/.github/workflows/server.yml +239 -0
.gitattributes CHANGED
@@ -33,3 +33,105 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ llama.cpp/build/bin/libggml-base.dylib filter=lfs diff=lfs merge=lfs -text
37
+ llama.cpp/build/bin/libggml-cpu.dylib filter=lfs diff=lfs merge=lfs -text
38
+ llama.cpp/build/bin/libggml-metal.dylib filter=lfs diff=lfs merge=lfs -text
39
+ llama.cpp/build/bin/libllama.dylib filter=lfs diff=lfs merge=lfs -text
40
+ llama.cpp/build/bin/libllava_shared.dylib filter=lfs diff=lfs merge=lfs -text
41
+ llama.cpp/build/bin/llama-batched filter=lfs diff=lfs merge=lfs -text
42
+ llama.cpp/build/bin/llama-batched-bench filter=lfs diff=lfs merge=lfs -text
43
+ llama.cpp/build/bin/llama-bench filter=lfs diff=lfs merge=lfs -text
44
+ llama.cpp/build/bin/llama-cli filter=lfs diff=lfs merge=lfs -text
45
+ llama.cpp/build/bin/llama-convert-llama2c-to-ggml filter=lfs diff=lfs merge=lfs -text
46
+ llama.cpp/build/bin/llama-cvector-generator filter=lfs diff=lfs merge=lfs -text
47
+ llama.cpp/build/bin/llama-embedding filter=lfs diff=lfs merge=lfs -text
48
+ llama.cpp/build/bin/llama-eval-callback filter=lfs diff=lfs merge=lfs -text
49
+ llama.cpp/build/bin/llama-export-lora filter=lfs diff=lfs merge=lfs -text
50
+ llama.cpp/build/bin/llama-gen-docs filter=lfs diff=lfs merge=lfs -text
51
+ llama.cpp/build/bin/llama-gritlm filter=lfs diff=lfs merge=lfs -text
52
+ llama.cpp/build/bin/llama-imatrix filter=lfs diff=lfs merge=lfs -text
53
+ llama.cpp/build/bin/llama-infill filter=lfs diff=lfs merge=lfs -text
54
+ llama.cpp/build/bin/llama-llava-cli filter=lfs diff=lfs merge=lfs -text
55
+ llama.cpp/build/bin/llama-llava-clip-quantize-cli filter=lfs diff=lfs merge=lfs -text
56
+ llama.cpp/build/bin/llama-lookahead filter=lfs diff=lfs merge=lfs -text
57
+ llama.cpp/build/bin/llama-lookup filter=lfs diff=lfs merge=lfs -text
58
+ llama.cpp/build/bin/llama-lookup-create filter=lfs diff=lfs merge=lfs -text
59
+ llama.cpp/build/bin/llama-lookup-stats filter=lfs diff=lfs merge=lfs -text
60
+ llama.cpp/build/bin/llama-minicpmv-cli filter=lfs diff=lfs merge=lfs -text
61
+ llama.cpp/build/bin/llama-parallel filter=lfs diff=lfs merge=lfs -text
62
+ llama.cpp/build/bin/llama-passkey filter=lfs diff=lfs merge=lfs -text
63
+ llama.cpp/build/bin/llama-perplexity filter=lfs diff=lfs merge=lfs -text
64
+ llama.cpp/build/bin/llama-quantize filter=lfs diff=lfs merge=lfs -text
65
+ llama.cpp/build/bin/llama-quantize-stats filter=lfs diff=lfs merge=lfs -text
66
+ llama.cpp/build/bin/llama-qwen2vl-cli filter=lfs diff=lfs merge=lfs -text
67
+ llama.cpp/build/bin/llama-retrieval filter=lfs diff=lfs merge=lfs -text
68
+ llama.cpp/build/bin/llama-run filter=lfs diff=lfs merge=lfs -text
69
+ llama.cpp/build/bin/llama-save-load-state filter=lfs diff=lfs merge=lfs -text
70
+ llama.cpp/build/bin/llama-server filter=lfs diff=lfs merge=lfs -text
71
+ llama.cpp/build/bin/llama-speculative filter=lfs diff=lfs merge=lfs -text
72
+ llama.cpp/build/bin/llama-speculative-simple filter=lfs diff=lfs merge=lfs -text
73
+ llama.cpp/build/bin/llama-tokenize filter=lfs diff=lfs merge=lfs -text
74
+ llama.cpp/build/bin/llama-tts filter=lfs diff=lfs merge=lfs -text
75
+ llama.cpp/build/bin/test-arg-parser filter=lfs diff=lfs merge=lfs -text
76
+ llama.cpp/build/bin/test-backend-ops filter=lfs diff=lfs merge=lfs -text
77
+ llama.cpp/build/bin/test-chat filter=lfs diff=lfs merge=lfs -text
78
+ llama.cpp/build/bin/test-chat-template filter=lfs diff=lfs merge=lfs -text
79
+ llama.cpp/build/bin/test-grammar-integration filter=lfs diff=lfs merge=lfs -text
80
+ llama.cpp/build/bin/test-json-schema-to-grammar filter=lfs diff=lfs merge=lfs -text
81
+ llama.cpp/build/bin/test-tokenizer-0 filter=lfs diff=lfs merge=lfs -text
82
+ llama.cpp/build/bin/test-tokenizer-1-bpe filter=lfs diff=lfs merge=lfs -text
83
+ llama.cpp/build/bin/test-tokenizer-1-spm filter=lfs diff=lfs merge=lfs -text
84
+ llama.cpp/build/common/CMakeFiles/common.dir/arg.cpp.o filter=lfs diff=lfs merge=lfs -text
85
+ llama.cpp/build/common/CMakeFiles/common.dir/chat.cpp.o filter=lfs diff=lfs merge=lfs -text
86
+ llama.cpp/build/common/CMakeFiles/common.dir/common.cpp.o filter=lfs diff=lfs merge=lfs -text
87
+ llama.cpp/build/common/CMakeFiles/common.dir/json-schema-to-grammar.cpp.o filter=lfs diff=lfs merge=lfs -text
88
+ llama.cpp/build/common/libcommon.a filter=lfs diff=lfs merge=lfs -text
89
+ llama.cpp/build/examples/llama-bench/CMakeFiles/llama-bench.dir/llama-bench.cpp.o filter=lfs diff=lfs merge=lfs -text
90
+ llama.cpp/build/examples/llava/CMakeFiles/llava.dir/clip.cpp.o filter=lfs diff=lfs merge=lfs -text
91
+ llama.cpp/build/examples/llava/libllava_static.a filter=lfs diff=lfs merge=lfs -text
92
+ llama.cpp/build/examples/perplexity/CMakeFiles/llama-perplexity.dir/perplexity.cpp.o filter=lfs diff=lfs merge=lfs -text
93
+ llama.cpp/build/examples/quantize-stats/CMakeFiles/llama-quantize-stats.dir/quantize-stats.cpp.o filter=lfs diff=lfs merge=lfs -text
94
+ llama.cpp/build/examples/run/CMakeFiles/llama-run.dir/run.cpp.o filter=lfs diff=lfs merge=lfs -text
95
+ llama.cpp/build/examples/server/CMakeFiles/llama-server.dir/server.cpp.o filter=lfs diff=lfs merge=lfs -text
96
+ llama.cpp/build/examples/tts/CMakeFiles/llama-tts.dir/tts.cpp.o filter=lfs diff=lfs merge=lfs -text
97
+ llama.cpp/build/ggml/src/CMakeFiles/ggml-base.dir/ggml-quants.c.o filter=lfs diff=lfs merge=lfs -text
98
+ llama.cpp/build/ggml/src/CMakeFiles/ggml-base.dir/ggml.c.o filter=lfs diff=lfs merge=lfs -text
99
+ llama.cpp/build/ggml/src/CMakeFiles/ggml-base.dir/gguf.cpp.o filter=lfs diff=lfs merge=lfs -text
100
+ llama.cpp/build/ggml/src/CMakeFiles/ggml-cpu.dir/ggml-cpu/ggml-cpu.c.o filter=lfs diff=lfs merge=lfs -text
101
+ llama.cpp/build/ggml/src/CMakeFiles/ggml-cpu.dir/ggml-cpu/llamafile/sgemm.cpp.o filter=lfs diff=lfs merge=lfs -text
102
+ llama.cpp/build/ggml/src/ggml-metal/CMakeFiles/ggml-metal.dir/__/__/__/autogenerated/ggml-metal-embed.s.o filter=lfs diff=lfs merge=lfs -text
103
+ llama.cpp/build/ggml/src/ggml-metal/CMakeFiles/ggml-metal.dir/ggml-metal.m.o filter=lfs diff=lfs merge=lfs -text
104
+ llama.cpp/build/src/CMakeFiles/llama.dir/llama-model-loader.cpp.o filter=lfs diff=lfs merge=lfs -text
105
+ llama.cpp/build/src/CMakeFiles/llama.dir/llama-model.cpp.o filter=lfs diff=lfs merge=lfs -text
106
+ llama.cpp/build/src/CMakeFiles/llama.dir/llama-sampling.cpp.o filter=lfs diff=lfs merge=lfs -text
107
+ llama.cpp/build/src/CMakeFiles/llama.dir/llama-vocab.cpp.o filter=lfs diff=lfs merge=lfs -text
108
+ llama.cpp/build/src/CMakeFiles/llama.dir/llama.cpp.o filter=lfs diff=lfs merge=lfs -text
109
+ llama.cpp/build/src/CMakeFiles/llama.dir/unicode.cpp.o filter=lfs diff=lfs merge=lfs -text
110
+ llama.cpp/build/tests/CMakeFiles/test-backend-ops.dir/test-backend-ops.cpp.o filter=lfs diff=lfs merge=lfs -text
111
+ llama.cpp/build/tests/CMakeFiles/test-chat-template.dir/test-chat-template.cpp.o filter=lfs diff=lfs merge=lfs -text
112
+ llama.cpp/build/tests/CMakeFiles/test-chat.dir/test-chat.cpp.o filter=lfs diff=lfs merge=lfs -text
113
+ llama.cpp/build/tests/CMakeFiles/test-grammar-integration.dir/test-grammar-integration.cpp.o filter=lfs diff=lfs merge=lfs -text
114
+ llama.cpp/build/tests/CMakeFiles/test-json-schema-to-grammar.dir/test-json-schema-to-grammar.cpp.o filter=lfs diff=lfs merge=lfs -text
115
+ llama.cpp/docs/development/llama-star/idea-arch.key filter=lfs diff=lfs merge=lfs -text
116
+ llama.cpp/examples/server/themes/buttons-top/buttons_top.png filter=lfs diff=lfs merge=lfs -text
117
+ llama.cpp/examples/server/themes/wild/llamapattern.png filter=lfs diff=lfs merge=lfs -text
118
+ llama.cpp/examples/server/themes/wild/wild.png filter=lfs diff=lfs merge=lfs -text
119
+ llama.cpp/media/llama0-banner.png filter=lfs diff=lfs merge=lfs -text
120
+ llama.cpp/media/llama0-logo.png filter=lfs diff=lfs merge=lfs -text
121
+ llama.cpp/media/matmul.png filter=lfs diff=lfs merge=lfs -text
122
+ llama.cpp/models/ggml-vocab-aquila.gguf filter=lfs diff=lfs merge=lfs -text
123
+ llama.cpp/models/ggml-vocab-baichuan.gguf filter=lfs diff=lfs merge=lfs -text
124
+ llama.cpp/models/ggml-vocab-bert-bge.gguf filter=lfs diff=lfs merge=lfs -text
125
+ llama.cpp/models/ggml-vocab-command-r.gguf filter=lfs diff=lfs merge=lfs -text
126
+ llama.cpp/models/ggml-vocab-deepseek-coder.gguf filter=lfs diff=lfs merge=lfs -text
127
+ llama.cpp/models/ggml-vocab-deepseek-llm.gguf filter=lfs diff=lfs merge=lfs -text
128
+ llama.cpp/models/ggml-vocab-falcon.gguf filter=lfs diff=lfs merge=lfs -text
129
+ llama.cpp/models/ggml-vocab-gpt-2.gguf filter=lfs diff=lfs merge=lfs -text
130
+ llama.cpp/models/ggml-vocab-gpt-neox.gguf filter=lfs diff=lfs merge=lfs -text
131
+ llama.cpp/models/ggml-vocab-llama-bpe.gguf filter=lfs diff=lfs merge=lfs -text
132
+ llama.cpp/models/ggml-vocab-llama-spm.gguf filter=lfs diff=lfs merge=lfs -text
133
+ llama.cpp/models/ggml-vocab-mpt.gguf filter=lfs diff=lfs merge=lfs -text
134
+ llama.cpp/models/ggml-vocab-phi-3.gguf filter=lfs diff=lfs merge=lfs -text
135
+ llama.cpp/models/ggml-vocab-qwen2.gguf filter=lfs diff=lfs merge=lfs -text
136
+ llama.cpp/models/ggml-vocab-refact.gguf filter=lfs diff=lfs merge=lfs -text
137
+ llama.cpp/models/ggml-vocab-starcoder.gguf filter=lfs diff=lfs merge=lfs -text
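A minimal local sketch of how these rules behave, assuming the git-lfs extension is installed (the path is one of the binaries added in this commit):
$ git check-attr filter diff merge -- llama.cpp/build/bin/llama-server
# each attribute should resolve to "lfs"
$ git lfs ls-files | grep llama-server   # the file is stored as an LFS pointer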
llama.cpp/.DS_Store ADDED
Binary file (10.2 kB).
 
llama.cpp/.clang-format ADDED
@@ -0,0 +1,161 @@
1
+ ---
2
+ Language: Cpp
3
+ AlignAfterOpenBracket: Align
4
+ AlignArrayOfStructures: Left
5
+ AlignConsecutiveAssignments: AcrossComments
6
+ AlignConsecutiveBitFields: AcrossComments
7
+ AlignConsecutiveDeclarations: AcrossComments
8
+ AlignConsecutiveMacros: AcrossComments
9
+ # AlignConsecutiveShortCaseStatements: AcrossComments
10
+ AlignEscapedNewlines: Left # LeftWithLastLine
11
+ AlignOperands: Align
12
+ AlignTrailingComments:
13
+ Kind: Always
14
+ OverEmptyLines: 1
15
+ AllowAllArgumentsOnNextLine: true
16
+ AllowAllParametersOfDeclarationOnNextLine: false
17
+ # AllowBreakBeforeNoexceptSpecifier: OnlyWithParen
18
+ AllowShortBlocksOnASingleLine: Never
19
+ AllowShortCaseLabelsOnASingleLine: false
20
+ AllowShortFunctionsOnASingleLine: Inline
21
+ AllowShortIfStatementsOnASingleLine: Never
22
+ AllowShortLambdasOnASingleLine: Inline
23
+ AllowShortLoopsOnASingleLine: false
24
+ AlwaysBreakBeforeMultilineStrings: true
25
+ BinPackArguments: true
26
+ BinPackParameters: true # OnePerLine
27
+ BitFieldColonSpacing: Both
28
+ BreakBeforeBraces: Custom # Attach
29
+ BraceWrapping:
30
+ AfterCaseLabel: true
31
+ AfterClass: false
32
+ AfterControlStatement: false
33
+ AfterEnum: false
34
+ AfterFunction: false
35
+ AfterNamespace: false
36
+ AfterObjCDeclaration: false
37
+ AfterStruct: false
38
+ AfterUnion: false
39
+ AfterExternBlock: false
40
+ BeforeCatch: false
41
+ BeforeElse: false
42
+ BeforeLambdaBody: false
43
+ BeforeWhile: false
44
+ IndentBraces: false
45
+ SplitEmptyFunction: false
46
+ SplitEmptyRecord: false
47
+ SplitEmptyNamespace: false
48
+ # BreakAdjacentStringLiterals: true
49
+ BreakAfterAttributes: Never
50
+ BreakBeforeBinaryOperators: None
51
+ BreakBeforeInlineASMColon: OnlyMultiline
52
+ BreakBeforeTernaryOperators: false
53
+ # BreakBinaryOperations: Never
54
+ BreakConstructorInitializers: AfterColon
55
+ # BreakFunctionDefinitionParameters: false
56
+ BreakInheritanceList: AfterComma
57
+ BreakStringLiterals: true
58
+ # BreakTemplateDeclarations: Yes
59
+ ColumnLimit: 120
60
+ CommentPragmas: '^ IWYU pragma:'
61
+ CompactNamespaces: false
62
+ ConstructorInitializerIndentWidth: 4
63
+ ContinuationIndentWidth: 4
64
+ Cpp11BracedListStyle: false
65
+ DerivePointerAlignment: false
66
+ DisableFormat: false
67
+ EmptyLineBeforeAccessModifier: Leave
68
+ EmptyLineAfterAccessModifier: Never
69
+ ExperimentalAutoDetectBinPacking: false
70
+ FixNamespaceComments: true
71
+ IncludeBlocks: Regroup
72
+ IncludeCategories:
73
+ - Regex: '^<.*\.h>'
74
+ Priority: 1
75
+ SortPriority: 0
76
+ - Regex: '^<.*'
77
+ Priority: 2
78
+ SortPriority: 0
79
+ - Regex: '.*'
80
+ Priority: 3
81
+ SortPriority: 0
82
+ IncludeIsMainRegex: '([-_](test|unittest))?$'
83
+ IncludeIsMainSourceRegex: ''
84
+ IndentAccessModifiers: false
85
+ IndentCaseBlocks: true
86
+ IndentCaseLabels: true
87
+ IndentExternBlock: NoIndent
88
+ IndentGotoLabels: false
89
+ IndentPPDirectives: AfterHash
90
+ IndentWidth: 4
91
+ IndentWrappedFunctionNames: false
92
+ InsertBraces: true # NOTE: may lead to incorrect formatting
93
+ InsertNewlineAtEOF: true
94
+ JavaScriptQuotes: Leave
95
+ JavaScriptWrapImports: true
96
+ KeepEmptyLinesAtTheStartOfBlocks: false
97
+ LambdaBodyIndentation: Signature
98
+ LineEnding: LF
99
+ MacroBlockBegin: ''
100
+ MacroBlockEnd: ''
101
+ MaxEmptyLinesToKeep: 1
102
+ NamespaceIndentation: None
103
+ ObjCBinPackProtocolList: Auto
104
+ ObjCBlockIndentWidth: 4
105
+ ObjCSpaceAfterProperty: true
106
+ ObjCSpaceBeforeProtocolList: true
107
+ PPIndentWidth: -1
108
+ PackConstructorInitializers: CurrentLine
109
+ PenaltyBreakAssignment: 2
110
+ PenaltyBreakBeforeFirstCallParameter: 1
111
+ PenaltyBreakComment: 300
112
+ PenaltyBreakFirstLessLess: 120
113
+ PenaltyBreakString: 1000
114
+ PenaltyBreakTemplateDeclaration: 10
115
+ PenaltyExcessCharacter: 1000000
116
+ PenaltyReturnTypeOnItsOwnLine: 200
117
+ PointerAlignment: Middle
118
+ QualifierAlignment: Left
119
+ #QualifierOrder: ['static', 'inline', 'friend', 'constexpr', 'const', 'volatile', 'type', 'restrict']
120
+ RawStringFormats:
121
+ - Language: Cpp
122
+ Delimiters:
123
+ - cc
124
+ - CC
125
+ - cpp
126
+ - Cpp
127
+ - CPP
128
+ - 'c++'
129
+ - 'C++'
130
+ CanonicalDelimiter: ''
131
+ ReferenceAlignment: Middle
132
+ ReflowComments: false # IndentOnly
133
+ SeparateDefinitionBlocks: Always
134
+ SortIncludes: CaseInsensitive
135
+ SortUsingDeclarations: LexicographicNumeric
136
+ SpaceAfterCStyleCast: true
137
+ SpaceAfterLogicalNot: false
138
+ SpaceAfterTemplateKeyword: true
139
+ SpaceBeforeAssignmentOperators: true
140
+ SpaceBeforeCpp11BracedList: false
141
+ SpaceBeforeCtorInitializerColon: true
142
+ SpaceBeforeInheritanceColon: true
143
+ SpaceBeforeParens: ControlStatements
144
+ SpaceBeforeRangeBasedForLoopColon: true
145
+ SpaceInEmptyBlock: false
146
+ SpaceInEmptyParentheses: false
147
+ SpacesBeforeTrailingComments: 2
148
+ SpacesInAngles: Never
149
+ SpacesInContainerLiterals: true
150
+ SpacesInLineCommentPrefix:
151
+ Minimum: 1
152
+ Maximum: -1
153
+ SpacesInParentheses: false
154
+ SpacesInSquareBrackets: false
155
+ SpaceBeforeSquareBrackets: false
156
+ Standard: c++17
157
+ TabWidth: 4
158
+ UseTab: Never
159
+ WhitespaceSensitiveMacros: ['STRINGIZE']
160
+ ...
161
+
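A hedged usage sketch for this style file; clang-format picks up the nearest .clang-format when run with --style=file (commands run from the llama.cpp/ directory, and the target file is illustrative):
$ clang-format --style=file -i src/llama.cpp    # reformat one file in place
$ git clang-format master                        # or reformat only lines changed vs. master (needs git-clang-format)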
llama.cpp/.clang-tidy ADDED
@@ -0,0 +1,26 @@
1
+ ---
2
+ Checks: >
3
+ bugprone-*,
4
+ -bugprone-easily-swappable-parameters,
5
+ -bugprone-implicit-widening-of-multiplication-result,
6
+ -bugprone-misplaced-widening-cast,
7
+ -bugprone-narrowing-conversions,
8
+ readability-*,
9
+ -readability-avoid-unconditional-preprocessor-if,
10
+ -readability-function-cognitive-complexity,
11
+ -readability-identifier-length,
12
+ -readability-implicit-bool-conversion,
13
+ -readability-magic-numbers,
14
+ -readability-uppercase-literal-suffix,
15
+ -readability-simplify-boolean-expr,
16
+ clang-analyzer-*,
17
+ -clang-analyzer-security.insecureAPI.DeprecatedOrUnsafeBufferHandling,
18
+ performance-*,
19
+ portability-*,
20
+ -portability-simd-intrinsics,
21
+ misc-*,
22
+ -misc-const-correctness,
23
+ -misc-non-private-member-variables-in-classes,
24
+ -misc-no-recursion,
25
+ -misc-use-anonymous-namespace,
26
+ FormatStyle: none
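A minimal sketch of running these checks; clang-tidy needs a compilation database, and the build directory name is an assumption (run from the llama.cpp/ directory):
$ cmake -B build -DCMAKE_EXPORT_COMPILE_COMMANDS=ON
$ clang-tidy -p build src/llama.cpp              # checks are taken from .clang-tidy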
llama.cpp/.devops/cloud-v-pipeline ADDED
@@ -0,0 +1,22 @@
1
+ node('x86_runner1'){ // Running on x86 runner containing latest vector qemu, latest vector gcc and all the necessary libraries
2
+ stage('Cleanup'){
3
+ cleanWs() // Cleaning previous CI build in workspace
4
+ }
5
+ stage('checkout repo'){
6
+ retry(5){ // Retry if cloning fails for any reason
7
+ checkout scm // Clone the repo on Runner
8
+ }
9
+ }
10
+ stage('Compiling llama.cpp'){
11
+ sh'''#!/bin/bash
12
+ make RISCV=1 RISCV_CROSS_COMPILE=1 # Compiling llama for RISC-V
13
+ '''
14
+ }
15
+ stage('Running llama.cpp'){
16
+ sh'''#!/bin/bash
17
+ module load gnu-bin2/0.1 # loading latest versions of vector qemu and vector gcc
18
+ qemu-riscv64 -L /softwares/gnu-bin2/sysroot -cpu rv64,v=true,vlen=256,elen=64,vext_spec=v1.0 ./llama-cli -m /home/alitariq/codellama-7b.Q4_K_M.gguf -p "Anything" -n 9 > llama_log.txt # Running llama.cpp on vector qemu-riscv64
19
+ cat llama_log.txt # Printing results
20
+ '''
21
+ }
22
+ }
llama.cpp/.devops/cpu.Dockerfile ADDED
@@ -0,0 +1,92 @@
1
+ ARG UBUNTU_VERSION=22.04
2
+
3
+ FROM ubuntu:$UBUNTU_VERSION AS build
4
+
5
+ ARG TARGETARCH
6
+
7
+ ARG GGML_CPU_ARM_ARCH=armv8-a
8
+
9
+ RUN apt-get update && \
10
+ apt-get install -y build-essential git cmake libcurl4-openssl-dev
11
+
12
+ WORKDIR /app
13
+
14
+ COPY . .
15
+
16
+ RUN if [ "$TARGETARCH" = "amd64" ]; then \
17
+ cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_BACKEND_DL=ON -DGGML_CPU_ALL_VARIANTS=ON; \
18
+ elif [ "$TARGETARCH" = "arm64" ]; then \
19
+ cmake -S . -B build -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON -DGGML_NATIVE=OFF -DGGML_CPU_ARM_ARCH=${GGML_CPU_ARM_ARCH}; \
20
+ else \
21
+ echo "Unsupported architecture"; \
22
+ exit 1; \
23
+ fi && \
24
+ cmake --build build -j $(nproc)
25
+
26
+ RUN mkdir -p /app/lib && \
27
+ find build -name "*.so" -exec cp {} /app/lib \;
28
+
29
+ RUN mkdir -p /app/full \
30
+ && cp build/bin/* /app/full \
31
+ && cp *.py /app/full \
32
+ && cp -r gguf-py /app/full \
33
+ && cp -r requirements /app/full \
34
+ && cp requirements.txt /app/full \
35
+ && cp .devops/tools.sh /app/full/tools.sh
36
+
37
+ ## Base image
38
+ FROM ubuntu:$UBUNTU_VERSION AS base
39
+
40
+ RUN apt-get update \
41
+ && apt-get install -y libgomp1 curl\
42
+ && apt autoremove -y \
43
+ && apt clean -y \
44
+ && rm -rf /tmp/* /var/tmp/* \
45
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
46
+ && find /var/cache -type f -delete
47
+
48
+ COPY --from=build /app/lib/ /app
49
+
50
+ ### Full
51
+ FROM base AS full
52
+
53
+ COPY --from=build /app/full /app
54
+
55
+ WORKDIR /app
56
+
57
+ RUN apt-get update \
58
+ && apt-get install -y \
59
+ git \
60
+ python3 \
61
+ python3-pip \
62
+ && pip install --upgrade pip setuptools wheel \
63
+ && pip install -r requirements.txt \
64
+ && apt autoremove -y \
65
+ && apt clean -y \
66
+ && rm -rf /tmp/* /var/tmp/* \
67
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
68
+ && find /var/cache -type f -delete
69
+
70
+ ENTRYPOINT ["/app/tools.sh"]
71
+
72
+ ### Light, CLI only
73
+ FROM base AS light
74
+
75
+ COPY --from=build /app/full/llama-cli /app
76
+
77
+ WORKDIR /app
78
+
79
+ ENTRYPOINT [ "/app/llama-cli" ]
80
+
81
+ ### Server, Server only
82
+ FROM base AS server
83
+
84
+ ENV LLAMA_ARG_HOST=0.0.0.0
85
+
86
+ COPY --from=build /app/full/llama-server /app
87
+
88
+ WORKDIR /app
89
+
90
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
91
+
92
+ ENTRYPOINT [ "/app/llama-server" ]
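A build/run sketch for this CPU image, run from the llama.cpp/ directory; TARGETARCH is only populated automatically under BuildKit/buildx, and the image tag and model path are placeholders:
$ docker buildx build --platform linux/amd64 -f .devops/cpu.Dockerfile --target server -t llama-cpp:server-cpu .
$ docker run --rm -p 8080:8080 -v "$PWD/models:/models" llama-cpp:server-cpu -m /models/model.gguf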
llama.cpp/.devops/cuda.Dockerfile ADDED
@@ -0,0 +1,94 @@
1
+ ARG UBUNTU_VERSION=22.04
2
+ # This needs to generally match the container host's environment.
3
+ ARG CUDA_VERSION=12.6.0
4
+ # Target the CUDA build image
5
+ ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
6
+
7
+ ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
8
+
9
+ FROM ${BASE_CUDA_DEV_CONTAINER} AS build
10
+
11
+ # CUDA architecture to build for (defaults to all supported archs)
12
+ ARG CUDA_DOCKER_ARCH=default
13
+
14
+ RUN apt-get update && \
15
+ apt-get install -y build-essential cmake python3 python3-pip git libcurl4-openssl-dev libgomp1
16
+
17
+ WORKDIR /app
18
+
19
+ COPY . .
20
+
21
+ RUN if [ "${CUDA_DOCKER_ARCH}" != "default" ]; then \
22
+ export CMAKE_ARGS="-DCMAKE_CUDA_ARCHITECTURES=${CUDA_DOCKER_ARCH}"; \
23
+ fi && \
24
+ cmake -B build -DGGML_NATIVE=OFF -DGGML_CUDA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
25
+ cmake --build build --config Release -j$(nproc)
26
+
27
+ RUN mkdir -p /app/lib && \
28
+ find build -name "*.so" -exec cp {} /app/lib \;
29
+
30
+ RUN mkdir -p /app/full \
31
+ && cp build/bin/* /app/full \
32
+ && cp *.py /app/full \
33
+ && cp -r gguf-py /app/full \
34
+ && cp -r requirements /app/full \
35
+ && cp requirements.txt /app/full \
36
+ && cp .devops/tools.sh /app/full/tools.sh
37
+
38
+ ## Base image
39
+ FROM ${BASE_CUDA_RUN_CONTAINER} AS base
40
+
41
+ RUN apt-get update \
42
+ && apt-get install -y libgomp1 curl\
43
+ && apt autoremove -y \
44
+ && apt clean -y \
45
+ && rm -rf /tmp/* /var/tmp/* \
46
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
47
+ && find /var/cache -type f -delete
48
+
49
+ COPY --from=build /app/lib/ /app
50
+
51
+ ### Full
52
+ FROM base AS full
53
+
54
+ COPY --from=build /app/full /app
55
+
56
+ WORKDIR /app
57
+
58
+ RUN apt-get update \
59
+ && apt-get install -y \
60
+ git \
61
+ python3 \
62
+ python3-pip \
63
+ && pip install --upgrade pip setuptools wheel \
64
+ && pip install -r requirements.txt \
65
+ && apt autoremove -y \
66
+ && apt clean -y \
67
+ && rm -rf /tmp/* /var/tmp/* \
68
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
69
+ && find /var/cache -type f -delete
70
+
71
+
72
+ ENTRYPOINT ["/app/tools.sh"]
73
+
74
+ ### Light, CLI only
75
+ FROM base AS light
76
+
77
+ COPY --from=build /app/full/llama-cli /app
78
+
79
+ WORKDIR /app
80
+
81
+ ENTRYPOINT [ "/app/llama-cli" ]
82
+
83
+ ### Server, Server only
84
+ FROM base AS server
85
+
86
+ ENV LLAMA_ARG_HOST=0.0.0.0
87
+
88
+ COPY --from=build /app/full/llama-server /app
89
+
90
+ WORKDIR /app
91
+
92
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
93
+
94
+ ENTRYPOINT [ "/app/llama-server" ]
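A hedged sketch for the CUDA image, run from the llama.cpp/ directory; the compute capability, tag, and model path are placeholders, and the host needs the NVIDIA Container Toolkit for --gpus:
$ docker build -f .devops/cuda.Dockerfile --build-arg CUDA_DOCKER_ARCH=86 --target light -t llama-cpp:light-cuda .
$ docker run --rm --gpus all -v "$PWD/models:/models" llama-cpp:light-cuda -m /models/model.gguf -p "Hello" -ngl 99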
llama.cpp/.devops/intel.Dockerfile ADDED
@@ -0,0 +1,91 @@
1
+ ARG ONEAPI_VERSION=2025.0.0-0-devel-ubuntu22.04
2
+
3
+ ## Build Image
4
+
5
+ FROM intel/oneapi-basekit:$ONEAPI_VERSION AS build
6
+
7
+ ARG GGML_SYCL_F16=OFF
8
+ RUN apt-get update && \
9
+ apt-get install -y git libcurl4-openssl-dev
10
+
11
+ WORKDIR /app
12
+
13
+ COPY . .
14
+
15
+ RUN if [ "${GGML_SYCL_F16}" = "ON" ]; then \
16
+ echo "GGML_SYCL_F16 is set" \
17
+ && export OPT_SYCL_F16="-DGGML_SYCL_F16=ON"; \
18
+ fi && \
19
+ echo "Building with dynamic libs" && \
20
+ cmake -B build -DGGML_NATIVE=OFF -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_CURL=ON ${OPT_SYCL_F16} && \
21
+ cmake --build build --config Release -j$(nproc)
22
+
23
+ RUN mkdir -p /app/lib && \
24
+ find build -name "*.so" -exec cp {} /app/lib \;
25
+
26
+ RUN mkdir -p /app/full \
27
+ && cp build/bin/* /app/full \
28
+ && cp *.py /app/full \
29
+ && cp -r gguf-py /app/full \
30
+ && cp -r requirements /app/full \
31
+ && cp requirements.txt /app/full \
32
+ && cp .devops/tools.sh /app/full/tools.sh
33
+
34
+ FROM intel/oneapi-basekit:$ONEAPI_VERSION AS base
35
+
36
+ RUN apt-get update \
37
+ && apt-get install -y libgomp1 curl\
38
+ && apt autoremove -y \
39
+ && apt clean -y \
40
+ && rm -rf /tmp/* /var/tmp/* \
41
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
42
+ && find /var/cache -type f -delete
43
+
44
+ ### Full
45
+ FROM base AS full
46
+
47
+ COPY --from=build /app/lib/ /app
48
+ COPY --from=build /app/full /app
49
+
50
+ WORKDIR /app
51
+
52
+ RUN apt-get update \
53
+ && apt-get install -y \
54
+ git \
55
+ python3 \
56
+ python3-pip \
57
+ && pip install --upgrade pip setuptools wheel \
58
+ && pip install -r requirements.txt \
59
+ && apt autoremove -y \
60
+ && apt clean -y \
61
+ && rm -rf /tmp/* /var/tmp/* \
62
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
63
+ && find /var/cache -type f -delete
64
+
65
+
66
+ ENTRYPOINT ["/app/tools.sh"]
67
+
68
+ ### Light, CLI only
69
+ FROM base AS light
70
+
71
+ COPY --from=build /app/lib/ /app
72
+ COPY --from=build /app/full/llama-cli /app
73
+
74
+ WORKDIR /app
75
+
76
+ ENTRYPOINT [ "/app/llama-cli" ]
77
+
78
+ ### Server, Server only
79
+ FROM base AS server
80
+
81
+ ENV LLAMA_ARG_HOST=0.0.0.0
82
+
83
+ COPY --from=build /app/lib/ /app
84
+ COPY --from=build /app/full/llama-server /app
85
+
86
+ WORKDIR /app
87
+
88
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
89
+
90
+ ENTRYPOINT [ "/app/llama-server" ]
91
+
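A hedged sketch for the SYCL image, run from the llama.cpp/ directory; the Intel GPU is usually passed through via /dev/dri, and the tag and model path are placeholders:
$ docker build -f .devops/intel.Dockerfile --build-arg GGML_SYCL_F16=ON --target light -t llama-cpp:light-sycl .
$ docker run --rm --device /dev/dri -v "$PWD/models:/models" llama-cpp:light-sycl -m /models/model.gguf -p "Hello"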
llama.cpp/.devops/llama-cli-cann.Dockerfile ADDED
@@ -0,0 +1,44 @@
1
+ ARG ASCEND_VERSION=8.0.rc2.alpha003-910b-openeuler22.03-py3.8
2
+
3
+ FROM ascendai/cann:$ASCEND_VERSION AS build
4
+
5
+ WORKDIR /app
6
+
7
+ COPY . .
8
+
9
+ RUN yum install -y gcc g++ cmake make
10
+ ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
11
+ ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
12
+ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
13
+ ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
14
+ ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
15
+ ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
16
+ ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
17
+ ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
18
+ ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}
19
+
20
+ # find libascend_hal.so, because the driver hasn't been mounted yet.
21
+ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/runtime/lib64/stub:$LD_LIBRARY_PATH
22
+
23
+ RUN echo "Building with static libs" && \
24
+ source /usr/local/Ascend/ascend-toolkit/set_env.sh --force && \
25
+ cmake -B build -DGGML_NATIVE=OFF -DGGML_CANN=ON -DBUILD_SHARED_LIBS=OFF && \
26
+ cmake --build build --config Release --target llama-cli
27
+
28
+ # TODO: use image with NNRT
29
+ FROM ascendai/cann:$ASCEND_VERSION AS runtime
30
+ COPY --from=build /app/build/bin/llama-cli /llama-cli
31
+
32
+ ENV LC_ALL=C.utf8
33
+
34
+ ENV ASCEND_TOOLKIT_HOME=/usr/local/Ascend/ascend-toolkit/latest
35
+ ENV LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:$LIBRARY_PATH
36
+ ENV LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/lib64/plugin/opskernel:${ASCEND_TOOLKIT_HOME}/lib64/plugin/nnengine:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe/op_tiling:${LD_LIBRARY_PATH}
37
+ ENV PYTHONPATH=${ASCEND_TOOLKIT_HOME}/python/site-packages:${ASCEND_TOOLKIT_HOME}/opp/built-in/op_impl/ai_core/tbe:${PYTHONPATH}
38
+ ENV PATH=${ASCEND_TOOLKIT_HOME}/bin:${ASCEND_TOOLKIT_HOME}/compiler/ccec_compiler/bin:${PATH}
39
+ ENV ASCEND_AICPU_PATH=${ASCEND_TOOLKIT_HOME}
40
+ ENV ASCEND_OPP_PATH=${ASCEND_TOOLKIT_HOME}/opp
41
+ ENV TOOLCHAIN_HOME=${ASCEND_TOOLKIT_HOME}/toolkit
42
+ ENV ASCEND_HOME_PATH=${ASCEND_TOOLKIT_HOME}
43
+
44
+ ENTRYPOINT ["/llama-cli" ]
llama.cpp/.devops/llama-cpp-cuda.srpm.spec ADDED
@@ -0,0 +1,83 @@
1
+ # SRPM for building from source and packaging an RPM for RPM-based distros.
2
+ # https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
3
+ # Built and maintained by John Boero - [email protected]
4
+ # In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
5
+
6
+ # Notes for llama.cpp:
7
+ # 1. Tags are currently based on hash - which will not sort asciibetically.
8
+ # We need to declare standard versioning if people want to sort latest releases.
9
+ # 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
10
+ # 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
11
+ # Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
12
+ # 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
13
+ # It is up to the user to install the correct vendor-specific support.
14
+
15
+ Name: llama.cpp-cuda
16
+ Version: %( date "+%%Y%%m%%d" )
17
+ Release: 1%{?dist}
18
+ Summary: CUDA-accelerated inference of LLaMA models in pure C/C++
19
+ License: MIT
20
+ Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
21
+ BuildRequires: coreutils make gcc-c++ git cuda-toolkit
22
+ Requires: cuda-toolkit
23
+ URL: https://github.com/ggerganov/llama.cpp
24
+
25
+ %define debug_package %{nil}
26
+ %define source_date_epoch_from_changelog 0
27
+
28
+ %description
29
+ CUDA-accelerated inference for Meta's LLaMA 2 models using default options.
30
+
31
+ %prep
32
+ %setup -n llama.cpp-master
33
+
34
+ %build
35
+ make -j GGML_CUDA=1
36
+
37
+ %install
38
+ mkdir -p %{buildroot}%{_bindir}/
39
+ cp -p llama-cli %{buildroot}%{_bindir}/llama-cuda-cli
40
+ cp -p llama-server %{buildroot}%{_bindir}/llama-cuda-server
41
+ cp -p llama-simple %{buildroot}%{_bindir}/llama-cuda-simple
42
+
43
+ mkdir -p %{buildroot}/usr/lib/systemd/system
44
+ %{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llamacuda.service
45
+ [Unit]
46
+ Description=Llama.cpp server (CUDA build).
47
+ After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
48
+
49
+ [Service]
50
+ Type=simple
51
+ EnvironmentFile=/etc/sysconfig/llama
52
+ ExecStart=/usr/bin/llama-cuda-server $LLAMA_ARGS
53
+ ExecReload=/bin/kill -s HUP $MAINPID
54
+ Restart=no
55
+
56
+ [Install]
57
+ WantedBy=default.target
58
+ EOF
59
+
60
+ mkdir -p %{buildroot}/etc/sysconfig
61
+ %{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
62
+ LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
63
+ EOF
64
+
65
+ %clean
66
+ rm -rf %{buildroot}
67
+ rm -rf %{_builddir}/*
68
+
69
+ %files
70
+ %{_bindir}/llama-cuda-cli
71
+ %{_bindir}/llama-cuda-server
72
+ %{_bindir}/llama-cuda-simple
73
+ /usr/lib/systemd/system/llamacuda.service
74
+ %config /etc/sysconfig/llama
75
+
76
+ %pre
77
+
78
+ %post
79
+
80
+ %preun
81
+ %postun
82
+
83
+ %changelog
llama.cpp/.devops/llama-cpp.srpm.spec ADDED
@@ -0,0 +1,85 @@
1
+ # SRPM for building from source and packaging an RPM for RPM-based distros.
2
+ # https://docs.fedoraproject.org/en-US/quick-docs/creating-rpm-packages
3
+ # Built and maintained by John Boero - [email protected]
4
+ # In honor of Seth Vidal https://www.redhat.com/it/blog/thank-you-seth-vidal
5
+
6
+ # Notes for llama.cpp:
7
+ # 1. Tags are currently based on hash - which will not sort asciibetically.
8
+ # We need to declare standard versioning if people want to sort latest releases.
9
+ # In the meantime, YYYYMMDD format will be used.
10
+ # 2. Builds for CUDA/OpenCL support are separate, with different dependencies.
11
+ # 3. NVidia's developer repo must be enabled with nvcc, cublas, clblas, etc installed.
12
+ # Example: https://developer.download.nvidia.com/compute/cuda/repos/fedora37/x86_64/cuda-fedora37.repo
13
+ # 4. OpenCL/CLBLAST support simply requires the ICD loader and basic opencl libraries.
14
+ # It is up to the user to install the correct vendor-specific support.
15
+
16
+ Name: llama.cpp
17
+ Version: %( date "+%%Y%%m%%d" )
18
+ Release: 1%{?dist}
19
+ Summary: CPU Inference of LLaMA model in pure C/C++ (no CUDA/OpenCL)
20
+ License: MIT
21
+ Source0: https://github.com/ggerganov/llama.cpp/archive/refs/heads/master.tar.gz
22
+ BuildRequires: coreutils make gcc-c++ git libstdc++-devel
23
+ Requires: libstdc++
24
+ URL: https://github.com/ggerganov/llama.cpp
25
+
26
+ %define debug_package %{nil}
27
+ %define source_date_epoch_from_changelog 0
28
+
29
+ %description
30
+ CPU inference for Meta's LLaMA 2 models using default options.
31
+ Models are not included in this package and must be downloaded separately.
32
+
33
+ %prep
34
+ %setup -n llama.cpp-master
35
+
36
+ %build
37
+ make -j
38
+
39
+ %install
40
+ mkdir -p %{buildroot}%{_bindir}/
41
+ cp -p llama-cli %{buildroot}%{_bindir}/llama-cli
42
+ cp -p llama-server %{buildroot}%{_bindir}/llama-server
43
+ cp -p llama-simple %{buildroot}%{_bindir}/llama-simple
44
+
45
+ mkdir -p %{buildroot}/usr/lib/systemd/system
46
+ %{__cat} <<EOF > %{buildroot}/usr/lib/systemd/system/llama.service
47
+ [Unit]
48
+ Description=Llama.cpp server, CPU only (no GPU support in this build).
49
+ After=syslog.target network.target local-fs.target remote-fs.target nss-lookup.target
50
+
51
+ [Service]
52
+ Type=simple
53
+ EnvironmentFile=/etc/sysconfig/llama
54
+ ExecStart=/usr/bin/llama-server $LLAMA_ARGS
55
+ ExecReload=/bin/kill -s HUP $MAINPID
56
+ Restart=no
57
+
58
+ [Install]
59
+ WantedBy=default.target
60
+ EOF
61
+
62
+ mkdir -p %{buildroot}/etc/sysconfig
63
+ %{__cat} <<EOF > %{buildroot}/etc/sysconfig/llama
64
+ LLAMA_ARGS="-m /opt/llama2/ggml-model-f32.bin"
65
+ EOF
66
+
67
+ %clean
68
+ rm -rf %{buildroot}
69
+ rm -rf %{_builddir}/*
70
+
71
+ %files
72
+ %{_bindir}/llama-cli
73
+ %{_bindir}/llama-server
74
+ %{_bindir}/llama-simple
75
+ /usr/lib/systemd/system/llama.service
76
+ %config /etc/sysconfig/llama
77
+
78
+ %pre
79
+
80
+ %post
81
+
82
+ %preun
83
+ %postun
84
+
85
+ %changelog
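A hedged sketch of building an RPM from either spec with the stock Fedora tooling (rpmdev-setuptree and spectool come from rpmdevtools; paths are the defaults they create):
$ rpmdev-setuptree
$ spectool -g -R llama.cpp/.devops/llama-cpp.srpm.spec   # fetch Source0 into ~/rpmbuild/SOURCES
$ rpmbuild -ba llama.cpp/.devops/llama-cpp.srpm.spec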
llama.cpp/.devops/musa.Dockerfile ADDED
@@ -0,0 +1,108 @@
1
+ ARG UBUNTU_VERSION=22.04
2
+ # This needs to generally match the container host's environment.
3
+ ARG MUSA_VERSION=rc3.1.0
4
+ # Target the MUSA build image
5
+ ARG BASE_MUSA_DEV_CONTAINER=mthreads/musa:${MUSA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
6
+
7
+ ARG BASE_MUSA_RUN_CONTAINER=mthreads/musa:${MUSA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
8
+
9
+ FROM ${BASE_MUSA_DEV_CONTAINER} AS build
10
+
11
+ # MUSA architecture to build for (defaults to all supported archs)
12
+ ARG MUSA_DOCKER_ARCH=default
13
+
14
+ RUN apt-get update && \
15
+ apt-get install -y \
16
+ build-essential \
17
+ cmake \
18
+ python3 \
19
+ python3-pip \
20
+ git \
21
+ libcurl4-openssl-dev \
22
+ libgomp1
23
+
24
+ COPY requirements.txt requirements.txt
25
+ COPY requirements requirements
26
+
27
+ RUN pip install --upgrade pip setuptools wheel \
28
+ && pip install -r requirements.txt
29
+
30
+ WORKDIR /app
31
+
32
+ COPY . .
33
+
34
+ # Use the default MUSA archs if not specified
35
+ RUN if [ "${MUSA_DOCKER_ARCH}" != "default" ]; then \
36
+ export CMAKE_ARGS="-DMUSA_ARCHITECTURES=${MUSA_DOCKER_ARCH}"; \
37
+ fi && \
38
+ cmake -B build -DGGML_NATIVE=OFF -DGGML_MUSA=ON -DLLAMA_CURL=ON ${CMAKE_ARGS} -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined . && \
39
+ cmake --build build --config Release -j$(nproc)
40
+
41
+ RUN mkdir -p /app/lib && \
42
+ find build -name "*.so" -exec cp {} /app/lib \;
43
+
44
+ RUN mkdir -p /app/full \
45
+ && cp build/bin/* /app/full \
46
+ && cp *.py /app/full \
47
+ && cp -r gguf-py /app/full \
48
+ && cp -r requirements /app/full \
49
+ && cp requirements.txt /app/full \
50
+ && cp .devops/tools.sh /app/full/tools.sh
51
+
52
+ ## Base image
53
+ FROM ${BASE_MUSA_RUN_CONTAINER} AS base
54
+
55
+ RUN apt-get update \
56
+ && apt-get install -y libgomp1 curl\
57
+ && apt autoremove -y \
58
+ && apt clean -y \
59
+ && rm -rf /tmp/* /var/tmp/* \
60
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
61
+ && find /var/cache -type f -delete
62
+
63
+ COPY --from=build /app/lib/ /app
64
+
65
+ ### Full
66
+ FROM base AS full
67
+
68
+ COPY --from=build /app/full /app
69
+
70
+ WORKDIR /app
71
+
72
+ RUN apt-get update \
73
+ && apt-get install -y \
74
+ git \
75
+ python3 \
76
+ python3-pip \
77
+ && pip install --upgrade pip setuptools wheel \
78
+ && pip install -r requirements.txt \
79
+ && apt autoremove -y \
80
+ && apt clean -y \
81
+ && rm -rf /tmp/* /var/tmp/* \
82
+ && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
83
+ && find /var/cache -type f -delete
84
+
85
+
86
+ ENTRYPOINT ["/app/tools.sh"]
87
+
88
+ ### Light, CLI only
89
+ FROM base AS light
90
+
91
+ COPY --from=build /app/full/llama-cli /app
92
+
93
+ WORKDIR /app
94
+
95
+ ENTRYPOINT [ "/app/llama-cli" ]
96
+
97
+ ### Server, Server only
98
+ FROM base AS server
99
+
100
+ ENV LLAMA_ARG_HOST=0.0.0.0
101
+
102
+ COPY --from=build /app/full/llama-server /app
103
+
104
+ WORKDIR /app
105
+
106
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
107
+
108
+ ENTRYPOINT [ "/app/llama-server" ]
llama.cpp/.devops/nix/apps.nix ADDED
@@ -0,0 +1,21 @@
1
+ {
2
+ perSystem =
3
+ { config, lib, ... }:
4
+ {
5
+ apps =
6
+ let
7
+ inherit (config.packages) default;
8
+ binaries = [
9
+ "llama-cli"
10
+ "llama-embedding"
11
+ "llama-server"
12
+ "llama-quantize"
13
+ ];
14
+ mkApp = name: {
15
+ type = "app";
16
+ program = "${default}/bin/${name}";
17
+ };
18
+ in
19
+ lib.genAttrs binaries mkApp;
20
+ };
21
+ }
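These per-system apps let the built binaries be run straight from the flake; a minimal sketch, assuming the flake at the llama.cpp/ root and a placeholder model path:
$ nix run .#llama-cli -- -m models/model.gguf -p "Hello"
$ nix run .#llama-server -- -m models/model.gguf --port 8080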
llama.cpp/.devops/nix/devshells.nix ADDED
@@ -0,0 +1,52 @@
1
+ { inputs, ... }:
2
+
3
+ {
4
+ perSystem =
5
+ {
6
+ config,
7
+ lib,
8
+ system,
9
+ ...
10
+ }:
11
+ {
12
+ devShells =
13
+ let
14
+ pkgs = import inputs.nixpkgs { inherit system; };
15
+ stdenv = pkgs.stdenv;
16
+ scripts = config.packages.python-scripts;
17
+ in
18
+ lib.pipe (config.packages) [
19
+ (lib.concatMapAttrs (
20
+ name: package: {
21
+ ${name} = pkgs.mkShell {
22
+ name = "${name}";
23
+ inputsFrom = [ package ];
24
+ shellHook = ''
25
+ echo "Entering ${name} devShell"
26
+ '';
27
+ };
28
+ "${name}-extra" =
29
+ if (name == "python-scripts") then
30
+ null
31
+ else
32
+ pkgs.mkShell {
33
+ name = "${name}-extra";
34
+ inputsFrom = [
35
+ package
36
+ scripts
37
+ ];
38
+ # Extra packages that *may* be used by some scripts
39
+ packages = [
40
+ pkgs.python3Packages.tiktoken
41
+ ];
42
+ shellHook = ''
43
+ echo "Entering ${name} devShell"
44
+ addToSearchPath "LD_LIBRARY_PATH" "${lib.getLib stdenv.cc.cc}/lib"
45
+ '';
46
+ };
47
+ }
48
+ ))
49
+ (lib.filterAttrs (name: value: value != null))
50
+ ];
51
+ };
52
+ }
llama.cpp/.devops/nix/docker.nix ADDED
@@ -0,0 +1,37 @@
1
+ {
2
+ lib,
3
+ dockerTools,
4
+ buildEnv,
5
+ llama-cpp,
6
+ interactive ? true,
7
+ coreutils,
8
+ }:
9
+
10
+ # A tar that can be fed into `docker load`:
11
+ #
12
+ # $ nix build .#llamaPackages.docker
13
+ # $ docker load < result
14
+
15
+ # For details and variations cf.
16
+ # - https://nixos.org/manual/nixpkgs/unstable/#ssec-pkgs-dockerTools-buildLayeredImage
17
+ # - https://discourse.nixos.org/t/a-faster-dockertools-buildimage-prototype/16922
18
+ # - https://nixery.dev/
19
+
20
+ # Approximate (compressed) sizes, at the time of writing, are:
21
+ #
22
+ # .#llamaPackages.docker: 125M;
23
+ # .#llamaPackagesCuda.docker: 537M;
24
+ # .#legacyPackages.aarch64-linux.llamaPackagesXavier.docker: 415M.
25
+
26
+ dockerTools.buildLayeredImage {
27
+ name = llama-cpp.pname;
28
+ tag = "latest";
29
+
30
+ contents =
31
+ [ llama-cpp ]
32
+ ++ lib.optionals interactive [
33
+ coreutils
34
+ dockerTools.binSh
35
+ dockerTools.caCertificates
36
+ ];
37
+ }
llama.cpp/.devops/nix/jetson-support.nix ADDED
@@ -0,0 +1,39 @@
1
+ { inputs, ... }:
2
+ {
3
+ perSystem =
4
+ {
5
+ config,
6
+ system,
7
+ lib,
8
+ pkgsCuda,
9
+ ...
10
+ }:
11
+ {
12
+ legacyPackages =
13
+ let
14
+ caps.llamaPackagesXavier = "7.2";
15
+ caps.llamaPackagesOrin = "8.7";
16
+ caps.llamaPackagesTX2 = "6.2";
17
+ caps.llamaPackagesNano = "5.3";
18
+
19
+ pkgsFor =
20
+ cap:
21
+ import inputs.nixpkgs {
22
+ inherit system;
23
+ config = {
24
+ cudaSupport = true;
25
+ cudaCapabilities = [ cap ];
26
+ cudaEnableForwardCompat = false;
27
+ inherit (pkgsCuda.config) allowUnfreePredicate;
28
+ };
29
+ };
30
+ in
31
+ builtins.mapAttrs (name: cap: (pkgsFor cap).callPackage ./scope.nix { }) caps;
32
+
33
+ packages = lib.optionalAttrs (system == "aarch64-linux") {
34
+ jetson-xavier = config.legacyPackages.llamaPackagesXavier.llama-cpp;
35
+ jetson-orin = config.legacyPackages.llamaPackagesOrin.llama-cpp;
36
+ jetson-nano = config.legacyPackages.llamaPackagesNano.llama-cpp;
37
+ };
38
+ };
39
+ }
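The Jetson outputs above are only exposed on aarch64-linux; a sketch of building one on such a host (flake at the llama.cpp/ root):
$ nix build .#jetson-orin          # CUDA capability 8.7, per caps.llamaPackagesOrin
$ ./result/bin/llama-cli --help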
llama.cpp/.devops/nix/nixpkgs-instances.nix ADDED
@@ -0,0 +1,45 @@
1
+ { inputs, ... }:
2
+ {
3
+ # The _module.args definitions are passed on to modules as arguments. E.g.
4
+ # the module `{ pkgs ... }: { /* config */ }` implicitly uses
5
+ # `_module.args.pkgs` (defined in this case by flake-parts).
6
+ perSystem =
7
+ { system, ... }:
8
+ {
9
+ _module.args = {
10
+ # Note: bringing up https://zimbatm.com/notes/1000-instances-of-nixpkgs
11
+ # again, the below creates several nixpkgs instances which the
12
+ # flake-centric CLI will be forced to evaluate e.g. on `nix flake show`.
13
+ #
14
+ # This is currently "slow" and "expensive", on a certain scale.
15
+ # This also isn't "right" in that this hinders dependency injection at
16
+ # the level of flake inputs. This might get removed in the foreseeable
17
+ # future.
18
+ #
19
+ # Note that you can use these expressions without Nix
20
+ # (`pkgs.callPackage ./devops/nix/scope.nix { }` is the entry point).
21
+
22
+ pkgsCuda = import inputs.nixpkgs {
23
+ inherit system;
24
+ # Ensure dependencies use CUDA consistently (e.g. that openmpi, ucc,
25
+ # and ucx are built with CUDA support)
26
+ config.cudaSupport = true;
27
+ config.allowUnfreePredicate =
28
+ p:
29
+ builtins.all (
30
+ license:
31
+ license.free
32
+ || builtins.elem license.shortName [
33
+ "CUDA EULA"
34
+ "cuDNN EULA"
35
+ ]
36
+ ) (p.meta.licenses or [ p.meta.license ]);
37
+ };
38
+ # Ensure dependencies use ROCm consistently
39
+ pkgsRocm = import inputs.nixpkgs {
40
+ inherit system;
41
+ config.rocmSupport = true;
42
+ };
43
+ };
44
+ };
45
+ }
llama.cpp/.devops/nix/package-gguf-py.nix ADDED
@@ -0,0 +1,36 @@
1
+ {
2
+ lib,
3
+ llamaVersion,
4
+ numpy,
5
+ tqdm,
6
+ sentencepiece,
7
+ pyyaml,
8
+ poetry-core,
9
+ buildPythonPackage,
10
+ pytestCheckHook,
11
+ }:
12
+
13
+ buildPythonPackage {
14
+ pname = "gguf";
15
+ version = llamaVersion;
16
+ pyproject = true;
17
+ nativeBuildInputs = [ poetry-core ];
18
+ propagatedBuildInputs = [
19
+ numpy
20
+ tqdm
21
+ sentencepiece
22
+ pyyaml
23
+ ];
24
+ src = lib.cleanSource ../../gguf-py;
25
+ pythonImportsCheck = [
26
+ "numpy"
27
+ "gguf"
28
+ ];
29
+ nativeCheckInputs = [ pytestCheckHook ];
30
+ doCheck = true;
31
+ meta = with lib; {
32
+ description = "Python package for writing binary files in the GGUF format";
33
+ license = licenses.mit;
34
+ maintainers = [ maintainers.ditsuke ];
35
+ };
36
+ }
llama.cpp/.devops/nix/package.nix ADDED
@@ -0,0 +1,247 @@
1
+ {
2
+ lib,
3
+ glibc,
4
+ config,
5
+ stdenv,
6
+ runCommand,
7
+ cmake,
8
+ ninja,
9
+ pkg-config,
10
+ git,
11
+ mpi,
12
+ blas,
13
+ cudaPackages,
14
+ autoAddDriverRunpath,
15
+ darwin,
16
+ rocmPackages,
17
+ vulkan-headers,
18
+ vulkan-loader,
19
+ curl,
20
+ shaderc,
21
+ useBlas ?
22
+ builtins.all (x: !x) [
23
+ useCuda
24
+ useMetalKit
25
+ useRocm
26
+ useVulkan
27
+ ]
28
+ && blas.meta.available,
29
+ useCuda ? config.cudaSupport,
30
+ useMetalKit ? stdenv.isAarch64 && stdenv.isDarwin,
31
+ # Increases the runtime closure size by ~700M
32
+ useMpi ? false,
33
+ useRocm ? config.rocmSupport,
34
+ rocmGpuTargets ? builtins.concatStringsSep ";" rocmPackages.clr.gpuTargets,
35
+ enableCurl ? true,
36
+ useVulkan ? false,
37
+ llamaVersion ? "0.0.0", # Arbitrary version, substituted by the flake
38
+
39
+ # It's necessary to consistently use backendStdenv when building with CUDA support,
40
+ # otherwise we get libstdc++ errors downstream.
41
+ effectiveStdenv ? if useCuda then cudaPackages.backendStdenv else stdenv,
42
+ enableStatic ? effectiveStdenv.hostPlatform.isStatic,
43
+ precompileMetalShaders ? false,
44
+ }:
45
+
46
+ let
47
+ inherit (lib)
48
+ cmakeBool
49
+ cmakeFeature
50
+ optionals
51
+ strings
52
+ ;
53
+
54
+ stdenv = throw "Use effectiveStdenv instead";
55
+
56
+ suffices =
57
+ lib.optionals useBlas [ "BLAS" ]
58
+ ++ lib.optionals useCuda [ "CUDA" ]
59
+ ++ lib.optionals useMetalKit [ "MetalKit" ]
60
+ ++ lib.optionals useMpi [ "MPI" ]
61
+ ++ lib.optionals useRocm [ "ROCm" ]
62
+ ++ lib.optionals useVulkan [ "Vulkan" ];
63
+
64
+ pnameSuffix =
65
+ strings.optionalString (suffices != [ ])
66
+ "-${strings.concatMapStringsSep "-" strings.toLower suffices}";
67
+ descriptionSuffix = strings.optionalString (
68
+ suffices != [ ]
69
+ ) ", accelerated with ${strings.concatStringsSep ", " suffices}";
70
+
71
+ xcrunHost = runCommand "xcrunHost" { } ''
72
+ mkdir -p $out/bin
73
+ ln -s /usr/bin/xcrun $out/bin
74
+ '';
75
+
76
+ # apple_sdk is supposed to choose sane defaults, no need to handle isAarch64
77
+ # separately
78
+ darwinBuildInputs =
79
+ with darwin.apple_sdk.frameworks;
80
+ [
81
+ Accelerate
82
+ CoreVideo
83
+ CoreGraphics
84
+ ]
85
+ ++ optionals useMetalKit [ MetalKit ];
86
+
87
+ cudaBuildInputs = with cudaPackages; [
88
+ cuda_cudart
89
+ cuda_cccl # <nv/target>
90
+ libcublas
91
+ ];
92
+
93
+ rocmBuildInputs = with rocmPackages; [
94
+ clr
95
+ hipblas
96
+ rocblas
97
+ ];
98
+
99
+ vulkanBuildInputs = [
100
+ vulkan-headers
101
+ vulkan-loader
102
+ shaderc
103
+ ];
104
+ in
105
+
106
+ effectiveStdenv.mkDerivation (finalAttrs: {
107
+ pname = "llama-cpp${pnameSuffix}";
108
+ version = llamaVersion;
109
+
110
+ # Note: none of the files discarded here are visible in the sandbox or
111
+ # affect the output hash. This also means they can be modified without
112
+ # triggering a rebuild.
113
+ src = lib.cleanSourceWith {
114
+ filter =
115
+ name: type:
116
+ let
117
+ noneOf = builtins.all (x: !x);
118
+ baseName = baseNameOf name;
119
+ in
120
+ noneOf [
121
+ (lib.hasSuffix ".nix" name) # Ignore *.nix files when computing outPaths
122
+ (lib.hasSuffix ".md" name) # Ignore *.md changes whe computing outPaths
123
+ (lib.hasPrefix "." baseName) # Skip hidden files and directories
124
+ (baseName == "flake.lock")
125
+ ];
126
+ src = lib.cleanSource ../../.;
127
+ };
128
+
129
+ postPatch = ''
130
+ substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
131
+ --replace '[bundle pathForResource:@"ggml-metal" ofType:@"metal"];' "@\"$out/bin/ggml-metal.metal\";"
132
+ substituteInPlace ./ggml/src/ggml-metal/ggml-metal.m \
133
+ --replace '[bundle pathForResource:@"default" ofType:@"metallib"];' "@\"$out/bin/default.metallib\";"
134
+ '';
135
+
136
+ # With PR#6015 https://github.com/ggerganov/llama.cpp/pull/6015,
137
+ # `default.metallib` may be compiled with the Metal compiler from Xcode,
138
+ # and we need to escape the sandbox on macOS to access the Metal compiler.
139
+ # `xcrun` is used to find the path of the Metal compiler, which is variable
140
+ # and not on $PATH
141
+ # see https://github.com/ggerganov/llama.cpp/pull/6118 for discussion
142
+ __noChroot = effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders;
143
+
144
+ nativeBuildInputs =
145
+ [
146
+ cmake
147
+ ninja
148
+ pkg-config
149
+ git
150
+ ]
151
+ ++ optionals useCuda [
152
+ cudaPackages.cuda_nvcc
153
+
154
+ autoAddDriverRunpath
155
+ ]
156
+ ++ optionals (effectiveStdenv.hostPlatform.isGnu && enableStatic) [ glibc.static ]
157
+ ++ optionals (effectiveStdenv.isDarwin && useMetalKit && precompileMetalShaders) [ xcrunHost ];
158
+
159
+ buildInputs =
160
+ optionals effectiveStdenv.isDarwin darwinBuildInputs
161
+ ++ optionals useCuda cudaBuildInputs
162
+ ++ optionals useMpi [ mpi ]
163
+ ++ optionals useRocm rocmBuildInputs
164
+ ++ optionals useBlas [ blas ]
165
+ ++ optionals useVulkan vulkanBuildInputs
166
+ ++ optionals enableCurl [ curl ];
167
+
168
+ cmakeFlags =
169
+ [
170
+ (cmakeBool "LLAMA_BUILD_SERVER" true)
171
+ (cmakeBool "BUILD_SHARED_LIBS" (!enableStatic))
172
+ (cmakeBool "CMAKE_SKIP_BUILD_RPATH" true)
173
+ (cmakeBool "LLAMA_CURL" enableCurl)
174
+ (cmakeBool "GGML_NATIVE" false)
175
+ (cmakeBool "GGML_BLAS" useBlas)
176
+ (cmakeBool "GGML_CUDA" useCuda)
177
+ (cmakeBool "GGML_HIP" useRocm)
178
+ (cmakeBool "GGML_METAL" useMetalKit)
179
+ (cmakeBool "GGML_VULKAN" useVulkan)
180
+ (cmakeBool "GGML_STATIC" enableStatic)
181
+ ]
182
+ ++ optionals useCuda [
183
+ (
184
+ with cudaPackages.flags;
185
+ cmakeFeature "CMAKE_CUDA_ARCHITECTURES" (
186
+ builtins.concatStringsSep ";" (map dropDot cudaCapabilities)
187
+ )
188
+ )
189
+ ]
190
+ ++ optionals useRocm [
191
+ (cmakeFeature "CMAKE_HIP_COMPILER" "${rocmPackages.llvm.clang}/bin/clang")
192
+ (cmakeFeature "CMAKE_HIP_ARCHITECTURES" rocmGpuTargets)
193
+ ]
194
+ ++ optionals useMetalKit [
195
+ (lib.cmakeFeature "CMAKE_C_FLAGS" "-D__ARM_FEATURE_DOTPROD=1")
196
+ (cmakeBool "GGML_METAL_EMBED_LIBRARY" (!precompileMetalShaders))
197
+ ];
198
+
199
+ # Environment variables needed for ROCm
200
+ env = optionals useRocm {
201
+ ROCM_PATH = "${rocmPackages.clr}";
202
+ HIP_DEVICE_LIB_PATH = "${rocmPackages.rocm-device-libs}/amdgcn/bitcode";
203
+ };
204
+
205
+ # TODO(SomeoneSerge): It's better to add proper install targets at the CMake level,
206
+ # if they haven't been added yet.
207
+ postInstall = ''
208
+ mkdir -p $out/include
209
+ cp $src/include/llama.h $out/include/
210
+ '';
211
+
212
+ meta = {
213
+ # Configurations we don't want even the CI to evaluate. Results in the
214
+ # "unsupported platform" messages. This is mostly a no-op, because
215
+ # cudaPackages would've refused to evaluate anyway.
216
+ badPlatforms = optionals useCuda lib.platforms.darwin;
217
+
218
+ # Configurations that are known to result in build failures. Can be
219
+ # overridden by importing Nixpkgs with `allowBroken = true`.
220
+ broken = (useMetalKit && !effectiveStdenv.isDarwin);
221
+
222
+ description = "Inference of LLaMA model in pure C/C++${descriptionSuffix}";
223
+ homepage = "https://github.com/ggerganov/llama.cpp/";
224
+ license = lib.licenses.mit;
225
+
226
+ # Accommodates `nix run` and `lib.getExe`
227
+ mainProgram = "llama-cli";
228
+
229
+ # These people might respond, on the best effort basis, if you ping them
230
+ # in case of Nix-specific regressions or for reviewing Nix-specific PRs.
231
+ # Consider adding yourself to this list if you want to ensure this flake
232
+ # stays maintained and you're willing to invest your time. Do not add
233
+ # other people without their consent. Consider removing people after
234
+ # they've been unreachable for long periods of time.
235
+
236
+ # Note that lib.maintainers is defined in Nixpkgs, but you may just add
237
+ # an attrset following the same format as in
238
+ # https://github.com/NixOS/nixpkgs/blob/f36a80e54da29775c78d7eff0e628c2b4e34d1d7/maintainers/maintainer-list.nix
239
+ maintainers = with lib.maintainers; [
240
+ philiptaron
241
+ SomeoneSerge
242
+ ];
243
+
244
+ # Extend `badPlatforms` instead
245
+ platforms = lib.platforms.all;
246
+ };
247
+ })
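A hedged sketch of consuming this derivation through the scope; the .#llamaPackages output name is taken from the docker.nix comments above, and backends such as Vulkan or CUDA are toggled via the useVulkan/useCuda arguments when the scope is overridden from Nix code:
$ nix build .#llamaPackages.llama-cpp
$ ./result/bin/llama-cli --help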
llama.cpp/.devops/nix/python-scripts.nix ADDED
@@ -0,0 +1,66 @@
1
+ {
2
+ lib,
3
+ stdenv,
4
+ buildPythonPackage,
5
+ poetry-core,
6
+ mkShell,
7
+ python3Packages,
8
+ gguf-py,
9
+ }@inputs:
10
+
11
+ let
12
+ llama-python-deps = with python3Packages; [
13
+ numpy
14
+ sentencepiece
15
+ transformers
16
+ protobuf
17
+ torchWithoutCuda
18
+ gguf-py
19
+ tqdm
20
+
21
+ # for scripts/compare-llama-bench.py
22
+ gitpython
23
+ tabulate
24
+
25
+ # for examples/pydantic-models-to-grammar-examples.py
26
+ docstring-parser
27
+ pydantic
28
+
29
+ ];
30
+
31
+ llama-python-test-deps = with python3Packages; [
32
+ # Server bench
33
+ matplotlib
34
+
35
+ # server tests
36
+ openai
37
+ pytest
38
+ prometheus-client
39
+ ];
40
+ in
41
+
42
+ buildPythonPackage ({
43
+ pname = "llama-scripts";
44
+ version = "0.0.0";
45
+ pyproject = true;
46
+
47
+ # NOTE: The files filtered out here are not visible in the build sandbox, neither
48
+ # do they affect the output hash. They can be modified without triggering a rebuild.
49
+ src = lib.cleanSourceWith {
50
+ filter =
51
+ name: type:
52
+ let
53
+ any = builtins.any (x: x);
54
+ baseName = builtins.baseNameOf name;
55
+ in
56
+ any [
57
+ (lib.hasSuffix ".py" name)
58
+ (baseName == "README.md")
59
+ (baseName == "pyproject.toml")
60
+ ];
61
+ src = lib.cleanSource ../../.;
62
+ };
63
+ nativeBuildInputs = [ poetry-core ];
64
+ nativeCheckInputs = llama-python-test-deps;
65
+ dependencies = llama-python-deps;
66
+ })
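The package above bundles the repository's Python scripts (model conversion and benchmarking helpers) together with their dependencies. A hedged usage sketch of the main conversion script they provide, with placeholder paths:

```bash
# Placeholder paths; assumes the script is run from the repo root (or is on PATH via this package).
python3 convert_hf_to_gguf.py /path/to/hf-model --outtype f16 --outfile /models/model-f16.gguf
```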
llama.cpp/.devops/nix/scope.nix ADDED
@@ -0,0 +1,41 @@
+ {
+   lib,
+   newScope,
+   python3,
+   llamaVersion ? "0.0.0",
+ }:
+
+ let
+   pythonPackages = python3.pkgs;
+   buildPythonPackage = pythonPackages.buildPythonPackage;
+   numpy = pythonPackages.numpy;
+   tqdm = pythonPackages.tqdm;
+   sentencepiece = pythonPackages.sentencepiece;
+   pyyaml = pythonPackages.pyyaml;
+   poetry-core = pythonPackages.poetry-core;
+   pytestCheckHook = pythonPackages.pytestCheckHook;
+ in
+
+ # We're using `makeScope` instead of just writing out an attrset
+ # because it allows users to apply overlays later using `overrideScope'`.
+ # Cf. https://noogle.dev/f/lib/makeScope
+
+ lib.makeScope newScope (self: {
+   inherit llamaVersion;
+   gguf-py = self.callPackage ./package-gguf-py.nix {
+     inherit
+       buildPythonPackage
+       numpy
+       tqdm
+       sentencepiece
+       poetry-core
+       pyyaml
+       pytestCheckHook
+       ;
+   };
+   python-scripts = self.callPackage ./python-scripts.nix { inherit buildPythonPackage poetry-core; };
+   llama-cpp = self.callPackage ./package.nix { };
+   docker = self.callPackage ./docker.nix { };
+   docker-min = self.callPackage ./docker.nix { interactive = false; };
+   sif = self.callPackage ./sif.nix { };
+ })
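The scope gathers the buildable artifacts (`llama-cpp`, `docker`, `docker-min`, `sif`, `python-scripts`, `gguf-py`). Assuming the flake wires these scope attributes into its package outputs under the same names (an assumption, not shown in this diff), they could be built individually, for example:

```bash
# Assumption: the flake exposes the scope attributes as package outputs under these names.
nix build .#llama-cpp       # the C/C++ binaries
nix build .#docker-min      # minimal (non-interactive) container image
nix build .#python-scripts  # packaged conversion scripts
```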
llama.cpp/.devops/nix/sif.nix ADDED
@@ -0,0 +1,27 @@
+ {
+   lib,
+   singularity-tools,
+   llama-cpp,
+   bashInteractive,
+   interactive ? false,
+ }:
+
+ let
+   optionalInt = cond: x: if cond then x else 0;
+ in
+ singularity-tools.buildImage rec {
+   inherit (llama-cpp) name;
+   contents = [ llama-cpp ] ++ lib.optionals interactive [ bashInteractive ];
+
+   # These are excessive (but safe) for most variants. Building singularity
+   # images requires superuser privileges, so we build them inside a VM in a
+   # writable image of pre-determined size.
+   #
+   # ROCm is currently affected by https://github.com/NixOS/nixpkgs/issues/276846
+   #
+   # Expected image sizes:
+   # - cpu/blas: 150M,
+   # - cuda, all gencodes: 560M,
+   diskSize = 4096 + optionalInt llama-cpp.useRocm 16384;
+   memSize = diskSize;
+ }
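Once built, the resulting Singularity image can be run with Apptainer/Singularity. A heavily hedged sketch follows; the exact output path produced by `singularity-tools.buildImage` and the model path are placeholders, not taken from this file.

```bash
# Placeholder invocation; adjust the image path to whatever the nix build result contains.
apptainer run ./result -m /models/model.gguf -p "Hello" -n 64
```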
llama.cpp/.devops/rocm.Dockerfile ADDED
@@ -0,0 +1,113 @@
+ ARG UBUNTU_VERSION=24.04
+
+ # This needs to generally match the container host's environment.
+ ARG ROCM_VERSION=6.3
+ ARG AMDGPU_VERSION=6.3
+
+ # Target the ROCm dev container
+ ARG BASE_ROCM_DEV_CONTAINER=rocm/dev-ubuntu-${UBUNTU_VERSION}:${ROCM_VERSION}-complete
+
+ ### Build image
+ FROM ${BASE_ROCM_DEV_CONTAINER} AS build
+
+ # Unless otherwise specified, we make a fat build.
+ # List from https://github.com/ggerganov/llama.cpp/pull/1087#issuecomment-1682807878
+ # This is mostly tied to rocBLAS supported archs.
+ # gfx803, gfx900, gfx1032, gfx1101, gfx1102: not officially supported
+ # gfx906 is deprecated
+ # check https://rocm.docs.amd.com/projects/install-on-linux/en/docs-6.2.4/reference/system-requirements.html
+
+ #ARG ROCM_DOCKER_ARCH='gfx803,gfx900,gfx906,gfx908,gfx90a,gfx942,gfx1010,gfx1030,gfx1032,gfx1100,gfx1101,gfx1102'
+ ARG ROCM_DOCKER_ARCH=gfx1100
+
+ # Set the ROCm GPU architectures to build for
+ ENV AMDGPU_TARGETS=${ROCM_DOCKER_ARCH}
+ # Enable ROCm
+ # ENV CC=/opt/rocm/llvm/bin/clang
+ # ENV CXX=/opt/rocm/llvm/bin/clang++
+
+ RUN apt-get update \
+     && apt-get install -y \
+        build-essential \
+        cmake \
+        git \
+        libcurl4-openssl-dev \
+        curl \
+        libgomp1
+
+ WORKDIR /app
+
+ COPY . .
+
+ RUN HIPCXX="$(hipconfig -l)/clang" HIP_PATH="$(hipconfig -R)" \
+     cmake -S . -B build -DGGML_HIP=ON -DAMDGPU_TARGETS=$ROCM_DOCKER_ARCH -DCMAKE_BUILD_TYPE=Release -DLLAMA_CURL=ON \
+     && cmake --build build --config Release -j$(nproc)
+
+ RUN mkdir -p /app/lib \
+     && find build -name "*.so" -exec cp {} /app/lib \;
+
+ RUN mkdir -p /app/full \
+     && cp build/bin/* /app/full \
+     && cp *.py /app/full \
+     && cp -r gguf-py /app/full \
+     && cp -r requirements /app/full \
+     && cp requirements.txt /app/full \
+     && cp .devops/tools.sh /app/full/tools.sh
+
+ ## Base image
+ FROM ${BASE_ROCM_DEV_CONTAINER} AS base
+
+ RUN apt-get update \
+     && apt-get install -y libgomp1 curl \
+     && apt autoremove -y \
+     && apt clean -y \
+     && rm -rf /tmp/* /var/tmp/* \
+     && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
+     && find /var/cache -type f -delete
+
+ COPY --from=build /app/lib/ /app
+
+ ### Full
+ FROM base AS full
+
+ COPY --from=build /app/full /app
+
+ WORKDIR /app
+
+ RUN apt-get update \
+     && apt-get install -y \
+        git \
+        python3-pip \
+        python3 \
+        python3-wheel \
+     && pip install --break-system-packages --upgrade setuptools \
+     && pip install --break-system-packages -r requirements.txt \
+     && apt autoremove -y \
+     && apt clean -y \
+     && rm -rf /tmp/* /var/tmp/* \
+     && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
+     && find /var/cache -type f -delete
+
+ ENTRYPOINT ["/app/tools.sh"]
+
+ ### Light, CLI only
+ FROM base AS light
+
+ COPY --from=build /app/full/llama-cli /app
+
+ WORKDIR /app
+
+ ENTRYPOINT [ "/app/llama-cli" ]
+
+ ### Server, Server only
+ FROM base AS server
+
+ ENV LLAMA_ARG_HOST=0.0.0.0
+
+ COPY --from=build /app/full/llama-server /app
+
+ WORKDIR /app
+
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
+
+ ENTRYPOINT [ "/app/llama-server" ]
llama.cpp/.devops/tools.sh ADDED
@@ -0,0 +1,49 @@
+ #!/bin/bash
+ set -e
+
+ # Read the first argument into a variable
+ arg1="$1"
+
+ # Shift the arguments to remove the first one
+ shift
+
+ if [[ "$arg1" == '--convert' || "$arg1" == '-c' ]]; then
+     exec python3 ./convert_hf_to_gguf.py "$@"
+ elif [[ "$arg1" == '--quantize' || "$arg1" == '-q' ]]; then
+     exec ./llama-quantize "$@"
+ elif [[ "$arg1" == '--run' || "$arg1" == '-r' ]]; then
+     exec ./llama-cli "$@"
+ elif [[ "$arg1" == '--bench' || "$arg1" == '-b' ]]; then
+     exec ./llama-bench "$@"
+ elif [[ "$arg1" == '--perplexity' || "$arg1" == '-p' ]]; then
+     exec ./llama-perplexity "$@"
+ elif [[ "$arg1" == '--all-in-one' || "$arg1" == '-a' ]]; then
+     echo "Converting PTH to GGML..."
+     for i in $(ls $1/$2/ggml-model-f16.bin*); do
+         if [ -f "${i/f16/q4_0}" ]; then
+             echo "Skip model quantization, it already exists: ${i/f16/q4_0}"
+         else
+             echo "Converting PTH to GGML: $i into ${i/f16/q4_0}..."
+             ./llama-quantize "$i" "${i/f16/q4_0}" q4_0
+         fi
+     done
+ elif [[ "$arg1" == '--server' || "$arg1" == '-s' ]]; then
+     exec ./llama-server "$@"
+ else
+     echo "Unknown command: $arg1"
+     echo "Available commands: "
+     echo "  --run (-r): Run a model previously converted into ggml"
+     echo "              ex: -m /models/7B/ggml-model-q4_0.bin -p \"Building a website can be done in 10 simple steps:\" -n 512"
+     echo "  --bench (-b): Benchmark the performance of the inference for various parameters."
+     echo "              ex: -m model.gguf"
+     echo "  --perplexity (-p): Measure the perplexity of a model over a given text."
+     echo "              ex: -m model.gguf -f file.txt"
+     echo "  --convert (-c): Convert a llama model into ggml"
+     echo "              ex: --outtype f16 \"/models/7B/\" "
+     echo "  --quantize (-q): Optimize with quantization process ggml"
+     echo "              ex: \"/models/7B/ggml-model-f16.bin\" \"/models/7B/ggml-model-q4_0.bin\" 2"
+     echo "  --all-in-one (-a): Execute --convert & --quantize"
+     echo "              ex: \"/models/\" 7B"
+     echo "  --server (-s): Run a model on the server"
+     echo "              ex: -m /models/7B/ggml-model-q4_0.bin -c 2048 -ngl 43 -mg 1 --port 8080"
+ fi
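Since this script is the entrypoint of the `full` image, its subcommand is selected by the first container argument. A hedged usage sketch with a placeholder image tag and model paths:

```bash
# Placeholder image tag and paths; the subcommand flags come from the script above.
docker run -v /models:/models llama-full --convert --outtype f16 /models/7B/
docker run -v /models:/models llama-full --quantize /models/7B/ggml-model-f16.gguf /models/7B/ggml-model-q4_0.gguf q4_0
docker run -v /models:/models llama-full --run -m /models/7B/ggml-model-q4_0.gguf -p "Hello" -n 64
```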
llama.cpp/.devops/vulkan.Dockerfile ADDED
@@ -0,0 +1,89 @@
+ ARG UBUNTU_VERSION=24.04
+
+ FROM ubuntu:$UBUNTU_VERSION AS build
+
+ # Install build tools
+ RUN apt update && apt install -y git build-essential cmake wget
+
+ # Install Vulkan SDK and cURL
+ RUN wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | apt-key add - && \
+     wget -qO /etc/apt/sources.list.d/lunarg-vulkan-noble.list https://packages.lunarg.com/vulkan/lunarg-vulkan-noble.list && \
+     apt update -y && \
+     apt-get install -y vulkan-sdk libcurl4-openssl-dev curl
+
+ # Build it
+ WORKDIR /app
+
+ COPY . .
+
+ RUN cmake -B build -DGGML_NATIVE=OFF -DGGML_VULKAN=1 -DLLAMA_CURL=1 && \
+     cmake --build build --config Release -j$(nproc)
+
+ RUN mkdir -p /app/lib && \
+     find build -name "*.so" -exec cp {} /app/lib \;
+
+ RUN mkdir -p /app/full \
+     && cp build/bin/* /app/full \
+     && cp *.py /app/full \
+     && cp -r gguf-py /app/full \
+     && cp -r requirements /app/full \
+     && cp requirements.txt /app/full \
+     && cp .devops/tools.sh /app/full/tools.sh
+
+ ## Base image
+ FROM ubuntu:$UBUNTU_VERSION AS base
+
+ RUN apt-get update \
+     && apt-get install -y libgomp1 curl libvulkan-dev \
+     && apt autoremove -y \
+     && apt clean -y \
+     && rm -rf /tmp/* /var/tmp/* \
+     && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
+     && find /var/cache -type f -delete
+
+ COPY --from=build /app/lib/ /app
+
+ ### Full
+ FROM base AS full
+
+ COPY --from=build /app/full /app
+
+ WORKDIR /app
+
+ RUN apt-get update \
+     && apt-get install -y \
+        git \
+        python3 \
+        python3-pip \
+        python3-wheel \
+     && pip install --break-system-packages --upgrade setuptools \
+     && pip install --break-system-packages -r requirements.txt \
+     && apt autoremove -y \
+     && apt clean -y \
+     && rm -rf /tmp/* /var/tmp/* \
+     && find /var/cache/apt/archives /var/lib/apt/lists -not -name lock -type f -delete \
+     && find /var/cache -type f -delete
+
+ ENTRYPOINT ["/app/tools.sh"]
+
+ ### Light, CLI only
+ FROM base AS light
+
+ COPY --from=build /app/full/llama-cli /app
+
+ WORKDIR /app
+
+ ENTRYPOINT [ "/app/llama-cli" ]
+
+ ### Server, Server only
+ FROM base AS server
+
+ ENV LLAMA_ARG_HOST=0.0.0.0
+
+ COPY --from=build /app/full/llama-server /app
+
+ WORKDIR /app
+
+ HEALTHCHECK CMD [ "curl", "-f", "http://localhost:8080/health" ]
+
+ ENTRYPOINT [ "/app/llama-server" ]
llama.cpp/.dockerignore ADDED
@@ -0,0 +1,20 @@
+ *.o
+ *.a
+ .cache/
+ # Do not ignore .git directory, otherwise the reported build number will always be 0
+ .github/
+ .gitignore
+ .vs/
+ .vscode/
+ .DS_Store
+
+ build*/
+
+ models/*
+
+ /llama-cli
+ /llama-quantize
+
+ arm_neon.h
+ compile_commands.json
+ Dockerfile
llama.cpp/.ecrc ADDED
@@ -0,0 +1,6 @@
+ {
+     "Exclude": ["^\\.gitmodules$", "stb_image\\.h"],
+     "Disable": {
+         "IndentSize": true
+     }
+ }
llama.cpp/.editorconfig ADDED
@@ -0,0 +1,50 @@
+ # https://EditorConfig.org
+
+ # Top-most EditorConfig file
+ root = true
+
+ # Unix-style newlines with a newline ending every file, utf-8 charset
+ [*]
+ end_of_line = lf
+ insert_final_newline = true
+ trim_trailing_whitespace = true
+ charset = utf-8
+ indent_style = space
+ indent_size = 4
+
+ [Makefile]
+ indent_style = tab
+
+ [scripts/*.mk]
+ indent_style = tab
+
+ [prompts/*.txt]
+ insert_final_newline = unset
+
+ [examples/server/public/*]
+ indent_size = 2
+
+ [examples/server/public/deps_*]
+ trim_trailing_whitespace = unset
+ indent_style = unset
+ indent_size = unset
+
+ [examples/server/deps_*]
+ trim_trailing_whitespace = unset
+ indent_style = unset
+ indent_size = unset
+
+ [examples/llama.swiftui/llama.swiftui.xcodeproj/*]
+ indent_style = tab
+
+ [examples/cvector-generator/*.txt]
+ trim_trailing_whitespace = unset
+ insert_final_newline = unset
+
+ [models/templates/*.jinja]
+ indent_style = unset
+ indent_size = unset
+ end_of_line = unset
+ charset = unset
+ trim_trailing_whitespace = unset
+ insert_final_newline = unset
llama.cpp/.flake8 ADDED
@@ -0,0 +1,17 @@
+ [flake8]
+ max-line-length = 125
+ ignore = E203,E211,E221,E225,E231,E241,E251,E261,E266,E501,E701,E704,W503
+ exclude =
+     # Do not traverse examples
+     examples,
+     # Do not include package initializers
+     __init__.py,
+     # No need to traverse our git directory
+     .git,
+     # There's no value in checking cache directories
+     __pycache__,
+     # No need to include the build path
+     build,
+     # This contains builds that we don't want to check
+     dist  # This is generated with `python build .` for package releases
+ # max-complexity = 10
llama.cpp/.github/.DS_Store ADDED
Binary file (6.15 kB). View file
 
llama.cpp/.github/ISSUE_TEMPLATE/010-bug-compilation.yml ADDED
@@ -0,0 +1,87 @@
1
+ name: Bug (compilation)
2
+ description: Something goes wrong when trying to compile llama.cpp.
3
+ title: "Compile bug: "
4
+ labels: ["bug-unconfirmed", "compilation"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: >
9
+ Thanks for taking the time to fill out this bug report!
10
+ This issue template is intended for bug reports where the compilation of llama.cpp fails.
11
+ Before opening an issue, please confirm that the compilation still fails with `-DGGML_CCACHE=OFF`.
12
+ If the compilation succeeds with ccache disabled you should be able to permanently fix the issue
13
+ by clearing `~/.cache/ccache` (on Linux).
14
+ - type: textarea
15
+ id: commit
16
+ attributes:
17
+ label: Git commit
18
+ description: Which commit are you trying to compile?
19
+ placeholder: |
20
+ $git rev-parse HEAD
21
+ 84a07a17b1b08cf2b9747c633a2372782848a27f
22
+ validations:
23
+ required: true
24
+ - type: dropdown
25
+ id: operating-system
26
+ attributes:
27
+ label: Operating systems
28
+ description: Which operating systems do you know to be affected?
29
+ multiple: true
30
+ options:
31
+ - Linux
32
+ - Mac
33
+ - Windows
34
+ - BSD
35
+ - Other? (Please let us know in description)
36
+ validations:
37
+ required: true
38
+ - type: dropdown
39
+ id: backends
40
+ attributes:
41
+ label: GGML backends
42
+ description: Which GGML backends do you know to be affected?
43
+ options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
44
+ multiple: true
45
+ validations:
46
+ required: true
47
+ - type: textarea
48
+ id: info
49
+ attributes:
50
+ label: Problem description & steps to reproduce
51
+ description: >
52
+ Please give us a summary of the problem and tell us how to reproduce it.
53
+ If you can narrow down the bug to specific compile flags, that information would be very much appreciated by us.
54
+ placeholder: >
55
+ I'm trying to compile llama.cpp with CUDA support on a fresh install of Ubuntu and get error XY.
56
+ Here are the exact commands that I used: ...
57
+ validations:
58
+ required: true
59
+ - type: textarea
60
+ id: first_bad_commit
61
+ attributes:
62
+ label: First Bad Commit
63
+ description: >
64
+ If the bug was not present on an earlier version: when did it start appearing?
65
+ If possible, please do a git bisect and identify the exact commit that introduced the bug.
66
+ validations:
67
+ required: false
68
+ - type: textarea
69
+ id: command
70
+ attributes:
71
+ label: Compile command
72
+ description: >
73
+ Please provide the exact command you used to compile llama.cpp. For example: `cmake -B ...`.
74
+ This will be automatically formatted into code, so no need for backticks.
75
+ render: shell
76
+ validations:
77
+ required: true
78
+ - type: textarea
79
+ id: logs
80
+ attributes:
81
+ label: Relevant log output
82
+ description: >
83
+ Please copy and paste any relevant log output, including any generated text.
84
+ This will be automatically formatted into code, so no need for backticks.
85
+ render: shell
86
+ validations:
87
+ required: true
llama.cpp/.github/ISSUE_TEMPLATE/011-bug-results.yml ADDED
@@ -0,0 +1,101 @@
1
+ name: Bug (model use)
2
+ description: Something goes wrong when using a model (in general, not specific to a single llama.cpp module).
3
+ title: "Eval bug: "
4
+ labels: ["bug-unconfirmed", "model evaluation"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: >
9
+ Thanks for taking the time to fill out this bug report!
10
+ This issue template is intended for bug reports where the model evaluation results
11
+ (i.e. the generated text) are incorrect or llama.cpp crashes during model evaluation.
12
+ If you encountered the issue while using an external UI (e.g. ollama),
13
+ please reproduce your issue using one of the examples/binaries in this repository.
14
+ The `llama-cli` binary can be used for simple and reproducible model inference.
15
+ - type: textarea
16
+ id: version
17
+ attributes:
18
+ label: Name and Version
19
+ description: Which version of our software are you running? (use `--version` to get a version string)
20
+ placeholder: |
21
+ $./llama-cli --version
22
+ version: 2999 (42b4109e)
23
+ built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
24
+ validations:
25
+ required: true
26
+ - type: dropdown
27
+ id: operating-system
28
+ attributes:
29
+ label: Operating systems
30
+ description: Which operating systems do you know to be affected?
31
+ multiple: true
32
+ options:
33
+ - Linux
34
+ - Mac
35
+ - Windows
36
+ - BSD
37
+ - Other? (Please let us know in description)
38
+ validations:
39
+ required: true
40
+ - type: dropdown
41
+ id: backends
42
+ attributes:
43
+ label: GGML backends
44
+ description: Which GGML backends do you know to be affected?
45
+ options: [AMX, BLAS, CPU, CUDA, HIP, Kompute, Metal, Musa, RPC, SYCL, Vulkan]
46
+ multiple: true
47
+ validations:
48
+ required: true
49
+ - type: textarea
50
+ id: hardware
51
+ attributes:
52
+ label: Hardware
53
+ description: Which CPUs/GPUs are you using?
54
+ placeholder: >
55
+ e.g. Ryzen 5950X + 2x RTX 4090
56
+ validations:
57
+ required: true
58
+ - type: textarea
59
+ id: model
60
+ attributes:
61
+ label: Models
62
+ description: >
63
+ Which model(s) at which quantization were you using when encountering the bug?
64
+ If you downloaded a GGUF file off of Huggingface, please provide a link.
65
+ placeholder: >
66
+ e.g. Meta LLaMA 3.1 Instruct 8b q4_K_M
67
+ validations:
68
+ required: false
69
+ - type: textarea
70
+ id: info
71
+ attributes:
72
+ label: Problem description & steps to reproduce
73
+ description: >
74
+ Please give us a summary of the problem and tell us how to reproduce it.
75
+ If you can narrow down the bug to specific hardware, compile flags, or command line arguments,
76
+ that information would be very much appreciated by us.
77
+ placeholder: >
78
+ e.g. when I run llama-cli with -ngl 99 I get garbled outputs.
79
+ When I use -ngl 0 it works correctly.
80
+ Here are the exact commands that I used: ...
81
+ validations:
82
+ required: true
83
+ - type: textarea
84
+ id: first_bad_commit
85
+ attributes:
86
+ label: First Bad Commit
87
+ description: >
88
+ If the bug was not present on an earlier version: when did it start appearing?
89
+ If possible, please do a git bisect and identify the exact commit that introduced the bug.
90
+ validations:
91
+ required: false
92
+ - type: textarea
93
+ id: logs
94
+ attributes:
95
+ label: Relevant log output
96
+ description: >
97
+ Please copy and paste any relevant log output, including the command that you entered and any generated text.
98
+ This will be automatically formatted into code, so no need for backticks.
99
+ render: shell
100
+ validations:
101
+ required: true
llama.cpp/.github/ISSUE_TEMPLATE/019-bug-misc.yml ADDED
@@ -0,0 +1,91 @@
1
+ name: Bug (misc.)
2
+ description: Something is not working the way it should (and it's not covered by any of the above cases).
3
+ title: "Misc. bug: "
4
+ labels: ["bug-unconfirmed"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: >
9
+ Thanks for taking the time to fill out this bug report!
10
+ This issue template is intended for miscellaneous bugs that don't fit into any other category.
11
+ If you encountered the issue while using an external UI (e.g. ollama),
12
+ please reproduce your issue using one of the examples/binaries in this repository.
13
+ - type: textarea
14
+ id: version
15
+ attributes:
16
+ label: Name and Version
17
+ description: Which version of our software is affected? (You can use `--version` to get a version string.)
18
+ placeholder: |
19
+ $./llama-cli --version
20
+ version: 2999 (42b4109e)
21
+ built with cc (Ubuntu 11.4.0-1ubuntu1~22.04) 11.4.0 for x86_64-linux-gnu
22
+ validations:
23
+ required: true
24
+ - type: dropdown
25
+ id: operating-system
26
+ attributes:
27
+ label: Operating systems
28
+ description: Which operating systems do you know to be affected?
29
+ multiple: true
30
+ options:
31
+ - Linux
32
+ - Mac
33
+ - Windows
34
+ - BSD
35
+ - Other? (Please let us know in description)
36
+ validations:
37
+ required: false
38
+ - type: dropdown
39
+ id: module
40
+ attributes:
41
+ label: Which llama.cpp modules do you know to be affected?
42
+ multiple: true
43
+ options:
44
+ - Documentation/Github
45
+ - libllama (core library)
46
+ - llama-cli
47
+ - llama-server
48
+ - llama-bench
49
+ - llama-quantize
50
+ - Python/Bash scripts
51
+ - Test code
52
+ - Other (Please specify in the next section)
53
+ validations:
54
+ required: false
55
+ - type: textarea
56
+ id: command
57
+ attributes:
58
+ label: Command line
59
+ description: >
60
+ Please provide the exact commands you entered, if applicable. For example: `llama-server -m ... -c ...`, `llama-cli -m ...`, etc.
61
+ This will be automatically formatted into code, so no need for backticks.
62
+ render: shell
63
+ validations:
64
+ required: false
65
+ - type: textarea
66
+ id: info
67
+ attributes:
68
+ label: Problem description & steps to reproduce
69
+ description: >
70
+ Please give us a summary of the problem and tell us how to reproduce it (if applicable).
71
+ validations:
72
+ required: true
73
+ - type: textarea
74
+ id: first_bad_commit
75
+ attributes:
76
+ label: First Bad Commit
77
+ description: >
78
+ If the bug was not present on an earlier version and it's not trivial to track down: when did it start appearing?
79
+ If possible, please do a git bisect and identify the exact commit that introduced the bug.
80
+ validations:
81
+ required: false
82
+ - type: textarea
83
+ id: logs
84
+ attributes:
85
+ label: Relevant log output
86
+ description: >
87
+ If applicable, please copy and paste any relevant log output, including any generated text.
88
+ This will be automatically formatted into code, so no need for backticks.
89
+ render: shell
90
+ validations:
91
+ required: false
llama.cpp/.github/ISSUE_TEMPLATE/020-enhancement.yml ADDED
@@ -0,0 +1,51 @@
1
+ name: Enhancement
2
+ description: Used to request enhancements for llama.cpp.
3
+ title: "Feature Request: "
4
+ labels: ["enhancement"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: |
9
+ [Please post your idea first in Discussion if there is not yet a consensus for this enhancement request. This will help to keep this issue tracker focused on enhancements that the community has agreed needs to be implemented.](https://github.com/ggerganov/llama.cpp/discussions/categories/ideas)
10
+
11
+ - type: checkboxes
12
+ id: prerequisites
13
+ attributes:
14
+ label: Prerequisites
15
+ description: Please confirm the following before submitting your enhancement request.
16
+ options:
17
+ - label: I am running the latest code. Mention the version if possible as well.
18
+ required: true
19
+ - label: I carefully followed the [README.md](https://github.com/ggerganov/llama.cpp/blob/master/README.md).
20
+ required: true
21
+ - label: I searched using keywords relevant to my issue to make sure that I am creating a new issue that is not already open (or closed).
22
+ required: true
23
+ - label: I reviewed the [Discussions](https://github.com/ggerganov/llama.cpp/discussions), and have a new and useful enhancement to share.
24
+ required: true
25
+
26
+ - type: textarea
27
+ id: feature-description
28
+ attributes:
29
+ label: Feature Description
30
+ description: Please provide a detailed written description of what you were trying to do, and what you expected `llama.cpp` to do as an enhancement.
31
+ placeholder: Detailed description of the enhancement
32
+ validations:
33
+ required: true
34
+
35
+ - type: textarea
36
+ id: motivation
37
+ attributes:
38
+ label: Motivation
39
+ description: Please provide a detailed written description of reasons why this feature is necessary and how it is useful to `llama.cpp` users.
40
+ placeholder: Explanation of why this feature is needed and its benefits
41
+ validations:
42
+ required: true
43
+
44
+ - type: textarea
45
+ id: possible-implementation
46
+ attributes:
47
+ label: Possible Implementation
48
+ description: If you have an idea as to how it can be implemented, please write a detailed description. Feel free to give links to external sources or share visuals that might be helpful to understand the details better.
49
+ placeholder: Detailed description of potential implementation
50
+ validations:
51
+ required: false
llama.cpp/.github/ISSUE_TEMPLATE/030-research.yml ADDED
@@ -0,0 +1,52 @@
1
+ name: Research
2
+ description: Track new technical research area.
3
+ title: "Research: "
4
+ labels: ["research 🔬"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: |
9
+ Don't forget to check for any [duplicate research issue tickets](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3A%22research+%F0%9F%94%AC%22)
10
+
11
+ - type: checkboxes
12
+ id: research-stage
13
+ attributes:
14
+ label: Research Stage
15
+ description: Track general state of this research ticket
16
+ options:
17
+ - label: Background Research (Let's try to avoid reinventing the wheel)
18
+ - label: Hypothesis Formed (How do you think this will work, and what will its effect be?)
19
+ - label: Strategy / Implementation Forming
20
+ - label: Analysis of results
21
+ - label: Debrief / Documentation (So people in the future can learn from us)
22
+
23
+ - type: textarea
24
+ id: background
25
+ attributes:
26
+ label: Previous existing literature and research
27
+ description: What's the current state of the art, and what's the motivation for this research?
28
+
29
+ - type: textarea
30
+ id: hypothesis
31
+ attributes:
32
+ label: Hypothesis
33
+ description: How do you think this will work, and what will its effect be?
34
+
35
+ - type: textarea
36
+ id: implementation
37
+ attributes:
38
+ label: Implementation
39
+ description: Got an approach? e.g. a PR ready to go?
40
+
41
+ - type: textarea
42
+ id: analysis
43
+ attributes:
44
+ label: Analysis
45
+ description: How does the proposed implementation behave?
46
+
47
+ - type: textarea
48
+ id: logs
49
+ attributes:
50
+ label: Relevant log output
51
+ description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks.
52
+ render: shell
llama.cpp/.github/ISSUE_TEMPLATE/040-refactor.yml ADDED
@@ -0,0 +1,28 @@
1
+ name: Refactor (Maintainers)
2
+ description: Used to track refactoring opportunities.
3
+ title: "Refactor: "
4
+ labels: ["refactor"]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: |
9
+ Don't forget to [check for existing refactor issue tickets](https://github.com/ggerganov/llama.cpp/issues?q=is%3Aopen+is%3Aissue+label%3Arefactoring) in case it's already covered.
10
+ Also you may want to check [Pull request refactor label as well](https://github.com/ggerganov/llama.cpp/pulls?q=is%3Aopen+is%3Apr+label%3Arefactoring) for duplicates too.
11
+
12
+ - type: textarea
13
+ id: background-description
14
+ attributes:
15
+ label: Background Description
16
+ description: Please provide a detailed written description of the pain points you are trying to solve.
17
+ placeholder: Detailed description behind your motivation to request refactor
18
+ validations:
19
+ required: true
20
+
21
+ - type: textarea
22
+ id: possible-approaches
23
+ attributes:
24
+ label: Possible Refactor Approaches
25
+ description: If you have some idea of possible approaches to solve this problem. You may want to make it a todo list.
26
+ placeholder: Your idea of possible refactoring opportunity/approaches
27
+ validations:
28
+ required: false
llama.cpp/.github/ISSUE_TEMPLATE/config.yml ADDED
@@ -0,0 +1,11 @@
+ blank_issues_enabled: true
+ contact_links:
+   - name: Got an idea?
+     url: https://github.com/ggerganov/llama.cpp/discussions/categories/ideas
+     about: Pop it there. It may then become an enhancement ticket.
+   - name: Got a question?
+     url: https://github.com/ggerganov/llama.cpp/discussions/categories/q-a
+     about: Ask a question there!
+   - name: Want to contribute?
+     url: https://github.com/ggerganov/llama.cpp/wiki/contribute
+     about: Head to the contribution guide page of the wiki for areas you can help with
llama.cpp/.github/labeler.yml ADDED
@@ -0,0 +1,86 @@
1
+ # https://github.com/actions/labeler
2
+ Kompute:
3
+ - changed-files:
4
+ - any-glob-to-any-file:
5
+ - ggml/include/ggml-kompute.h
6
+ - ggml/src/ggml-kompute/**
7
+ - README-kompute.md
8
+ Apple Metal:
9
+ - changed-files:
10
+ - any-glob-to-any-file:
11
+ - ggml/include/ggml-metal.h
12
+ - ggml/src/ggml-metal/**
13
+ - README-metal.md
14
+ SYCL:
15
+ - changed-files:
16
+ - any-glob-to-any-file:
17
+ - ggml/include/ggml-sycl.h
18
+ - ggml/src/ggml-sycl/**
19
+ - docs/backend/SYCL.md
20
+ - examples/sycl/**
21
+ Nvidia GPU:
22
+ - changed-files:
23
+ - any-glob-to-any-file:
24
+ - ggml/include/ggml-cuda.h
25
+ - ggml/src/ggml-cuda/**
26
+ Vulkan:
27
+ - changed-files:
28
+ - any-glob-to-any-file:
29
+ - ggml/include/ggml-vulkan.h
30
+ - ggml/src/ggml-vulkan/**
31
+ documentation:
32
+ - changed-files:
33
+ - any-glob-to-any-file:
34
+ - docs/**
35
+ - media/**
36
+ testing:
37
+ - changed-files:
38
+ - any-glob-to-any-file:
39
+ - tests/**
40
+ build:
41
+ - changed-files:
42
+ - any-glob-to-any-file:
43
+ - cmake/**
44
+ - CMakeLists.txt
45
+ - CMakePresets.json
46
+ examples:
47
+ - changed-files:
48
+ - any-glob-to-any-file: examples/**
49
+ devops:
50
+ - changed-files:
51
+ - any-glob-to-any-file:
52
+ - .devops/**
53
+ - .github/**
54
+ - ci/**
55
+ python:
56
+ - changed-files:
57
+ - any-glob-to-any-file:
58
+ - "**/*.py"
59
+ - requirements/**
60
+ - gguf-py/**
61
+ - .flake8
62
+ script:
63
+ - changed-files:
64
+ - any-glob-to-any-file:
65
+ - scripts/**
66
+ android:
67
+ - changed-files:
68
+ - any-glob-to-any-file:
69
+ - examples/llama.android/**
70
+ server:
71
+ - changed-files:
72
+ - any-glob-to-any-file:
73
+ - examples/server/**
74
+ ggml:
75
+ - changed-files:
76
+ - any-glob-to-any-file:
77
+ - ggml/**
78
+ nix:
79
+ - changed-files:
80
+ - any-glob-to-any-file:
81
+ - "**/*.nix"
82
+ - .github/workflows/nix-*.yml
83
+ - .devops/nix/nixpkgs-instances.nix
84
+ embedding:
85
+ - changed-files:
86
+ - any-glob-to-any-file: examples/embedding/
llama.cpp/.github/pull_request_template.md ADDED
@@ -0,0 +1 @@
+ *Make sure to read the [contributing guidelines](https://github.com/ggerganov/llama.cpp/blob/master/CONTRIBUTING.md) before submitting a PR*
llama.cpp/.github/workflows/bench.yml.disabled ADDED
@@ -0,0 +1,315 @@
1
+ # TODO: there have been some issues with the workflow, so disabling for now
2
+ # https://github.com/ggerganov/llama.cpp/issues/7893
3
+ #
4
+ # Benchmark
5
+ name: Benchmark
6
+
7
+ on:
8
+ workflow_dispatch:
9
+ inputs:
10
+ gpu-series:
11
+ description: 'Azure GPU series to run with'
12
+ required: true
13
+ type: choice
14
+ options:
15
+ - Standard_NC4as_T4_v3
16
+ - Standard_NC24ads_A100_v4
17
+ - Standard_NC80adis_H100_v5
18
+ sha:
19
+ description: 'Commit SHA1 to build'
20
+ required: false
21
+ type: string
22
+ duration:
23
+ description: 'Duration of the bench'
24
+ type: string
25
+ default: 10m
26
+
27
+ push:
28
+ branches:
29
+ - master
30
+ paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
31
+ pull_request_target:
32
+ types: [opened, synchronize, reopened]
33
+ paths: ['llama.cpp', 'ggml.c', 'ggml-backend.cpp', 'ggml-quants.c', '**/*.cu', 'examples/server/*.h*', 'examples/server/*.cpp']
34
+ schedule:
35
+ - cron: '04 2 * * *'
36
+
37
+ concurrency:
38
+ group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}-${{ github.event.inputs.sha }}
39
+ cancel-in-progress: true
40
+
41
+ jobs:
42
+ bench-server-baseline:
43
+ runs-on: Standard_NC4as_T4_v3
44
+ env:
45
+ RUNNER_LABEL: Standard_NC4as_T4_v3 # FIXME: haven't found a way to avoid duplicating this
46
+ N_USERS: 8
47
+ DURATION: 10m
48
+
49
+ strategy:
50
+ matrix:
51
+ model: [phi-2]
52
+ ftype: [q4_0, q8_0, f16]
53
+ include:
54
+ - model: phi-2
55
+ ftype: q4_0
56
+ pr_comment_enabled: "true"
57
+
58
+ if: |
59
+ inputs.gpu-series == 'Standard_NC4as_T4_v3'
60
+ || (
61
+ github.event_name == 'schedule'
62
+ && github.ref_name == 'master'
63
+ && github.repository_owner == 'ggerganov'
64
+ )
65
+ || github.event_name == 'pull_request_target'
66
+ || (
67
+ github.event_name == 'push'
68
+ && github.event.ref == 'refs/heads/master'
69
+ && github.repository_owner == 'ggerganov'
70
+ )
71
+ steps:
72
+ - name: Clone
73
+ id: checkout
74
+ uses: actions/checkout@v4
75
+ with:
76
+ fetch-depth: 0
77
+ ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
78
+
79
+ - name: Install python env
80
+ id: pipenv
81
+ run: |
82
+ cd examples/server/bench
83
+ python3 -m venv venv
84
+ source venv/bin/activate
85
+ pip install -r requirements.txt
86
+
87
+ - name: Prometheus
88
+ id: install_prometheus
89
+ run: |
90
+ wget --quiet https://github.com/prometheus/prometheus/releases/download/v2.51.0/prometheus-2.51.0.linux-amd64.tar.gz
91
+ tar xzf prometheus*.tar.gz --strip-components=1
92
+ ./prometheus --config.file=examples/server/bench/prometheus.yml &
93
+ while ! nc -z localhost 9090; do
94
+ sleep 0.1
95
+ done
96
+
97
+ - name: Set up Go
98
+ uses: actions/setup-go@v5
99
+ with:
100
+ go-version: '1.21'
101
+
102
+ - name: Install k6 and xk6-sse
103
+ id: k6_installation
104
+ run: |
105
+ cd examples/server/bench
106
+ go install go.k6.io/xk6/cmd/xk6@latest
107
+ xk6 build master \
108
+ --with github.com/phymbert/xk6-sse
109
+
110
+ - name: Build
111
+ id: cmake_build
112
+ run: |
113
+ set -eux
114
+ cmake -B build \
115
+ -DGGML_NATIVE=OFF \
116
+ -DLLAMA_BUILD_SERVER=ON \
117
+ -DLLAMA_CURL=ON \
118
+ -DLLAMA_CUBLAS=ON \
119
+ -DCUDAToolkit_ROOT=/usr/local/cuda \
120
+ -DCMAKE_CUDA_COMPILER=/usr/local/cuda/bin/nvcc \
121
+ -DCMAKE_CUDA_ARCHITECTURES=75 \
122
+ -DLLAMA_FATAL_WARNINGS=OFF \
123
+ -DLLAMA_ALL_WARNINGS=OFF \
124
+ -DCMAKE_BUILD_TYPE=Release;
125
+ cmake --build build --config Release -j $(nproc) --target llama-server
126
+
127
+ - name: Download the dataset
128
+ id: download_dataset
129
+ run: |
130
+ cd examples/server/bench
131
+ wget --quiet https://huggingface.co/datasets/anon8231489123/ShareGPT_Vicuna_unfiltered/resolve/main/ShareGPT_V3_unfiltered_cleaned_split.json
132
+
133
+ - name: Server bench
134
+ id: server_bench
135
+ env:
136
+ HEAD_REF: ${{ github.head_ref || github.ref_name }}
137
+ run: |
138
+ set -eux
139
+
140
+ cd examples/server/bench
141
+ source venv/bin/activate
142
+ python bench.py \
143
+ --runner-label ${{ env.RUNNER_LABEL }} \
144
+ --name ${{ github.job }} \
145
+ --branch $HEAD_REF \
146
+ --commit ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha }} \
147
+ --scenario script.js \
148
+ --duration ${{ github.event.inputs.duration || env.DURATION }} \
149
+ --hf-repo ggml-org/models \
150
+ --hf-file ${{ matrix.model }}/ggml-model-${{ matrix.ftype }}.gguf \
151
+ --model-path-prefix /models \
152
+ --parallel ${{ env.N_USERS }} \
153
+ -ngl 33 \
154
+ --batch-size 2048 \
155
+ --ubatch-size 256 \
156
+ --ctx-size 16384 \
157
+ --n-prompts 1000 \
158
+ --max-prompt-tokens 1024 \
159
+ --max-tokens 2048
160
+
161
+ cat results.github.env >> $GITHUB_ENV
162
+
163
+ # Remove dataset as we do not want it in the artefact
164
+ rm ShareGPT_V3_unfiltered_cleaned_split.json
165
+
166
+ - uses: actions/upload-artifact@v4
167
+ with:
168
+ name: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
169
+ compression-level: 9
170
+ path: |
171
+ examples/server/bench/*.jpg
172
+ examples/server/bench/*.json
173
+ examples/server/bench/*.log
174
+
175
+ - name: Commit status
176
+ uses: Sibz/github-status-action@v1
177
+ with:
178
+ authToken: ${{secrets.GITHUB_TOKEN}}
179
+ sha: ${{ inputs.sha || github.event.pull_request.head.sha || github.sha }}
180
+ context: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
181
+ description: |
182
+ ${{ env.BENCH_RESULTS }}
183
+ state: 'success'
184
+
185
+ - name: Upload benchmark images
186
+ uses: devicons/[email protected]
187
+ continue-on-error: true # Important as it looks unstable: 503
188
+ id: imgur_step
189
+ with:
190
+ client_id: ${{secrets.IMGUR_CLIENT_ID}}
191
+ path: |
192
+ examples/server/bench/prompt_tokens_seconds.jpg
193
+ examples/server/bench/predicted_tokens_seconds.jpg
194
+ examples/server/bench/kv_cache_usage_ratio.jpg
195
+ examples/server/bench/requests_processing.jpg
196
+
197
+ - name: Extract mermaid
198
+ id: set_mermaid
199
+ run: |
200
+ set -eux
201
+
202
+ cd examples/server/bench
203
+ PROMPT_TOKENS_SECONDS=$(cat prompt_tokens_seconds.mermaid)
204
+ echo "PROMPT_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
205
+ echo "$PROMPT_TOKENS_SECONDS" >> $GITHUB_ENV
206
+ echo "EOF" >> $GITHUB_ENV
207
+
208
+ PREDICTED_TOKENS_SECONDS=$(cat predicted_tokens_seconds.mermaid)
209
+ echo "PREDICTED_TOKENS_SECONDS<<EOF" >> $GITHUB_ENV
210
+ echo "$PREDICTED_TOKENS_SECONDS" >> $GITHUB_ENV
211
+ echo "EOF" >> $GITHUB_ENV
212
+
213
+ KV_CACHE_USAGE_RATIO=$(cat kv_cache_usage_ratio.mermaid)
214
+ echo "KV_CACHE_USAGE_RATIO<<EOF" >> $GITHUB_ENV
215
+ echo "$KV_CACHE_USAGE_RATIO" >> $GITHUB_ENV
216
+ echo "EOF" >> $GITHUB_ENV
217
+
218
+ REQUESTS_PROCESSING=$(cat requests_processing.mermaid)
219
+ echo "REQUESTS_PROCESSING<<EOF" >> $GITHUB_ENV
220
+ echo "$REQUESTS_PROCESSING" >> $GITHUB_ENV
221
+ echo "EOF" >> $GITHUB_ENV
222
+
223
+ - name: Extract image url
224
+ id: extract_image_url
225
+ continue-on-error: true
226
+ run: |
227
+ set -eux
228
+
229
+ echo "IMAGE_O=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[0] }}" >> $GITHUB_ENV
230
+ echo "IMAGE_1=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[1] }}" >> $GITHUB_ENV
231
+ echo "IMAGE_2=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[2] }}" >> $GITHUB_ENV
232
+ echo "IMAGE_3=${{ fromJSON(steps.imgur_step.outputs.imgur_urls)[3] }}" >> $GITHUB_ENV
233
+
234
+ - name: Comment PR
235
+ uses: mshick/add-pr-comment@v2
236
+ id: comment_pr
237
+ if: ${{ github.event.pull_request != '' && matrix.pr_comment_enabled == 'true' }}
238
+ with:
239
+ message-id: bench-server-${{ github.job }}-${{ env.RUNNER_LABEL }}-${{ matrix.model }}-${{ matrix.ftype }}
240
+ message: |
241
+ <p align="center">
242
+
243
+ 📈 **llama.cpp server** for _${{ github.job }}_ on _${{ env.RUNNER_LABEL }}_ for `${{ matrix.model }}`-`${{ matrix.ftype }}`: **${{ env.BENCH_ITERATIONS}} iterations** 🚀
244
+
245
+ </p>
246
+
247
+ <details>
248
+
249
+ <summary>Expand details for performance related PR only</summary>
250
+
251
+ - Concurrent users: ${{ env.N_USERS }}, duration: ${{ github.event.inputs.duration || env.DURATION }}
252
+ - HTTP request : avg=${{ env.HTTP_REQ_DURATION_AVG }}ms p(95)=${{ env.HTTP_REQ_DURATION_P_95_ }}ms fails=${{ env.HTTP_REQ_FAILED_PASSES }}, finish reason: stop=${{ env.LLAMACPP_COMPLETIONS_STOP_RATE_PASSES }} truncated=${{ env.LLAMACPP_COMPLETIONS_TRUNCATED_RATE_PASSES }}
253
+ - Prompt processing (pp): avg=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_PROMPT_PROCESSING_SECOND_P_95_ }}tk/s
254
+ - Token generation (tg): avg=${{ env.LLAMACPP_TOKENS_SECOND_AVG }}tk/s p(95)=${{ env.LLAMACPP_TOKENS_SECOND_P_95_ }}tk/s
255
+ - ${{ env.BENCH_GRAPH_XLABEL }}
256
+
257
+
258
+ <p align="center">
259
+
260
+ <img width="100%" height="100%" src="${{ env.IMAGE_O }}" alt="prompt_tokens_seconds" />
261
+
262
+ <details>
263
+
264
+ <summary>More</summary>
265
+
266
+ ```mermaid
267
+ ${{ env.PROMPT_TOKENS_SECONDS }}
268
+ ```
269
+
270
+ </details>
271
+
272
+ <img width="100%" height="100%" src="${{ env.IMAGE_1 }}" alt="predicted_tokens_seconds"/>
273
+
274
+ <details>
275
+ <summary>More</summary>
276
+
277
+ ```mermaid
278
+ ${{ env.PREDICTED_TOKENS_SECONDS }}
279
+ ```
280
+
281
+ </details>
282
+
283
+ </p>
284
+
285
+ <details>
286
+
287
+ <summary>Details</summary>
288
+
289
+ <p align="center">
290
+
291
+ <img width="100%" height="100%" src="${{ env.IMAGE_2 }}" alt="kv_cache_usage_ratio" />
292
+
293
+ <details>
294
+ <summary>More</summary>
295
+
296
+ ```mermaid
297
+ ${{ env.KV_CACHE_USAGE_RATIO }}
298
+ ```
299
+
300
+ </details>
301
+
302
+ <img width="100%" height="100%" src="${{ env.IMAGE_3 }}" alt="requests_processing"/>
303
+
304
+ <details>
305
+ <summary>More</summary>
306
+
307
+ ```mermaid
308
+ ${{ env.REQUESTS_PROCESSING }}
309
+ ```
310
+
311
+ </details>
312
+
313
+ </p>
314
+ </details>
315
+ </details>
llama.cpp/.github/workflows/build.yml ADDED
@@ -0,0 +1,1645 @@
1
+ name: CI
2
+
3
+ on:
4
+ workflow_dispatch: # allows manual triggering
5
+ inputs:
6
+ create_release:
7
+ description: 'Create new release'
8
+ required: true
9
+ type: boolean
10
+ push:
11
+ branches:
12
+ - master
13
+ paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']
14
+ pull_request:
15
+ types: [opened, synchronize, reopened]
16
+ paths: ['.github/workflows/build.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.cuh', '**/*.swift', '**/*.m', '**/*.metal', '**/*.comp']
17
+
18
+ concurrency:
19
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
20
+ cancel-in-progress: true
21
+
22
+ # Fine-grained permissions
23
+ # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
24
+ permissions:
25
+ contents: write # for creating release
26
+
27
+ env:
28
+ BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
29
+ GGML_NLOOP: 3
30
+ GGML_N_THREADS: 1
31
+ LLAMA_LOG_COLORS: 1
32
+ LLAMA_LOG_PREFIX: 1
33
+ LLAMA_LOG_TIMESTAMPS: 1
34
+
35
+ jobs:
36
+ macOS-latest-cmake-arm64:
37
+ runs-on: macos-14
38
+
39
+ steps:
40
+ - name: Clone
41
+ id: checkout
42
+ uses: actions/checkout@v4
43
+ with:
44
+ fetch-depth: 0
45
+
46
+ - name: ccache
47
+ uses: hendrikmuhs/[email protected]
48
+ with:
49
+ key: macOS-latest-cmake-arm64
50
+ evict-old-files: 1d
51
+
52
+ - name: Dependencies
53
+ id: depends
54
+ continue-on-error: true
55
+ run: |
56
+ brew update
57
+
58
+ - name: Build
59
+ id: cmake_build
60
+ run: |
61
+ sysctl -a
62
+ cmake -B build \
63
+ -DCMAKE_BUILD_RPATH="@loader_path" \
64
+ -DLLAMA_FATAL_WARNINGS=ON \
65
+ -DLLAMA_CURL=ON \
66
+ -DGGML_METAL_USE_BF16=ON \
67
+ -DGGML_METAL_EMBED_LIBRARY=ON \
68
+ -DGGML_RPC=ON
69
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
70
+
71
+ - name: Test
72
+ id: cmake_test
73
+ run: |
74
+ cd build
75
+ ctest -L 'main|curl' --verbose --timeout 900
76
+
77
+ - name: Determine tag name
78
+ id: tag
79
+ shell: bash
80
+ run: |
81
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
82
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
83
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
84
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
85
+ else
86
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
87
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
88
+ fi
89
+
90
+ - name: Pack artifacts
91
+ id: pack_artifacts
92
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
93
+ run: |
94
+ cp LICENSE ./build/bin/
95
+ cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
96
+ zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip ./build/bin/*
97
+
98
+ - name: Upload artifacts
99
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
100
+ uses: actions/upload-artifact@v4
101
+ with:
102
+ path: llama-${{ steps.tag.outputs.name }}-bin-macos-arm64.zip
103
+ name: llama-bin-macos-arm64.zip
104
+
105
+ macOS-latest-cmake-x64:
106
+ runs-on: macos-13
107
+
108
+ steps:
109
+ - name: Clone
110
+ id: checkout
111
+ uses: actions/checkout@v4
112
+ with:
113
+ fetch-depth: 0
114
+
115
+ - name: ccache
116
+ uses: hendrikmuhs/[email protected]
117
+ with:
118
+ key: macOS-latest-cmake-x64
119
+ evict-old-files: 1d
120
+
121
+ - name: Dependencies
122
+ id: depends
123
+ continue-on-error: true
124
+ run: |
125
+ brew update
126
+
127
+ - name: Build
128
+ id: cmake_build
129
+ run: |
130
+ sysctl -a
131
+ # Metal is disabled due to intermittent failures with Github runners not having a GPU:
132
+ # https://github.com/ggerganov/llama.cpp/actions/runs/8635935781/job/23674807267#step:5:2313
133
+ cmake -B build \
134
+ -DCMAKE_BUILD_RPATH="@loader_path" \
135
+ -DLLAMA_FATAL_WARNINGS=ON \
136
+ -DLLAMA_CURL=ON \
137
+ -DGGML_METAL=OFF \
138
+ -DGGML_RPC=ON
139
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
140
+
141
+ - name: Test
142
+ id: cmake_test
143
+ run: |
144
+ cd build
145
+ ctest -L main --verbose --timeout 900
146
+
147
+ - name: Determine tag name
148
+ id: tag
149
+ shell: bash
150
+ run: |
151
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
152
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
153
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
154
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
155
+ else
156
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
157
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
158
+ fi
159
+
160
+ - name: Pack artifacts
161
+ id: pack_artifacts
162
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
163
+ run: |
164
+ cp LICENSE ./build/bin/
165
+ cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
166
+ zip -r llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip ./build/bin/*
167
+
168
+ - name: Upload artifacts
169
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
170
+ uses: actions/upload-artifact@v4
171
+ with:
172
+ path: llama-${{ steps.tag.outputs.name }}-bin-macos-x64.zip
173
+ name: llama-bin-macos-x64.zip
174
+
175
+ ubuntu-cpu-cmake:
176
+ runs-on: ubuntu-22.04
177
+
178
+ steps:
179
+ - name: Clone
180
+ id: checkout
181
+ uses: actions/checkout@v4
182
+ with:
183
+ fetch-depth: 0
184
+
185
+ - name: ccache
186
+ uses: hendrikmuhs/[email protected]
187
+ with:
188
+ key: ubuntu-cpu-cmake
189
+ evict-old-files: 1d
190
+
191
+ - name: Dependencies
192
+ id: depends
193
+ run: |
194
+ sudo apt-get update
195
+ sudo apt-get install build-essential libcurl4-openssl-dev
196
+
197
+ - name: Build
198
+ id: cmake_build
199
+ run: |
200
+ cmake -B build \
201
+ -DLLAMA_FATAL_WARNINGS=ON \
202
+ -DLLAMA_CURL=ON \
203
+ -DGGML_RPC=ON
204
+ cmake --build build --config Release -j $(nproc)
205
+
206
+ - name: Test
207
+ id: cmake_test
208
+ run: |
209
+ cd build
210
+ ctest -L 'main|curl' --verbose --timeout 900
211
+
212
+ - name: Test llama2c conversion
213
+ id: llama2c_test
214
+ run: |
215
+ cd build
216
+ echo "Fetch tokenizer"
217
+ wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/tok512.bin
218
+ echo "Fetch llama2c model"
219
+ wget https://huggingface.co/karpathy/tinyllamas/resolve/main/stories260K/stories260K.bin
220
+ ./bin/llama-convert-llama2c-to-ggml --copy-vocab-from-model ./tok512.bin --llama2c-model stories260K.bin --llama2c-output-model stories260K.gguf
221
+ ./bin/llama-cli -m stories260K.gguf -p "One day, Lily met a Shoggoth" -n 500 -c 256
222
+
223
+ - name: Determine tag name
224
+ id: tag
225
+ shell: bash
226
+ run: |
227
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
228
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
229
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
230
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
231
+ else
232
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
233
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
234
+ fi
235
+
236
+ - name: Pack artifacts
237
+ id: pack_artifacts
238
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
239
+ run: |
240
+ cp LICENSE ./build/bin/
241
+ cp examples/run/linenoise.cpp/LICENSE ./build/bin/LICENSE.linenoise.cpp
242
+ zip -r llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip ./build/bin/*
243
+
244
+ - name: Upload artifacts
245
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
246
+ uses: actions/upload-artifact@v4
247
+ with:
248
+ path: llama-${{ steps.tag.outputs.name }}-bin-ubuntu-x64.zip
249
+ name: llama-bin-ubuntu-x64.zip
250
+
251
+ ubuntu-latest-cmake-sanitizer:
252
+ runs-on: ubuntu-latest
253
+
254
+ continue-on-error: true
255
+
256
+ strategy:
257
+ matrix:
258
+ sanitizer: [ADDRESS, THREAD, UNDEFINED]
259
+ build_type: [Debug]
260
+
261
+ steps:
262
+ - name: Clone
263
+ id: checkout
264
+ uses: actions/checkout@v4
265
+
266
+ - name: ccache
267
+ uses: hendrikmuhs/[email protected]
268
+ with:
269
+ key: ubuntu-latest-cmake-sanitizer-${{ matrix.sanitizer }}
270
+ evict-old-files: 1d
271
+
272
+ - name: Dependencies
273
+ id: depends
274
+ run: |
275
+ sudo apt-get update
276
+ sudo apt-get install build-essential
277
+
278
+ - name: Build
279
+ id: cmake_build
280
+ if: ${{ matrix.sanitizer != 'THREAD' }}
281
+ run: |
282
+ cmake -B build \
283
+ -DLLAMA_FATAL_WARNINGS=ON \
284
+ -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
285
+ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }}
286
+ cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
287
+
288
+ - name: Build (no OpenMP)
289
+ id: cmake_build_no_openmp
290
+ if: ${{ matrix.sanitizer == 'THREAD' }}
291
+ run: |
292
+ cmake -B build \
293
+ -DLLAMA_FATAL_WARNINGS=ON \
294
+ -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
295
+ -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
296
+ -DGGML_OPENMP=OFF
297
+ cmake --build build --config ${{ matrix.build_type }} -j $(nproc)
298
+
299
+ - name: Test
300
+ id: cmake_test
301
+ run: |
302
+ cd build
303
+ ctest -L main --verbose --timeout 900
304
+
305
+ ubuntu-latest-llguidance:
306
+ runs-on: ubuntu-latest
307
+
308
+ steps:
309
+ - name: Clone
310
+ id: checkout
311
+ uses: actions/checkout@v4
312
+
313
+ - name: Dependencies
314
+ id: depends
315
+ run: |
316
+ sudo apt-get update
317
+ sudo apt-get install build-essential
318
+
319
+ - name: Build
320
+ id: cmake_build
321
+ run: |
322
+ mkdir build
323
+ cd build
324
+ cmake .. \
325
+ -DLLAMA_FATAL_WARNINGS=ON \
326
+ -DLLAMA_LLGUIDANCE=ON
327
+ cmake --build . --config Release -j $(nproc)
328
+
329
+ - name: Test
330
+ id: cmake_test
331
+ run: |
332
+ cd build
333
+ ctest -L main --verbose --timeout 900
334
+
335
+ ubuntu-latest-cmake-rpc:
336
+ runs-on: ubuntu-latest
337
+
338
+ continue-on-error: true
339
+
340
+ steps:
341
+ - name: Clone
342
+ id: checkout
343
+ uses: actions/checkout@v4
344
+
345
+ - name: ccache
346
+ uses: hendrikmuhs/[email protected]
347
+ with:
348
+ key: ubuntu-latest-cmake-rpc
349
+ evict-old-files: 1d
350
+
351
+ - name: Dependencies
352
+ id: depends
353
+ run: |
354
+ sudo apt-get update
355
+ sudo apt-get install build-essential
356
+
357
+ - name: Build
358
+ id: cmake_build
359
+ run: |
360
+ cmake -B build \
361
+ -DGGML_RPC=ON
362
+ cmake --build build --config Release -j $(nproc)
363
+
364
+ - name: Test
365
+ id: cmake_test
366
+ run: |
367
+ cd build
368
+ ctest -L main --verbose
369
+
370
+ ubuntu-22-cmake-vulkan:
371
+ runs-on: ubuntu-22.04
372
+
373
+ steps:
374
+ - name: Clone
375
+ id: checkout
376
+ uses: actions/checkout@v4
377
+
378
+ - name: ccache
379
+ uses: hendrikmuhs/[email protected]
380
+ with:
381
+ key: ubuntu-22-cmake-vulkan
382
+ evict-old-files: 1d
383
+
384
+ - name: Dependencies
385
+ id: depends
386
+ run: |
387
+ wget -qO - https://packages.lunarg.com/lunarg-signing-key-pub.asc | sudo apt-key add -
388
+ sudo wget -qO /etc/apt/sources.list.d/lunarg-vulkan-jammy.list https://packages.lunarg.com/vulkan/lunarg-vulkan-jammy.list
389
+ sudo apt-get update -y
390
+ sudo apt-get install -y build-essential mesa-vulkan-drivers vulkan-sdk
391
+
392
+ - name: Build
393
+ id: cmake_build
394
+ run: |
395
+ cmake -B build \
396
+ -DGGML_VULKAN=ON
397
+ cmake --build build --config Release -j $(nproc)
398
+
399
+ - name: Test
400
+ id: cmake_test
401
+ run: |
402
+ cd build
403
+ # This is using llvmpipe and runs slower than other backends
404
+ ctest -L main --verbose --timeout 1800
405
+
406
+ ubuntu-22-cmake-hip:
407
+ runs-on: ubuntu-22.04
408
+ container: rocm/dev-ubuntu-22.04:6.0.2
409
+
410
+ steps:
411
+ - name: Clone
412
+ id: checkout
413
+ uses: actions/checkout@v4
414
+
415
+ - name: Dependencies
416
+ id: depends
417
+ run: |
418
+ sudo apt-get update
419
+ sudo apt-get install -y build-essential git cmake rocblas-dev hipblas-dev
420
+
421
+ - name: ccache
422
+ uses: hendrikmuhs/[email protected]
423
+ with:
424
+ key: ubuntu-22-cmake-hip
425
+ evict-old-files: 1d
426
+
427
+ - name: Build with native CMake HIP support
428
+ id: cmake_build
429
+ run: |
430
+ cmake -B build -S . \
431
+ -DCMAKE_HIP_COMPILER="$(hipconfig -l)/clang" \
432
+ -DGGML_HIP=ON
433
+ cmake --build build --config Release -j $(nproc)
434
+
435
+ - name: Build with legacy HIP support
436
+ id: cmake_build_legacy_hip
437
+ run: |
438
+ cmake -B build2 -S . \
439
+ -DCMAKE_C_COMPILER=hipcc \
440
+ -DCMAKE_CXX_COMPILER=hipcc \
441
+ -DGGML_HIP=ON
442
+ cmake --build build2 --config Release -j $(nproc)
443
+
444
+ ubuntu-22-cmake-musa:
445
+ runs-on: ubuntu-22.04
446
+ container: mthreads/musa:rc3.1.0-devel-ubuntu22.04
447
+
448
+ steps:
449
+ - name: Clone
450
+ id: checkout
451
+ uses: actions/checkout@v4
452
+
453
+ - name: Dependencies
454
+ id: depends
455
+ run: |
456
+ apt-get update
457
+ apt-get install -y build-essential git cmake libcurl4-openssl-dev
458
+
459
+ - name: ccache
460
+ uses: hendrikmuhs/[email protected]
461
+ with:
462
+ key: ubuntu-22-cmake-musa
463
+ evict-old-files: 1d
464
+
465
+ - name: Build with native CMake MUSA support
466
+ id: cmake_build
467
+ run: |
468
+ cmake -B build -S . \
469
+ -DGGML_MUSA=ON
470
+ cmake --build build --config Release -j $(nproc)
471
+
472
+ ubuntu-22-cmake-sycl:
473
+ runs-on: ubuntu-22.04
474
+
475
+ continue-on-error: true
476
+
477
+ steps:
478
+ - uses: actions/checkout@v4
479
+
480
+ - name: add oneAPI to apt
481
+ shell: bash
482
+ run: |
483
+ cd /tmp
484
+ wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
485
+ sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
486
+ rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
487
+ sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
488
+
489
+ - name: install oneAPI dpcpp compiler
490
+ shell: bash
491
+ run: |
492
+ sudo apt update
493
+ sudo apt install intel-oneapi-compiler-dpcpp-cpp
494
+
495
+ - name: install oneAPI MKL library
496
+ shell: bash
497
+ run: |
498
+ sudo apt install intel-oneapi-mkl-devel
499
+
500
+ - name: Clone
501
+ id: checkout
502
+ uses: actions/checkout@v4
503
+
504
+ - name: ccache
505
+ uses: hendrikmuhs/[email protected]
506
+ with:
507
+ key: ubuntu-22-cmake-sycl
508
+ evict-old-files: 1d
509
+
510
+ - name: Build
511
+ id: cmake_build
512
+ run: |
513
+ source /opt/intel/oneapi/setvars.sh
514
+ cmake -B build \
515
+ -DGGML_SYCL=ON \
516
+ -DCMAKE_C_COMPILER=icx \
517
+ -DCMAKE_CXX_COMPILER=icpx
518
+ cmake --build build --config Release -j $(nproc)
519
+
520
+ ubuntu-22-cmake-sycl-fp16:
521
+ runs-on: ubuntu-22.04
522
+
523
+ continue-on-error: true
524
+
525
+ steps:
526
+ - uses: actions/checkout@v4
527
+
528
+ - name: add oneAPI to apt
529
+ shell: bash
530
+ run: |
531
+ cd /tmp
532
+ wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
533
+ sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
534
+ rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
535
+ sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
536
+
537
+ - name: install oneAPI dpcpp compiler
538
+ shell: bash
539
+ run: |
540
+ sudo apt update
541
+ sudo apt install intel-oneapi-compiler-dpcpp-cpp
542
+
543
+ - name: install oneAPI MKL library
544
+ shell: bash
545
+ run: |
546
+ sudo apt install intel-oneapi-mkl-devel
547
+
548
+ - name: Clone
549
+ id: checkout
550
+ uses: actions/checkout@v4
551
+
552
+ - name: ccache
553
+ uses: hendrikmuhs/[email protected]
554
+ with:
555
+ key: ubuntu-22-cmake-sycl-fp16
556
+ evict-old-files: 1d
557
+
558
+ - name: Build
559
+ id: cmake_build
560
+ run: |
561
+ source /opt/intel/oneapi/setvars.sh
562
+ cmake -B build \
563
+ -DGGML_SYCL=ON \
564
+ -DCMAKE_C_COMPILER=icx \
565
+ -DCMAKE_CXX_COMPILER=icpx \
566
+ -DGGML_SYCL_F16=ON
567
+ cmake --build build --config Release -j $(nproc)
568
+
569
+ macOS-latest-cmake-ios:
570
+ runs-on: macos-latest
571
+
572
+ steps:
573
+ - name: Clone
574
+ id: checkout
575
+ uses: actions/checkout@v4
576
+
577
+ - name: ccache
578
+ uses: hendrikmuhs/[email protected]
579
+ with:
580
+ key: macOS-latest-cmake-ios
581
+ evict-old-files: 1d
582
+
583
+ - name: Dependencies
584
+ id: depends
585
+ continue-on-error: true
586
+ run: |
587
+ brew update
588
+
589
+ - name: Build
590
+ id: cmake_build
591
+ run: |
592
+ sysctl -a
593
+ cmake -B build -G Xcode \
594
+ -DGGML_METAL_USE_BF16=ON \
595
+ -DGGML_METAL_EMBED_LIBRARY=ON \
596
+ -DLLAMA_BUILD_EXAMPLES=OFF \
597
+ -DLLAMA_BUILD_TESTS=OFF \
598
+ -DLLAMA_BUILD_SERVER=OFF \
599
+ -DCMAKE_SYSTEM_NAME=iOS \
600
+ -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
601
+ -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
602
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
603
+
604
+ macOS-latest-cmake-tvos:
605
+ runs-on: macos-latest
606
+
607
+ steps:
608
+ - name: Clone
609
+ id: checkout
610
+ uses: actions/checkout@v4
611
+
612
+ - name: ccache
613
+ uses: hendrikmuhs/[email protected]
614
+ with:
615
+ key: macOS-latest-cmake-tvos
616
+ evict-old-files: 1d
617
+
618
+ - name: Dependencies
619
+ id: depends
620
+ continue-on-error: true
621
+ run: |
622
+ brew update
623
+
624
+ - name: Build
625
+ id: cmake_build
626
+ run: |
627
+ sysctl -a
628
+ cmake -B build -G Xcode \
629
+ -DGGML_METAL_USE_BF16=ON \
630
+ -DGGML_METAL_EMBED_LIBRARY=ON \
631
+ -DLLAMA_BUILD_EXAMPLES=OFF \
632
+ -DLLAMA_BUILD_TESTS=OFF \
633
+ -DLLAMA_BUILD_SERVER=OFF \
634
+ -DCMAKE_SYSTEM_NAME=tvOS \
635
+ -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
636
+ -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
637
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
638
+
639
+ macOS-latest-swift:
640
+ runs-on: macos-latest
641
+
642
+ strategy:
643
+ matrix:
644
+ destination: ['generic/platform=macOS', 'generic/platform=iOS', 'generic/platform=tvOS']
645
+
646
+ steps:
647
+ - name: Clone
648
+ id: checkout
649
+ uses: actions/checkout@v4
650
+
651
+ - name: ccache
652
+ uses: hendrikmuhs/[email protected]
653
+ with:
654
+ key: macOS-latest-swift
655
+ evict-old-files: 1d
656
+
657
+ - name: Dependencies
658
+ id: depends
659
+ continue-on-error: true
660
+ run: |
661
+ brew update
662
+
663
+ - name: Build llama.cpp with CMake
664
+ id: cmake_build
665
+ run: |
666
+ sysctl -a
667
+ cmake -B build -G Xcode \
668
+ -DGGML_METAL_USE_BF16=ON \
669
+ -DGGML_METAL_EMBED_LIBRARY=ON \
670
+ -DLLAMA_BUILD_EXAMPLES=OFF \
671
+ -DLLAMA_BUILD_TESTS=OFF \
672
+ -DLLAMA_BUILD_SERVER=OFF \
673
+ -DCMAKE_OSX_ARCHITECTURES="arm64;x86_64"
674
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu)
675
+ sudo cmake --install build --config Release
676
+
677
+ - name: xcodebuild for swift package
678
+ id: xcodebuild
679
+ run: |
680
+ xcodebuild -scheme llama-Package -destination "${{ matrix.destination }}"
681
+
682
+ windows-msys2:
683
+ runs-on: windows-latest
684
+
685
+ strategy:
686
+ fail-fast: false
687
+ matrix:
688
+ include:
689
+ - { sys: UCRT64, env: ucrt-x86_64, build: Release }
690
+ - { sys: CLANG64, env: clang-x86_64, build: Release }
691
+
692
+ steps:
693
+ - name: Clone
694
+ uses: actions/checkout@v4
695
+
696
+ - name: ccache
697
+ uses: hendrikmuhs/[email protected]
698
+ with:
699
+ key: windows-msys2
700
+ variant: sccache
701
+ evict-old-files: 1d
702
+
703
+ - name: Setup ${{ matrix.sys }}
704
+ uses: msys2/setup-msys2@v2
705
+ with:
706
+ update: true
707
+ msystem: ${{matrix.sys}}
708
+ install: >-
709
+ base-devel
710
+ git
711
+ mingw-w64-${{matrix.env}}-toolchain
712
+ mingw-w64-${{matrix.env}}-cmake
713
+ mingw-w64-${{matrix.env}}-openblas
714
+
715
+ - name: Build using CMake
716
+ shell: msys2 {0}
717
+ run: |
718
+ cmake -B build
719
+ cmake --build build --config ${{ matrix.build }} -j $(nproc)
720
+
721
+ - name: Clean after building using CMake
722
+ shell: msys2 {0}
723
+ run: |
724
+ rm -rf build
725
+
726
+ - name: Build using CMake w/ OpenBLAS
727
+ shell: msys2 {0}
728
+ run: |
729
+ cmake -B build -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS
730
+ cmake --build build --config ${{ matrix.build }} -j $(nproc)
731
+
732
+ windows-latest-cmake:
733
+ runs-on: windows-latest
734
+
735
+ env:
736
+ OPENBLAS_VERSION: 0.3.23
737
+ SDE_VERSION: 9.33.0-2024-01-07
738
+ VULKAN_VERSION: 1.3.261.1
739
+
740
+ strategy:
741
+ matrix:
742
+ include:
743
+ - build: 'noavx-x64'
744
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX=OFF -DGGML_AVX2=OFF -DGGML_FMA=OFF'
745
+ - build: 'avx2-x64'
746
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON'
747
+ - build: 'avx-x64'
748
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX2=OFF'
749
+ - build: 'avx512-x64'
750
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_AVX512=ON'
751
+ - build: 'openblas-x64'
752
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS -DBLAS_INCLUDE_DIRS="$env:RUNNER_TEMP/openblas/include" -DBLAS_LIBRARIES="$env:RUNNER_TEMP/openblas/lib/openblas.lib"'
753
+ - build: 'kompute-x64'
754
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_KOMPUTE=ON -DKOMPUTE_OPT_DISABLE_VULKAN_VERSION_CHECK=ON'
755
+ - build: 'vulkan-x64'
756
+ defines: '-DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DGGML_RPC=ON -DGGML_VULKAN=ON'
757
+ - build: 'llvm-arm64'
758
+ defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
759
+ - build: 'msvc-arm64'
760
+ defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-msvc.cmake -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON'
761
+ - build: 'llvm-arm64-opencl-adreno'
762
+ defines: '-G "Ninja Multi-Config" -D CMAKE_TOOLCHAIN_FILE=cmake/arm64-windows-llvm.cmake -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" -DGGML_OPENCL=ON -DGGML_OPENCL_USE_ADRENO_KERNELS=ON'
763
+
764
+ steps:
765
+ - name: Clone
766
+ id: checkout
767
+ uses: actions/checkout@v4
768
+ with:
769
+ fetch-depth: 0
770
+
771
+ - name: ccache
772
+ uses: hendrikmuhs/[email protected]
773
+ with:
774
+ key: windows-latest-cmake-${{ matrix.build }}
775
+ variant: sccache
776
+ evict-old-files: 1d
777
+
778
+ - name: Clone Kompute submodule
779
+ id: clone_kompute
780
+ if: ${{ matrix.build == 'kompute-x64' }}
781
+ run: |
782
+ git submodule update --init ggml/src/ggml-kompute/kompute
783
+
784
+ - name: Download OpenBLAS
785
+ id: get_openblas
786
+ if: ${{ matrix.build == 'openblas-x64' }}
787
+ run: |
788
+ curl.exe -o $env:RUNNER_TEMP/openblas.zip -L "https://github.com/xianyi/OpenBLAS/releases/download/v${env:OPENBLAS_VERSION}/OpenBLAS-${env:OPENBLAS_VERSION}-x64.zip"
789
+ curl.exe -o $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt -L "https://github.com/xianyi/OpenBLAS/raw/v${env:OPENBLAS_VERSION}/LICENSE"
790
+ mkdir $env:RUNNER_TEMP/openblas
791
+ tar.exe -xvf $env:RUNNER_TEMP/openblas.zip -C $env:RUNNER_TEMP/openblas
792
+ $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
793
+ $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
794
+ $lib = $(join-path $msvc 'bin\Hostx64\x64\lib.exe')
795
+ & $lib /machine:x64 "/def:${env:RUNNER_TEMP}/openblas/lib/libopenblas.def" "/out:${env:RUNNER_TEMP}/openblas/lib/openblas.lib" /name:openblas.dll
796
+
797
+ - name: Install Vulkan SDK
798
+ id: get_vulkan
799
+ if: ${{ matrix.build == 'kompute-x64' || matrix.build == 'vulkan-x64' }}
800
+ run: |
801
+ curl.exe -o $env:RUNNER_TEMP/VulkanSDK-Installer.exe -L "https://sdk.lunarg.com/sdk/download/${env:VULKAN_VERSION}/windows/VulkanSDK-${env:VULKAN_VERSION}-Installer.exe"
802
+ & "$env:RUNNER_TEMP\VulkanSDK-Installer.exe" --accept-licenses --default-answer --confirm-command install
803
+ Add-Content $env:GITHUB_ENV "VULKAN_SDK=C:\VulkanSDK\${env:VULKAN_VERSION}"
804
+ Add-Content $env:GITHUB_PATH "C:\VulkanSDK\${env:VULKAN_VERSION}\bin"
805
+
806
+ - name: Install Ninja
807
+ id: install_ninja
808
+ run: |
809
+ choco install ninja
810
+
811
+ - name: Install OpenCL Headers and Libs
812
+ id: install_opencl
813
+ if: ${{ matrix.build == 'llvm-arm64-opencl-adreno' }}
814
+ run: |
815
+ git clone https://github.com/KhronosGroup/OpenCL-Headers
816
+ cd OpenCL-Headers
817
+ cmake -B build `
818
+ -DBUILD_TESTING=OFF `
819
+ -DOPENCL_HEADERS_BUILD_TESTING=OFF `
820
+ -DOPENCL_HEADERS_BUILD_CXX_TESTS=OFF `
821
+ -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
822
+ cmake --build build --target install
823
+ git clone https://github.com/KhronosGroup/OpenCL-ICD-Loader
824
+ cd OpenCL-ICD-Loader
825
+ cmake -B build-arm64-release `
826
+ -A arm64 `
827
+ -DCMAKE_PREFIX_PATH="$env:RUNNER_TEMP/opencl-arm64-release" `
828
+ -DCMAKE_INSTALL_PREFIX="$env:RUNNER_TEMP/opencl-arm64-release"
829
+ cmake --build build-arm64-release --target install --config release
830
+
831
+ - name: Build
832
+ id: cmake_build
833
+ run: |
834
+ cmake -S . -B build ${{ matrix.defines }}
835
+ cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS}
836
+
837
+ - name: Add libopenblas.dll
838
+ id: add_libopenblas_dll
839
+ if: ${{ matrix.build == 'openblas-x64' }}
840
+ run: |
841
+ cp $env:RUNNER_TEMP/openblas/bin/libopenblas.dll ./build/bin/Release/openblas.dll
842
+ cp $env:RUNNER_TEMP/OpenBLAS.LICENSE.txt ./build/bin/Release/OpenBLAS-${env:OPENBLAS_VERSION}.txt
843
+
844
+ - name: Check AVX512F support
845
+ id: check_avx512f
846
+ if: ${{ matrix.build == 'avx512-x64' }}
847
+ continue-on-error: true
848
+ run: |
849
+ cd build
850
+ $vcdir = $(vswhere -latest -products * -requires Microsoft.VisualStudio.Component.VC.Tools.x86.x64 -property installationPath)
851
+ $msvc = $(join-path $vcdir $('VC\Tools\MSVC\'+$(gc -raw $(join-path $vcdir 'VC\Auxiliary\Build\Microsoft.VCToolsVersion.default.txt')).Trim()))
852
+ $cl = $(join-path $msvc 'bin\Hostx64\x64\cl.exe')
853
+ echo 'int main(void){unsigned int a[4];__cpuid(a,7);return !(a[1]&65536);}' >> avx512f.c
854
+ & $cl /O2 /GS- /kernel avx512f.c /link /nodefaultlib /entry:main
855
+ .\avx512f.exe && echo "AVX512F: YES" && ( echo HAS_AVX512F=1 >> $env:GITHUB_ENV ) || echo "AVX512F: NO"
856
+
857
+ - name: Test
858
+ id: cmake_test
859
+ # not all machines have native AVX-512
860
+ if: ${{ matrix.build != 'msvc-arm64' && matrix.build != 'llvm-arm64' && matrix.build != 'llvm-arm64-opencl-adreno' && matrix.build != 'kompute-x64' && matrix.build != 'vulkan-x64' && (matrix.build != 'avx512-x64' || env.HAS_AVX512F == '1') }}
861
+ run: |
862
+ cd build
863
+ ctest -L main -C Release --verbose --timeout 900
864
+
865
+ - name: Test (Intel SDE)
866
+ id: cmake_test_sde
867
+ if: ${{ matrix.build == 'avx512-x64' && env.HAS_AVX512F == '0' }} # use Intel SDE for AVX-512 emulation
868
+ run: |
869
+ curl.exe -o $env:RUNNER_TEMP/sde.tar.xz -L "https://downloadmirror.intel.com/813591/sde-external-${env:SDE_VERSION}-win.tar.xz"
870
+ # for some weird reason windows tar doesn't like sde tar.xz
871
+ 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar.xz
872
+ 7z x "-o${env:RUNNER_TEMP}" $env:RUNNER_TEMP/sde.tar
873
+ $sde = $(join-path $env:RUNNER_TEMP sde-external-${env:SDE_VERSION}-win/sde.exe)
874
+ cd build
875
+ $env:LLAMA_SKIP_TESTS_SLOW_ON_EMULATOR = 1
876
+ & $sde -future -- ctest -L main -C Release --verbose --timeout 900
877
+
878
+ - name: Determine tag name
879
+ id: tag
880
+ shell: bash
881
+ run: |
882
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
883
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
884
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
885
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
886
+ else
887
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
888
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
889
+ fi
890
+
891
+ - name: Pack artifacts
892
+ id: pack_artifacts
893
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
894
+ run: |
895
+ Copy-Item LICENSE .\build\bin\Release\llama.cpp.txt
896
+ Copy-Item .\examples\run\linenoise.cpp\LICENSE .\build\bin\Release\linenoise.cpp.txt
897
+ 7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip .\build\bin\Release\*
898
+
899
+ - name: Upload artifacts
900
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
901
+ uses: actions/upload-artifact@v4
902
+ with:
903
+ path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}.zip
904
+ name: llama-bin-win-${{ matrix.build }}.zip
905
+
906
+ ubuntu-latest-cmake-cuda:
907
+ runs-on: ubuntu-latest
908
+ container: nvidia/cuda:12.6.2-devel-ubuntu24.04
909
+
910
+ steps:
911
+ - name: Clone
912
+ id: checkout
913
+ uses: actions/checkout@v4
914
+ with:
915
+ fetch-depth: 0
916
+
917
+ - name: Install dependencies
918
+ env:
919
+ DEBIAN_FRONTEND: noninteractive
920
+ run: |
921
+ apt update
922
+ apt install -y cmake build-essential ninja-build libgomp1 git
923
+
924
+ - name: ccache
925
+ uses: hendrikmuhs/[email protected]
926
+ with:
927
+ key: ubuntu-latest-cmake-cuda
928
+ evict-old-files: 1d
929
+
930
+ - name: Build with CMake
931
+ run: |
932
+ cmake -S . -B build -G Ninja \
933
+ -DCMAKE_BUILD_TYPE=Release \
934
+ -DCMAKE_CUDA_ARCHITECTURES=89-real \
935
+ -DCMAKE_EXE_LINKER_FLAGS=-Wl,--allow-shlib-undefined \
936
+ -DLLAMA_FATAL_WARNINGS=ON \
937
+ -DGGML_NATIVE=OFF \
938
+ -DGGML_CUDA=ON
939
+ cmake --build build
940
+
941
+ windows-2019-cmake-cuda:
942
+ runs-on: windows-2019
943
+
944
+ strategy:
945
+ matrix:
946
+ cuda: ['12.4', '11.7']
947
+ build: ['cuda']
948
+
949
+ steps:
950
+ - name: Clone
951
+ id: checkout
952
+ uses: actions/checkout@v4
953
+ with:
954
+ fetch-depth: 0
955
+
956
+ - name: Install ccache
957
+ uses: hendrikmuhs/[email protected]
958
+ with:
959
+ key: ${{ github.job }}-${{ matrix.cuda }}-${{ matrix.build }}
960
+ variant: sccache
961
+ evict-old-files: 1d
962
+
963
+ - name: Install Cuda Toolkit 11.7
964
+ if: ${{ matrix.cuda == '11.7' }}
965
+ run: |
966
+ mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
967
+ choco install unzip -y
968
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-11.7.99-archive.zip"
969
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-11.7.99-archive.zip"
970
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-11.7.99-archive.zip"
971
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-11.7.4.6-archive.zip"
972
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-11.7.91-archive.zip"
973
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-11.7.91-archive.zip"
974
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-11.7.101-archive.zip"
975
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-11.7.91-archive.zip"
976
+ unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7"
977
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cudart-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
978
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvcc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
979
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvrtc-windows-x86_64-11.7.99-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
980
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libcublas-windows-x86_64-11.7.4.6-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
981
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvtx-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
982
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\visual_studio_integration-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
983
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_nvprof-windows-x86_64-11.7.101-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
984
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\cuda_cccl-windows-x86_64-11.7.91-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" /E /I /H /Y
985
+ echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
986
+ echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
987
+ echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
988
+ echo "CUDA_PATH_V11_7=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v11.7" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
989
+
990
+ - name: Install Cuda Toolkit 12.4
991
+ if: ${{ matrix.cuda == '12.4' }}
992
+ run: |
993
+ mkdir -p "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
994
+ choco install unzip -y
995
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cudart/windows-x86_64/cuda_cudart-windows-x86_64-12.4.127-archive.zip"
996
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvcc/windows-x86_64/cuda_nvcc-windows-x86_64-12.4.131-archive.zip"
997
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvrtc/windows-x86_64/cuda_nvrtc-windows-x86_64-12.4.127-archive.zip"
998
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/libcublas/windows-x86_64/libcublas-windows-x86_64-12.4.5.8-archive.zip"
999
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvtx/windows-x86_64/cuda_nvtx-windows-x86_64-12.4.127-archive.zip"
1000
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_profiler_api/windows-x86_64/cuda_profiler_api-windows-x86_64-12.4.127-archive.zip"
1001
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/visual_studio_integration/windows-x86_64/visual_studio_integration-windows-x86_64-12.4.127-archive.zip"
1002
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_nvprof/windows-x86_64/cuda_nvprof-windows-x86_64-12.4.127-archive.zip"
1003
+ curl -O "https://developer.download.nvidia.com/compute/cuda/redist/cuda_cccl/windows-x86_64/cuda_cccl-windows-x86_64-12.4.127-archive.zip"
1004
+ unzip '*.zip' -d "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4"
1005
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cudart-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
1006
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvcc-windows-x86_64-12.4.131-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
1007
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvrtc-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
1008
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libcublas-windows-x86_64-12.4.5.8-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
1009
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvtx-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
1010
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_profiler_api-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
1011
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\visual_studio_integration-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
1012
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_nvprof-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
1013
+ xcopy "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\cuda_cccl-windows-x86_64-12.4.127-archive\*" "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" /E /I /H /Y
1014
+ echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\bin" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
1015
+ echo "C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4\libnvvp" | Out-File -FilePath $env:GITHUB_PATH -Encoding utf8 -Append
1016
+ echo "CUDA_PATH=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
1017
+ echo "CUDA_PATH_V12_4=C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v12.4" | Out-File -FilePath $env:GITHUB_ENV -Append -Encoding utf8
1018
+
1019
+ - name: Install Ninja
1020
+ id: install_ninja
1021
+ run: |
1022
+ choco install ninja
1023
+
1024
+ - name: Build
1025
+ id: cmake_build
1026
+ shell: cmd
1027
+ run: |
1028
+ call "C:\Program Files (x86)\Microsoft Visual Studio\2019\Enterprise\VC\Auxiliary\Build\vcvars64.bat"
1029
+ cmake -S . -B build -G "Ninja Multi-Config" ^
1030
+ -DLLAMA_BUILD_SERVER=ON ^
1031
+ -DGGML_NATIVE=OFF ^
1032
+ -DGGML_CUDA=ON ^
1033
+ -DGGML_RPC=ON
1034
+ set /A NINJA_JOBS=%NUMBER_OF_PROCESSORS%-1
1035
+ cmake --build build --config Release -j %NINJA_JOBS% -t ggml
1036
+ cmake --build build --config Release
1037
+
1038
+ - name: Determine tag name
1039
+ id: tag
1040
+ shell: bash
1041
+ run: |
1042
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
1043
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
1044
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
1045
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
1046
+ else
1047
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
1048
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
1049
+ fi
1050
+
1051
+ - name: Pack artifacts
1052
+ id: pack_artifacts
1053
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
1054
+ run: |
1055
+ 7z a llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip .\build\bin\Release\*
1056
+
1057
+ - name: Upload artifacts
1058
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
1059
+ uses: actions/upload-artifact@v4
1060
+ with:
1061
+ path: llama-${{ steps.tag.outputs.name }}-bin-win-${{ matrix.build }}-cu${{ matrix.cuda }}-x64.zip
1062
+ name: llama-bin-win-cu${{ matrix.cuda }}-x64.zip
1063
+
1064
+ - name: Copy and pack Cuda runtime
1065
+ if: ${{ github.event_name == 'push' && github.ref == 'refs/heads/master' }}
1066
+ run: |
1067
+ echo "Cuda install location: ${{ env.CUDA_PATH }}"
1068
+ $dst='.\build\bin\cudart\'
1069
+ robocopy "${{env.CUDA_PATH}}\bin" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
1070
+ robocopy "${{env.CUDA_PATH}}\lib" $dst cudart64_*.dll cublas64_*.dll cublasLt64_*.dll
1071
+ 7z a cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip $dst\*
1072
+
1073
+ - name: Upload Cuda runtime
1074
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
1075
+ uses: actions/upload-artifact@v4
1076
+ with:
1077
+ path: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
1078
+ name: cudart-llama-bin-win-cu${{ matrix.cuda }}-x64.zip
1079
+
1080
+ windows-latest-cmake-sycl:
1081
+ runs-on: windows-latest
1082
+
1083
+ defaults:
1084
+ run:
1085
+ shell: bash
1086
+
1087
+ env:
1088
+ WINDOWS_BASEKIT_URL: https://registrationcenter-download.intel.com/akdlm/IRC_NAS/b380d914-366b-4b77-a74a-05e3c38b3514/intel-oneapi-base-toolkit-2025.0.0.882_offline.exe
1089
+ WINDOWS_DPCPP_MKL: intel.oneapi.win.cpp-dpcpp-common:intel.oneapi.win.mkl.devel:intel.oneapi.win.dnnl:intel.oneapi.win.tbb.devel
1090
+ ONEAPI_ROOT: "C:/Program Files (x86)/Intel/oneAPI"
1091
+ steps:
1092
+ - name: Clone
1093
+ id: checkout
1094
+ uses: actions/checkout@v4
1095
+ with:
1096
+ fetch-depth: 0
1097
+
1098
+ - name: ccache
1099
+ uses: hendrikmuhs/[email protected]
1100
+ with:
1101
+ key: windows-latest-cmake-sycl
1102
+ variant: sccache
1103
+ evict-old-files: 1d
1104
+
1105
+ - name: Install
1106
+ run: |
1107
+ scripts/install-oneapi.bat $WINDOWS_BASEKIT_URL $WINDOWS_DPCPP_MKL
1108
+
1109
+ - name: Build
1110
+ id: cmake_build
1111
+ run: examples/sycl/win-build-sycl.bat
1112
+
1113
+ - name: Determine tag name
1114
+ id: tag
1115
+ shell: bash
1116
+ run: |
1117
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
1118
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
1119
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
1120
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
1121
+ else
1122
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
1123
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
1124
+ fi
1125
+
1126
+ - name: Build the release package
1127
+ id: pack_artifacts
1128
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
1129
+ run: |
1130
+ echo "cp oneAPI running time dll files in ${{ env.ONEAPI_ROOT }} to ./build/bin"
1131
+
1132
+ cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_sycl_blas.5.dll" ./build/bin
1133
+ cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_core.2.dll" ./build/bin
1134
+ cp "${{ env.ONEAPI_ROOT }}/mkl/latest/bin/mkl_tbb_thread.2.dll" ./build/bin
1135
+
1136
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_level_zero.dll" ./build/bin
1137
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_adapter_opencl.dll" ./build/bin
1138
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_loader.dll" ./build/bin
1139
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/ur_win_proxy_loader.dll" ./build/bin
1140
+
1141
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/sycl8.dll" ./build/bin
1142
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/svml_dispmd.dll" ./build/bin
1143
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libmmd.dll" ./build/bin
1144
+ cp "${{ env.ONEAPI_ROOT }}/compiler/latest/bin/libiomp5md.dll" ./build/bin
1145
+
1146
+ cp "${{ env.ONEAPI_ROOT }}/dnnl/latest/bin/dnnl.dll" ./build/bin
1147
+ cp "${{ env.ONEAPI_ROOT }}/tbb/latest/bin/tbb12.dll" ./build/bin
1148
+
1149
+ echo "cp oneAPI running time dll files to ./build/bin done"
1150
+ 7z a llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip ./build/bin/*
1151
+
1152
+ - name: Upload the release package
1153
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
1154
+ uses: actions/upload-artifact@v4
1155
+ with:
1156
+ path: llama-${{ steps.tag.outputs.name }}-bin-win-sycl-x64.zip
1157
+ name: llama-bin-win-sycl-x64.zip
1158
+
1159
+ windows-latest-cmake-hip:
1160
+ if: ${{ github.event.inputs.create_release != 'true' }}
1161
+ runs-on: windows-latest
1162
+
1163
+ steps:
1164
+ - name: Clone
1165
+ id: checkout
1166
+ uses: actions/checkout@v4
1167
+
1168
+ - name: Install
1169
+ id: depends
1170
+ run: |
1171
+ $ErrorActionPreference = "Stop"
1172
+ write-host "Downloading AMD HIP SDK Installer"
1173
+ Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
1174
+ write-host "Installing AMD HIP SDK"
1175
+ Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
1176
+ write-host "Completed AMD HIP SDK installation"
1177
+
1178
+ - name: Verify ROCm
1179
+ id: verify
1180
+ run: |
1181
+ & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
1182
+
1183
+ - name: Install ccache
1184
+ uses: hendrikmuhs/[email protected]
1185
+ with:
1186
+ key: ${{ github.job }}
1187
+ evict-old-files: 1d
1188
+
1189
+ - name: Build
1190
+ id: cmake_build
1191
+ run: |
1192
+ $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
1193
+ $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
1194
+ cmake -G "Unix Makefiles" -B build -S . `
1195
+ -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
1196
+ -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
1197
+ -DCMAKE_BUILD_TYPE=Release `
1198
+ -DGGML_HIP=ON `
1199
+ -DGGML_RPC=ON
1200
+ cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
1201
+
1202
+ windows-latest-cmake-hip-release:
1203
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
1204
+ runs-on: windows-latest
1205
+
1206
+ strategy:
1207
+ matrix:
1208
+ gpu_target: [gfx1100, gfx1101, gfx1030]
1209
+
1210
+ steps:
1211
+ - name: Clone
1212
+ id: checkout
1213
+ uses: actions/checkout@v4
1214
+ with:
1215
+ fetch-depth: 0
1216
+
1217
+ - name: ccache
1218
+ uses: hendrikmuhs/[email protected]
1219
+ with:
1220
+ key: windows-latest-cmake-hip-release
1221
+ evict-old-files: 1d
1222
+
1223
+ - name: Install
1224
+ id: depends
1225
+ run: |
1226
+ $ErrorActionPreference = "Stop"
1227
+ write-host "Downloading AMD HIP SDK Installer"
1228
+ Invoke-WebRequest -Uri "https://download.amd.com/developer/eula/rocm-hub/AMD-Software-PRO-Edition-24.Q3-WinSvr2022-For-HIP.exe" -OutFile "${env:RUNNER_TEMP}\rocm-install.exe"
1229
+ write-host "Installing AMD HIP SDK"
1230
+ Start-Process "${env:RUNNER_TEMP}\rocm-install.exe" -ArgumentList '-install' -NoNewWindow -Wait
1231
+ write-host "Completed AMD HIP SDK installation"
1232
+
1233
+ - name: Verify ROCm
1234
+ id: verify
1235
+ run: |
1236
+ & 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' --version
1237
+
1238
+ - name: Build
1239
+ id: cmake_build
1240
+ run: |
1241
+ $env:HIP_PATH=$(Resolve-Path 'C:\Program Files\AMD\ROCm\*\bin\clang.exe' | split-path | split-path)
1242
+ $env:CMAKE_PREFIX_PATH="${env:HIP_PATH}"
1243
+ cmake -G "Unix Makefiles" -B build -S . `
1244
+ -DCMAKE_C_COMPILER="${env:HIP_PATH}\bin\clang.exe" `
1245
+ -DCMAKE_CXX_COMPILER="${env:HIP_PATH}\bin\clang++.exe" `
1246
+ -DCMAKE_BUILD_TYPE=Release `
1247
+ -DAMDGPU_TARGETS=${{ matrix.gpu_target }} `
1248
+ -DGGML_HIP=ON `
1249
+ -DGGML_RPC=ON
1250
+ cmake --build build -j ${env:NUMBER_OF_PROCESSORS}
1251
+ md "build\bin\rocblas\library\"
1252
+ cp "${env:HIP_PATH}\bin\hipblas.dll" "build\bin\"
1253
+ cp "${env:HIP_PATH}\bin\rocblas.dll" "build\bin\"
1254
+ cp "${env:HIP_PATH}\bin\rocblas\library\*" "build\bin\rocblas\library\"
1255
+
1256
+ - name: Determine tag name
1257
+ id: tag
1258
+ shell: bash
1259
+ run: |
1260
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
1261
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
1262
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
1263
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
1264
+ else
1265
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
1266
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
1267
+ fi
1268
+
1269
+ - name: Pack artifacts
1270
+ id: pack_artifacts
1271
+ run: |
1272
+ 7z a llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip .\build\bin\*
1273
+
1274
+ - name: Upload artifacts
1275
+ uses: actions/upload-artifact@v4
1276
+ with:
1277
+ path: llama-${{ steps.tag.outputs.name }}-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
1278
+ name: llama-bin-win-hip-x64-${{ matrix.gpu_target }}.zip
1279
+
1280
+ ios-xcode-build:
1281
+ runs-on: macos-latest
1282
+
1283
+ steps:
1284
+ - name: Checkout code
1285
+ uses: actions/checkout@v4
1286
+
1287
+ - name: Build
1288
+ id: cmake_build
1289
+ run: |
1290
+ sysctl -a
1291
+ cmake -B build -G Xcode \
1292
+ -DGGML_METAL_USE_BF16=ON \
1293
+ -DGGML_METAL_EMBED_LIBRARY=ON \
1294
+ -DLLAMA_BUILD_EXAMPLES=OFF \
1295
+ -DLLAMA_BUILD_TESTS=OFF \
1296
+ -DLLAMA_BUILD_SERVER=OFF \
1297
+ -DCMAKE_SYSTEM_NAME=iOS \
1298
+ -DCMAKE_OSX_DEPLOYMENT_TARGET=14.0 \
1299
+ -DCMAKE_XCODE_ATTRIBUTE_DEVELOPMENT_TEAM=ggml
1300
+ cmake --build build --config Release -j $(sysctl -n hw.logicalcpu) -- CODE_SIGNING_ALLOWED=NO
1301
+ sudo cmake --install build --config Release
1302
+
1303
+ - name: xcodebuild for swift package
1304
+ id: xcodebuild
1305
+ run: |
1306
+ xcodebuild -scheme llama-Package -destination 'generic/platform=iOS'
1307
+
1308
+ - name: Build Xcode project
1309
+ run: xcodebuild -project examples/llama.swiftui/llama.swiftui.xcodeproj -scheme llama.swiftui -sdk iphoneos CODE_SIGNING_REQUIRED=NO CODE_SIGN_IDENTITY= -destination 'generic/platform=iOS' build
1310
+
1311
+ android-build:
1312
+ runs-on: ubuntu-latest
1313
+
1314
+ steps:
1315
+ - name: Clone
1316
+ uses: actions/checkout@v4
1317
+
1318
+ - name: ccache
1319
+ uses: hendrikmuhs/[email protected]
1320
+ with:
1321
+ key: android-build
1322
+ evict-old-files: 1d
1323
+
1324
+ - name: Set up JDK
1325
+ uses: actions/setup-java@v3
1326
+ with:
1327
+ java-version: 17
1328
+ distribution: zulu
1329
+
1330
+ - name: Setup Android SDK
1331
+ uses: android-actions/setup-android@v3
1332
+ with:
1333
+ log-accepted-android-sdk-licenses: false
1334
+
1335
+ - name: Build
1336
+ run: |
1337
+ cd examples/llama.android
1338
+
1339
+ ./gradlew build --no-daemon
1340
+
1341
+ release:
1342
+ if: ${{ ( github.event_name == 'push' && github.ref == 'refs/heads/master' ) || github.event.inputs.create_release == 'true' }}
1343
+
1344
+ runs-on: ubuntu-latest
1345
+
1346
+ needs:
1347
+ - ubuntu-cpu-cmake
1348
+ - windows-latest-cmake
1349
+ - windows-2019-cmake-cuda
1350
+ - windows-latest-cmake-hip-release
1351
+ - macOS-latest-cmake-arm64
1352
+ - macOS-latest-cmake-x64
1353
+
1354
+ steps:
1355
+ - name: Clone
1356
+ id: checkout
1357
+ uses: actions/checkout@v4
1358
+ with:
1359
+ fetch-depth: 0
1360
+
1361
+ - name: ccache
1362
+ uses: hendrikmuhs/[email protected]
1363
+ with:
1364
+ key: release
1365
+ evict-old-files: 1d
1366
+
1367
+ - name: Determine tag name
1368
+ id: tag
1369
+ shell: bash
1370
+ run: |
1371
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
1372
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
1373
+ if [[ "${{ env.BRANCH_NAME }}" == "master" ]]; then
1374
+ echo "name=b${BUILD_NUMBER}" >> $GITHUB_OUTPUT
1375
+ else
1376
+ SAFE_NAME=$(echo "${{ env.BRANCH_NAME }}" | tr '/' '-')
1377
+ echo "name=${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}" >> $GITHUB_OUTPUT
1378
+ fi
1379
+
1380
+ - name: Download artifacts
1381
+ id: download-artifact
1382
+ uses: actions/download-artifact@v4
1383
+ with:
1384
+ path: ./artifact
1385
+
1386
+ - name: Move artifacts
1387
+ id: move_artifacts
1388
+ run: mkdir -p ./artifact/release && mv ./artifact/*/*.zip ./artifact/release
1389
+
1390
+ - name: Create release
1391
+ id: create_release
1392
+ uses: ggml-org/action-create-release@v1
1393
+ env:
1394
+ GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
1395
+ with:
1396
+ tag_name: ${{ steps.tag.outputs.name }}
1397
+
1398
+ - name: Upload release
1399
+ id: upload_release
1400
+ uses: actions/github-script@v3
1401
+ with:
1402
+ github-token: ${{secrets.GITHUB_TOKEN}}
1403
+ script: |
1404
+ const path = require('path');
1405
+ const fs = require('fs');
1406
+ const release_id = '${{ steps.create_release.outputs.id }}';
1407
+ for (let file of await fs.readdirSync('./artifact/release')) {
1408
+ if (path.extname(file) === '.zip') {
1409
+ console.log('uploadReleaseAsset', file);
1410
+ await github.repos.uploadReleaseAsset({
1411
+ owner: context.repo.owner,
1412
+ repo: context.repo.repo,
1413
+ release_id: release_id,
1414
+ name: file,
1415
+ data: await fs.readFileSync(`./artifact/release/${file}`)
1416
+ });
1417
+ }
1418
+ }
1419
+
1420
+ # ubuntu-latest-gcc:
1421
+ # runs-on: ubuntu-latest
1422
+ #
1423
+ # strategy:
1424
+ # matrix:
1425
+ # build: [Debug, Release]
1426
+ #
1427
+ # steps:
1428
+ # - name: Clone
1429
+ # uses: actions/checkout@v4
1430
+ #
1431
+ # - name: Dependencies
1432
+ # run: |
1433
+ # sudo apt-get update
1434
+ # sudo apt-get install build-essential
1435
+ # sudo apt-get install cmake
1436
+ #
1437
+ # - name: Configure
1438
+ # run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
1439
+ #
1440
+ # - name: Build
1441
+ # run: |
1442
+ # make
1443
+ #
1444
+ # ubuntu-latest-clang:
1445
+ # runs-on: ubuntu-latest
1446
+ #
1447
+ # strategy:
1448
+ # matrix:
1449
+ # build: [Debug, Release]
1450
+ #
1451
+ # steps:
1452
+ # - name: Clone
1453
+ # uses: actions/checkout@v4
1454
+ #
1455
+ # - name: Dependencies
1456
+ # run: |
1457
+ # sudo apt-get update
1458
+ # sudo apt-get install build-essential
1459
+ # sudo apt-get install cmake
1460
+ #
1461
+ # - name: Configure
1462
+ # run: cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
1463
+ #
1464
+ # - name: Build
1465
+ # run: |
1466
+ # make
1467
+ #
1468
+ # ubuntu-latest-gcc-sanitized:
1469
+ # runs-on: ubuntu-latest
1470
+ #
1471
+ # strategy:
1472
+ # matrix:
1473
+ # sanitizer: [ADDRESS, THREAD, UNDEFINED]
1474
+ #
1475
+ # steps:
1476
+ # - name: Clone
1477
+ # uses: actions/checkout@v4
1478
+ #
1479
+ # - name: Dependencies
1480
+ # run: |
1481
+ # sudo apt-get update
1482
+ # sudo apt-get install build-essential
1483
+ # sudo apt-get install cmake
1484
+ #
1485
+ # - name: Configure
1486
+ # run: cmake . -DCMAKE_BUILD_TYPE=Debug -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON
1487
+ #
1488
+ # - name: Build
1489
+ # run: |
1490
+ # make
1491
+ #
1492
+ # windows:
1493
+ # runs-on: windows-latest
1494
+ #
1495
+ # strategy:
1496
+ # matrix:
1497
+ # build: [Release]
1498
+ # arch: [Win32, x64]
1499
+ # include:
1500
+ # - arch: Win32
1501
+ # s2arc: x86
1502
+ # - arch: x64
1503
+ # s2arc: x64
1504
+ #
1505
+ # steps:
1506
+ # - name: Clone
1507
+ # uses: actions/checkout@v4
1508
+ #
1509
+ # - name: Add msbuild to PATH
1510
+ # uses: microsoft/setup-msbuild@v1
1511
+ #
1512
+ # - name: Configure
1513
+ # run: >
1514
+ # cmake -S . -B ./build -A ${{ matrix.arch }}
1515
+ # -DCMAKE_BUILD_TYPE=${{ matrix.build }}
1516
+ #
1517
+ # - name: Build
1518
+ # run: |
1519
+ # cd ./build
1520
+ # msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
1521
+ #
1522
+ # - name: Upload binaries
1523
+ # uses: actions/upload-artifact@v4
1524
+ # with:
1525
+ # name: llama-bin-${{ matrix.arch }}
1526
+ # path: build/bin/${{ matrix.build }}
1527
+ #
1528
+ # windows-blas:
1529
+ # runs-on: windows-latest
1530
+ #
1531
+ # strategy:
1532
+ # matrix:
1533
+ # build: [Release]
1534
+ # arch: [Win32, x64]
1535
+ # blas: [ON]
1536
+ # include:
1537
+ # - arch: Win32
1538
+ # obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x86.zip
1539
+ # s2arc: x86
1540
+ # - arch: x64
1541
+ # obzip: https://github.com/xianyi/OpenBLAS/releases/download/v0.3.21/OpenBLAS-0.3.21-x64.zip
1542
+ # s2arc: x64
1543
+ #
1544
+ # steps:
1545
+ # - name: Clone
1546
+ # uses: actions/checkout@v4
1547
+ #
1548
+ # - name: Add msbuild to PATH
1549
+ # uses: microsoft/setup-msbuild@v1
1550
+ #
1551
+ # - name: Fetch OpenBLAS
1552
+ # if: matrix.blas == 'ON'
1553
+ # run: |
1554
+ # C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }}
1555
+ # 7z x blas.zip -oblas -y
1556
+ # copy blas/include/cblas.h .
1557
+ # copy blas/include/openblas_config.h .
1558
+ # echo "blasdir=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
1559
+ #
1560
+ # - name: Configure
1561
+ # run: >
1562
+ # cmake -S . -B ./build -A ${{ matrix.arch }}
1563
+ # -DCMAKE_BUILD_TYPE=${{ matrix.build }}
1564
+ # -DLLAMA_SUPPORT_OPENBLAS=${{ matrix.blas }}
1565
+ # -DCMAKE_LIBRARY_PATH="$env:blasdir/lib"
1566
+ #
1567
+ # - name: Build
1568
+ # run: |
1569
+ # cd ./build
1570
+ # msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
1571
+ #
1572
+ # - name: Copy libopenblas.dll
1573
+ # if: matrix.blas == 'ON'
1574
+ # run: copy "$env:blasdir/bin/libopenblas.dll" build/bin/${{ matrix.build }}
1575
+ #
1576
+ # - name: Upload binaries
1577
+ # if: matrix.blas == 'ON'
1578
+ # uses: actions/upload-artifact@v4
1579
+ # with:
1580
+ # name: llama-blas-bin-${{ matrix.arch }}
1581
+ # path: build/bin/${{ matrix.build }}
1582
+ #
1583
+ # emscripten:
1584
+ # runs-on: ubuntu-latest
1585
+ #
1586
+ # strategy:
1587
+ # matrix:
1588
+ # build: [Release]
1589
+ #
1590
+ # steps:
1591
+ # - name: Clone
1592
+ # uses: actions/checkout@v4
1593
+ #
1594
+ # - name: Dependencies
1595
+ # run: |
1596
+ # wget -q https://github.com/emscripten-core/emsdk/archive/master.tar.gz
1597
+ # tar -xvf master.tar.gz
1598
+ # emsdk-master/emsdk update
1599
+ # emsdk-master/emsdk install latest
1600
+ # emsdk-master/emsdk activate latest
1601
+ #
1602
+ # - name: Configure
1603
+ # run: echo "tmp"
1604
+ #
1605
+ # - name: Build
1606
+ # run: |
1607
+ # pushd emsdk-master
1608
+ # source ./emsdk_env.sh
1609
+ # popd
1610
+ # emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
1611
+ # make
1612
+
1613
+ openEuler-latest-cmake-cann:
1614
+ if: ${{ github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'Ascend NPU') }}
1615
+ defaults:
1616
+ run:
1617
+ shell: bash -el {0}
1618
+ runs-on: ubuntu-24.04-arm
1619
+ strategy:
1620
+ matrix:
1621
+ cann:
1622
+ - '8.0.rc3.beta1-910b-openeuler22.03-py3.10'
1623
+ device:
1624
+ - 'ascend910b3'
1625
+ build:
1626
+ - 'Release'
1627
+ container: ascendai/cann:${{ matrix.cann }}
1628
+ steps:
1629
+ - name: Checkout
1630
+ uses: actions/checkout@v4
1631
+
1632
+ - name: Dependencies
1633
+ run: |
1634
+ yum update -y
1635
+ yum install -y git gcc gcc-c++ make cmake
1636
+
1637
+ - name: Build
1638
+ run: |
1639
+ export LD_LIBRARY_PATH=${ASCEND_TOOLKIT_HOME}/lib64:${ASCEND_TOOLKIT_HOME}/$(uname -m)-linux/devlib/:${LD_LIBRARY_PATH}
1640
+
1641
+ cmake -S . -B build \
1642
+ -DCMAKE_BUILD_TYPE=${{ matrix.build }} \
1643
+ -DGGML_CANN=on \
1644
+ -DSOC_TYPE=${{ matrix.device }}
1645
+ cmake --build build -j $(nproc)
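Almost every job above repeats the same "Determine tag name" step: builds from master are tagged `b<commit-count>`, while branch builds are tagged `<branch>-b<commit-count>-<short-hash>` with `/` replaced by `-`. Below is a minimal local sketch of that logic, assuming a checkout with full git history (the jobs use `fetch-depth: 0`) and a `BRANCH_NAME` variable standing in for the workflow's `env.BRANCH_NAME`; the `<platform>` suffix in the last line is a placeholder, not a real artifact name.

```bash
#!/usr/bin/env bash
# Local sketch of the "Determine tag name" step repeated throughout build.yml.
# Assumptions: full git history is available and BRANCH_NAME mirrors env.BRANCH_NAME.
set -euo pipefail

BRANCH_NAME="${BRANCH_NAME:-$(git rev-parse --abbrev-ref HEAD)}"
BUILD_NUMBER="$(git rev-list --count HEAD)"   # total number of commits reachable from HEAD
SHORT_HASH="$(git rev-parse --short=7 HEAD)"

if [[ "${BRANCH_NAME}" == "master" ]]; then
    # master builds: b<commit-count>, e.g. b4567
    NAME="b${BUILD_NUMBER}"
else
    # branch builds: sanitized branch name, commit count, short hash
    SAFE_NAME="$(echo "${BRANCH_NAME}" | tr '/' '-')"
    NAME="${SAFE_NAME}-b${BUILD_NUMBER}-${SHORT_HASH}"
fi

echo "tag name: ${NAME}"
echo "artifact: llama-${NAME}-bin-<platform>.zip"   # <platform> is a placeholder
```

Keeping the derivation in view like this also makes clear why `fetch-depth: 0` matters: with a shallow clone, `git rev-list --count HEAD` undercounts and the resulting tag name changes.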
llama.cpp/.github/workflows/close-issue.yml ADDED
@@ -0,0 +1,28 @@
1
+ name: Close inactive issues
2
+ on:
3
+ schedule:
4
+ - cron: "42 0 * * *"
5
+
6
+ # Fine-grained permissions
7
+ # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
8
+ permissions:
9
+ issues: write
10
+
11
+ jobs:
12
+ close-issues:
13
+ runs-on: ubuntu-latest
14
+ permissions:
15
+ issues: write
16
+ pull-requests: write
17
+ steps:
18
+ - uses: actions/stale@v5
19
+ with:
20
+ exempt-issue-labels: "refactor,help wanted,good first issue,research,bug,roadmap"
21
+ days-before-issue-stale: 30
22
+ days-before-issue-close: 14
23
+ stale-issue-label: "stale"
24
+ close-issue-message: "This issue was closed because it has been inactive for 14 days since being marked as stale."
25
+ days-before-pr-stale: -1
26
+ days-before-pr-close: -1
27
+ operations-per-run: 10000
28
+ repo-token: ${{ secrets.GITHUB_TOKEN }}
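The stale policy above marks an issue after 30 days without activity, closes it 14 days later, and never touches pull requests or the exempted labels. As a hedged illustration (not part of the workflow), the GitHub CLI can approximate the set of open issues that would currently be candidates; the repository slug and the GNU `date` call are assumptions to adjust as needed.

```bash
#!/usr/bin/env bash
# Illustrative preview of the issues the stale bot configured above would consider.
# Assumptions: the GitHub CLI (gh) is installed and authenticated, GNU date is
# available, and REPO points at the repository this workflow runs in.
set -euo pipefail

REPO="${REPO:-ggml-org/llama.cpp}"      # assumption: adjust to your repository
CUTOFF="$(date -d '30 days ago' +%F)"   # mirrors days-before-issue-stale: 30

# The -label: filters mirror exempt-issue-labels in the workflow.
gh issue list \
    --repo "${REPO}" \
    --limit 100 \
    --search "is:open updated:<${CUTOFF} -label:stale -label:refactor -label:\"help wanted\" -label:\"good first issue\" -label:research -label:bug -label:roadmap"
```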
llama.cpp/.github/workflows/docker.yml ADDED
@@ -0,0 +1,173 @@
1
+ # This workflow uses actions that are not certified by GitHub.
2
+ # They are provided by a third-party and are governed by
3
+ # separate terms of service, privacy policy, and support
4
+ # documentation.
5
+
6
+ # GitHub recommends pinning actions to a commit SHA.
7
+ # To get a newer version, you will need to update the SHA.
8
+ # You can also reference a tag or branch, but the action may change without warning.
9
+
10
+ name: Publish Docker image
11
+
12
+ on:
13
+ workflow_dispatch: # allows manual triggering
14
+ schedule:
15
+ # Rebuild daily rather than on every push because it is expensive
16
+ - cron: '12 4 * * *'
17
+
18
+ concurrency:
19
+ group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
20
+ cancel-in-progress: true
21
+
22
+ # Fine-grant permission
23
+ # https://docs.github.com/en/actions/security-for-github-actions/security-guides/automatic-token-authentication#modifying-the-permissions-for-the-github_token
24
+ permissions:
25
+ packages: write
26
+
27
+ jobs:
28
+ push_to_registry:
29
+ name: Push Docker image to GHCR
30
+
31
+ runs-on: ubuntu-22.04
32
+ env:
33
+ COMMIT_SHA: ${{ github.sha }}
34
+ strategy:
35
+ fail-fast: false
36
+ matrix:
37
+ config:
38
+ # Multi-stage build
39
+ - { tag: "cpu", dockerfile: ".devops/cpu.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: false}
40
+ - { tag: "cuda", dockerfile: ".devops/cuda.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
41
+ - { tag: "musa", dockerfile: ".devops/musa.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
42
+ - { tag: "intel", dockerfile: ".devops/intel.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
43
+ - { tag: "vulkan", dockerfile: ".devops/vulkan.Dockerfile", platforms: "linux/amd64", full: true, light: true, server: true, freediskspace: false}
44
+ # Note: the rocm images are failing due to a compiler error and are disabled until this is fixed to allow the workflow to complete
45
+ #- {tag: "rocm", dockerfile: ".devops/rocm.Dockerfile", platforms: "linux/amd64,linux/arm64", full: true, light: true, server: true, freediskspace: true }
46
+ steps:
47
+ - name: Check out the repo
48
+ uses: actions/checkout@v4
49
+ with:
50
+ fetch-depth: 0 # preserve git history, so we can determine the build number
51
+
52
+ - name: Set up QEMU
53
+ uses: docker/setup-qemu-action@v3
54
+
55
+ - name: Set up Docker Buildx
56
+ uses: docker/setup-buildx-action@v3
57
+
58
+ - name: Log in to GitHub Container Registry
59
+ uses: docker/login-action@v2
60
+ with:
61
+ registry: ghcr.io
62
+ username: ${{ github.repository_owner }}
63
+ password: ${{ secrets.GITHUB_TOKEN }}
64
+
65
+ - name: Determine tag name
66
+ id: tag
67
+ shell: bash
68
+ run: |
69
+ BUILD_NUMBER="$(git rev-list --count HEAD)"
70
+ SHORT_HASH="$(git rev-parse --short=7 HEAD)"
71
+ REPO_OWNER="${GITHUB_REPOSITORY_OWNER@L}" # to lower case
72
+ REPO_NAME="${{ github.event.repository.name }}"
73
+
74
+ # determine tag name postfix (build number, commit hash)
75
+ if [[ "${{ env.GITHUB_BRANCH_NAME }}" == "master" ]]; then
76
+ TAG_POSTFIX="-b${BUILD_NUMBER}"
77
+ else
78
+ SAFE_NAME=$(echo "${{ env.GITHUB_BRANCH_NAME }}" | tr '/' '-')
79
+ TAG_POSTFIX="-${SAFE_NAME}-${SHORT_HASH}"
80
+ fi
81
+ # list all tags possible
82
+ if [[ "${{ matrix.config.tag }}" == "cpu" ]]; then
83
+ TYPE=""
84
+ else
85
+ TYPE="-${{ matrix.config.tag }}"
86
+ fi
87
+ PREFIX="ghcr.io/${REPO_OWNER}/${REPO_NAME}:"
88
+ FULLTAGS="${PREFIX}full${TYPE},${PREFIX}full${TYPE}${TAG_POSTFIX}"
89
+ LIGHTTAGS="${PREFIX}light${TYPE},${PREFIX}light${TYPE}${TAG_POSTFIX}"
90
+ SERVERTAGS="${PREFIX}server${TYPE},${PREFIX}server${TYPE}${TAG_POSTFIX}"
91
+ echo "full_output_tags=$FULLTAGS" >> $GITHUB_OUTPUT
92
+ echo "light_output_tags=$LIGHTTAGS" >> $GITHUB_OUTPUT
93
+ echo "server_output_tags=$SERVERTAGS" >> $GITHUB_OUTPUT
94
+ echo "full_output_tags=$FULLTAGS" # print out for debugging
95
+ echo "light_output_tags=$LIGHTTAGS" # print out for debugging
96
+ echo "server_output_tags=$SERVERTAGS" # print out for debugging
97
+ env:
98
+ GITHUB_BRANCH_NAME: ${{ github.head_ref || github.ref_name }}
99
+ GITHUB_REPOSITORY_OWNER: '${{ github.repository_owner }}'
100
+
101
+ - name: Free Disk Space (Ubuntu)
102
+ if: ${{ matrix.config.freediskspace == true }}
103
+ uses: ggml-org/[email protected]
104
+ with:
105
+ # this might remove tools that are actually needed,
106
+ # if set to "true" but frees about 6 GB
107
+ tool-cache: false
108
+
109
+ # all of these default to true, but feel free to set to
110
+ # "false" if necessary for your workflow
111
+ android: true
112
+ dotnet: true
113
+ haskell: true
114
+ large-packages: true
115
+ docker-images: true
116
+ swap-storage: true
117
+
118
+ - name: Build and push Full Docker image (tagged + versioned)
119
+ if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.full == true }}
120
+ uses: docker/build-push-action@v6
121
+ with:
122
+ context: .
123
+ push: true
124
+ platforms: ${{ matrix.config.platforms }}
125
+ # tag list is generated from step above
126
+ tags: ${{ steps.tag.outputs.full_output_tags }}
127
+ file: ${{ matrix.config.dockerfile }}
128
+ target: full
129
+ provenance: false
130
+ # using github experimental cache
131
+ cache-from: type=gha
132
+ cache-to: type=gha,mode=max
133
+ # return to this if the experimental github cache is having issues
134
+ #cache-to: type=local,dest=/tmp/.buildx-cache
135
+ #cache-from: type=local,src=/tmp/.buildx-cache
136
+
137
+ - name: Build and push Light Docker image (tagged + versioned)
138
+ if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.light == true }}
139
+ uses: docker/build-push-action@v6
140
+ with:
141
+ context: .
142
+ push: true
143
+ platforms: ${{ matrix.config.platforms }}
144
+ # tag list is generated from step above
145
+ tags: ${{ steps.tag.outputs.light_output_tags }}
146
+ file: ${{ matrix.config.dockerfile }}
147
+ target: light
148
+ provenance: false
149
+ # using github experimental cache
150
+ cache-from: type=gha
151
+ cache-to: type=gha,mode=max
152
+ # return to this if the experimental github cache is having issues
153
+ #cache-to: type=local,dest=/tmp/.buildx-cache
154
+ #cache-from: type=local,src=/tmp/.buildx-cache
155
+
156
+ - name: Build and push Server Docker image (tagged + versioned)
157
+ if: ${{ (github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch') && matrix.config.server == true }}
158
+ uses: docker/build-push-action@v6
159
+ with:
160
+ context: .
161
+ push: true
162
+ platforms: ${{ matrix.config.platforms }}
163
+ # tag list is generated from step above
164
+ tags: ${{ steps.tag.outputs.server_output_tags }}
165
+ file: ${{ matrix.config.dockerfile }}
166
+ target: server
167
+ provenance: false
168
+ # using github experimental cache
169
+ cache-from: type=gha
170
+ cache-to: type=gha,mode=max
171
+ # return to this if the experimental github cache is having issues
172
+ #cache-to: type=local,dest=/tmp/.buildx-cache
173
+ #cache-from: type=local,src=/tmp/.buildx-cache
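For reference, the "Determine tag name" step above publishes two tags per image flavor: a rolling tag and a build-pinned tag. A sketch of what a master-branch run of the "cuda" matrix entry might produce, and how the light CPU image could then be used; the owner, build number, and model path here are hypothetical:

    # hypothetical tags for build 4567 on master
    #   ghcr.io/ggml-org/llama.cpp:full-cuda
    #   ghcr.io/ggml-org/llama.cpp:full-cuda-b4567
    docker pull ghcr.io/ggml-org/llama.cpp:light
    docker run -v ./models:/models ghcr.io/ggml-org/llama.cpp:light -m /models/model.gguf -p "Hello" -n 64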
llama.cpp/.github/workflows/editorconfig.yml ADDED
@@ -0,0 +1,29 @@
+name: EditorConfig Checker
+
+on:
+  workflow_dispatch: # allows manual triggering
+    inputs:
+      create_release:
+        description: 'Create new release'
+        required: true
+        type: boolean
+  push:
+    branches:
+      - master
+  pull_request:
+    branches:
+      - master
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  editorconfig:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: editorconfig-checker/action-editorconfig-checker@v2
+        with:
+          version: v3.0.3
+      - run: editorconfig-checker
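The same check can be approximated locally before pushing; a minimal sketch, assuming the unofficial PyPI wrapper for editorconfig-checker is acceptable (the pinned v3.0.3 binary from the project's releases would match the CI more exactly):

    pip install editorconfig-checker   # wrapper package; version may differ from the v3.0.3 used in CI
    editorconfig-checker               # run from the repository root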
llama.cpp/.github/workflows/gguf-publish.yml ADDED
@@ -0,0 +1,44 @@
+# This workflow will upload a Python Package using Twine when a GGUF release is created
+# For more information see: https://help.github.com/en/actions/language-and-framework-guides/using-python-with-github-actions#publishing-to-package-registries
+
+# See `gguf-py/README.md` for how to make a release.
+
+# This workflow uses actions that are not certified by GitHub.
+# They are provided by a third-party and are governed by
+# separate terms of service, privacy policy, and support
+# documentation.
+
+name: Upload Python Package
+
+on:
+  workflow_dispatch:
+  push:
+    # Pattern matched against refs/tags
+    tags:
+      - 'gguf-v*' # Push events to every version tag
+
+
+jobs:
+  deploy:
+
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.9.x'
+      - name: Install dependencies
+        run: |
+          cd gguf-py
+          python -m pip install poetry
+          poetry install
+
+      - name: Build package
+        run: cd gguf-py && poetry build
+      - name: Publish package
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          password: ${{ secrets.PYPI_API_TOKEN }}
+          packages-dir: gguf-py/dist
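As the trigger above shows, publishing only happens on tags matching gguf-v*. A hedged sketch of how a release tag might be cut, and how the package can be built locally without publishing; the version number is illustrative only (see gguf-py/README.md for the actual release procedure):

    git tag gguf-v0.10.0          # hypothetical version
    git push origin gguf-v0.10.0  # pushing the tag triggers this workflow
    cd gguf-py && poetry build    # local dry run: produces dist/ artifacts only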
llama.cpp/.github/workflows/labeler.yml ADDED
@@ -0,0 +1,17 @@
+name: "Pull Request Labeler"
+on:
+- pull_request_target
+
+jobs:
+  labeler:
+    permissions:
+      contents: read
+      pull-requests: write
+    runs-on: ubuntu-latest
+    steps:
+    - uses: actions/checkout@v4
+      with:
+        repository: "ggerganov/llama.cpp"
+    - uses: actions/labeler@v5
+      with:
+        configuration-path: '.github/labeler.yml'
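The configuration-path above points at a rules file that maps changed paths to PR labels (the repository ships its own .github/labeler.yml, which is not reproduced here). A minimal sketch of what such a file can look like for actions/labeler@v5; the label names and globs below are illustrative only:

    cat > .github/labeler.yml <<'EOF'
    documentation:
      - changed-files:
          - any-glob-to-any-file: ['**/*.md']
    python:
      - changed-files:
          - any-glob-to-any-file: ['**/*.py']
    EOF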
llama.cpp/.github/workflows/python-check-requirements.yml ADDED
@@ -0,0 +1,33 @@
+name: Python check requirements.txt
+
+on:
+  push:
+    paths:
+      - '.github/workflows/python-check-requirements.yml'
+      - 'scripts/check-requirements.sh'
+      - 'convert*.py'
+      - '**/requirements*.txt'
+  pull_request:
+    paths:
+      - '.github/workflows/python-check-requirements.yml'
+      - 'scripts/check-requirements.sh'
+      - 'convert*.py'
+      - '**/requirements*.txt'
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  python-check-requirements:
+    runs-on: ubuntu-latest
+    name: check-requirements
+    steps:
+      - name: Check out source repository
+        uses: actions/checkout@v4
+      - name: Set up Python environment
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Run check-requirements.sh script
+        run: bash scripts/check-requirements.sh
llama.cpp/.github/workflows/python-lint.yml ADDED
@@ -0,0 +1,30 @@
+name: flake8 Lint
+
+on:
+  push:
+    branches:
+      - master
+    paths: ['.github/workflows/python-lint.yml', '**/*.py']
+  pull_request:
+    types: [opened, synchronize, reopened]
+    paths: ['.github/workflows/python-lint.yml', '**/*.py']
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  flake8-lint:
+    runs-on: ubuntu-latest
+    name: Lint
+    steps:
+      - name: Check out source repository
+        uses: actions/checkout@v4
+      - name: Set up Python environment
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: flake8 Lint
+        uses: py-actions/flake8@v2
+        with:
+          plugins: "flake8-no-print"
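The lint can be reproduced locally in roughly the same way the action does it; a sketch assuming the plugin listed in the plugins input is installed from PyPI and that the repository's .flake8 config is picked up automatically:

    pip install flake8 flake8-no-print
    flake8 .    # run from the repository root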
llama.cpp/.github/workflows/python-type-check.yml ADDED
@@ -0,0 +1,40 @@
+name: Python Type-Check
+
+on:
+  push:
+    paths:
+      - '.github/workflows/python-type-check.yml'
+      - 'pyrightconfig.json'
+      - '**.py'
+      - '**/requirements*.txt'
+  pull_request:
+    paths:
+      - '.github/workflows/python-type-check.yml'
+      - 'pyrightconfig.json'
+      - '**.py'
+      - '**/requirements*.txt'
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref && github.ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  python-type-check:
+    runs-on: ubuntu-latest
+    name: pyright type-check
+    steps:
+      - name: Check out source repository
+        uses: actions/checkout@v4
+      - name: Set up Python environment
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Install Python dependencies
+        # TODO: use a venv
+        run: pip install -r requirements/requirements-all.txt
+      - name: Type-check with Pyright
+        uses: jakebailey/pyright-action@v2
+        with:
+          version: 1.1.382
+          level: warning
+          warnings: true
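A rough local equivalent of this job, assuming the pyright wrapper package from PyPI and that pyrightconfig.json in the repository root supplies the project settings; the exact action flags are not replicated one-to-one:

    pip install -r requirements/requirements-all.txt
    pip install pyright==1.1.382   # match the version pinned in the workflow
    pyright --level warning        # report warnings as well as errors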
llama.cpp/.github/workflows/server.yml ADDED
@@ -0,0 +1,239 @@
+# Server build and tests
+name: Server
+
+on:
+  workflow_dispatch: # allows manual triggering
+    inputs:
+      sha:
+        description: 'Commit SHA1 to build'
+        required: false
+        type: string
+      slow_tests:
+        description: 'Run slow tests'
+        required: true
+        type: boolean
+  push:
+    branches:
+      - master
+    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
+  pull_request:
+    types: [opened, synchronize, reopened]
+    paths: ['.github/workflows/server.yml', '**/CMakeLists.txt', '**/Makefile', '**/*.h', '**/*.hpp', '**/*.c', '**/*.cpp', '**/*.cu', '**/*.swift', '**/*.m', 'examples/server/**.*']
+
+env:
+  LLAMA_LOG_COLORS: 1
+  LLAMA_LOG_PREFIX: 1
+  LLAMA_LOG_TIMESTAMPS: 1
+  LLAMA_LOG_VERBOSITY: 10
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref || github.run_id }}
+  cancel-in-progress: true
+
+jobs:
+  server:
+    runs-on: ubuntu-latest
+
+    strategy:
+      matrix:
+        sanitizer: [ADDRESS, UNDEFINED] # THREAD is broken
+        build_type: [RelWithDebInfo]
+        include:
+          - build_type: Release
+            sanitizer: ""
+      fail-fast: false # While -DLLAMA_SANITIZE_THREAD=ON is broken
+
+    steps:
+      - name: Dependencies
+        id: depends
+        run: |
+          sudo apt-get update
+          sudo apt-get -y install \
+            build-essential \
+            xxd \
+            git \
+            cmake \
+            curl \
+            wget \
+            language-pack-en \
+            libcurl4-openssl-dev
+
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
+
+      - name: Python setup
+        id: setup_python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Tests dependencies
+        id: test_dependencies
+        run: |
+          pip install -r examples/server/tests/requirements.txt
+
+      # Setup nodejs (to be used for verifying bundled index.html)
+      - uses: actions/setup-node@v4
+        with:
+          node-version: '22.11.0'
+
+      - name: WebUI - Install dependencies
+        id: webui_lint
+        run: |
+          cd examples/server/webui
+          npm ci
+
+      - name: WebUI - Check code format
+        id: webui_format
+        run: |
+          git config --global --add safe.directory $(realpath .)
+          cd examples/server/webui
+          git status
+
+          npm run format
+          git status
+          modified_files="$(git status -s)"
+          echo "Modified files: ${modified_files}"
+          if [ -n "${modified_files}" ]; then
+            echo "Files do not follow coding style. To fix: npm run format"
+            echo "${modified_files}"
+            exit 1
+          fi
+
+      - name: Verify bundled index.html
+        id: verify_server_index_html
+        run: |
+          git config --global --add safe.directory $(realpath .)
+          cd examples/server/webui
+          git status
+
+          npm run build
+          git status
+          modified_files="$(git status -s)"
+          echo "Modified files: ${modified_files}"
+          if [ -n "${modified_files}" ]; then
+            echo "Repository is dirty or server/webui is not built as expected"
+            echo "Hint: You may need to follow Web UI build guide in server/README.md"
+            echo "${modified_files}"
+            exit 1
+          fi
+
+      - name: Build (no OpenMP)
+        id: cmake_build_no_openmp
+        if: ${{ matrix.sanitizer == 'THREAD' }}
+        run: |
+          cmake -B build \
+            -DGGML_NATIVE=OFF \
+            -DLLAMA_BUILD_SERVER=ON \
+            -DLLAMA_CURL=ON \
+            -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
+            -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON \
+            -DGGML_OPENMP=OFF ;
+          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
+
+      - name: Build (sanitizers)
+        id: cmake_build_sanitizers
+        if: ${{ matrix.sanitizer != '' && matrix.sanitizer != 'THREAD' }}
+        run: |
+          cmake -B build \
+            -DGGML_NATIVE=OFF \
+            -DLLAMA_BUILD_SERVER=ON \
+            -DLLAMA_CURL=ON \
+            -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} \
+            -DLLAMA_SANITIZE_${{ matrix.sanitizer }}=ON ;
+          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
+
+      - name: Build
+        id: cmake_build
+        if: ${{ matrix.sanitizer == '' }}
+        run: |
+          cmake -B build \
+            -DGGML_NATIVE=OFF \
+            -DLLAMA_BUILD_SERVER=ON \
+            -DLLAMA_CURL=ON \
+            -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} ;
+          cmake --build build --config ${{ matrix.build_type }} -j $(nproc) --target llama-server
+
+      - name: Tests
+        id: server_integration_tests
+        if: ${{ matrix.sanitizer == '' }}
+        run: |
+          cd examples/server/tests
+          ./tests.sh
+
+      - name: Tests (sanitizers)
+        id: server_integration_tests_sanitizers
+        if: ${{ matrix.sanitizer != '' }}
+        run: |
+          cd examples/server/tests
+          LLAMA_SANITIZE=1 ./tests.sh
+
+      - name: Slow tests
+        id: server_integration_tests_slow
+        if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
+        run: |
+          cd examples/server/tests
+          SLOW_TESTS=1 ./tests.sh
+
+
+  server-windows:
+    runs-on: windows-2019
+
+    steps:
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+          ref: ${{ github.event.inputs.sha || github.event.pull_request.head.sha || github.sha || github.head_ref || github.ref_name }}
+
+      - name: libCURL
+        id: get_libcurl
+        env:
+          CURL_VERSION: 8.6.0_6
+        run: |
+          curl.exe -o $env:RUNNER_TEMP/curl.zip -L "https://curl.se/windows/dl-${env:CURL_VERSION}/curl-${env:CURL_VERSION}-win64-mingw.zip"
+          mkdir $env:RUNNER_TEMP/libcurl
+          tar.exe -xvf $env:RUNNER_TEMP/curl.zip --strip-components=1 -C $env:RUNNER_TEMP/libcurl
+
+      - name: Build
+        id: cmake_build
+        run: |
+          cmake -B build -DLLAMA_CURL=ON -DCURL_LIBRARY="$env:RUNNER_TEMP/libcurl/lib/libcurl.dll.a" -DCURL_INCLUDE_DIR="$env:RUNNER_TEMP/libcurl/include"
+          cmake --build build --config Release -j ${env:NUMBER_OF_PROCESSORS} --target llama-server
+
+      - name: Python setup
+        id: setup_python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.11'
+
+      - name: Tests dependencies
+        id: test_dependencies
+        run: |
+          pip install -r examples/server/tests/requirements.txt
+
+      - name: Copy Libcurl
+        id: prepare_libcurl
+        run: |
+          cp $env:RUNNER_TEMP/libcurl/bin/libcurl-x64.dll ./build/bin/Release/libcurl-x64.dll
+
+      - name: Tests
+        id: server_integration_tests
+        if: ${{ !matrix.disabled_on_pr || !github.event.pull_request }}
+        run: |
+          cd examples/server/tests
+          $env:PYTHONIOENCODING = ":replace"
+          pytest -v -x -m "not slow"
+
+      - name: Slow tests
+        id: server_integration_tests_slow
+        if: ${{ (github.event.schedule || github.event.inputs.slow_tests == 'true') && matrix.build_type == 'Release' }}
+        run: |
+          cd examples/server/tests
+          $env:SLOW_TESTS = "1"
+          pytest -v -x
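The Linux job above can be approximated locally for debugging test failures; a sketch assuming a Release build without sanitizers, run from the repository root (the slow suite may download larger models, so it is gated behind SLOW_TESTS just as in CI):

    cmake -B build -DGGML_NATIVE=OFF -DLLAMA_BUILD_SERVER=ON -DLLAMA_CURL=ON -DCMAKE_BUILD_TYPE=Release
    cmake --build build --config Release -j $(nproc) --target llama-server
    cd examples/server/tests
    pip install -r requirements.txt
    ./tests.sh                  # fast integration tests
    SLOW_TESTS=1 ./tests.sh     # optional slow tests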