Skriller0208 committed
Commit 4c0b97f · verified · 1 Parent(s): dcf56e6

Delete scripts

This view is limited to 50 files because it contains too many changes. See raw diff.
Files changed (50)
  1. scripts/.devops/cublas.Dockerfile +0 -28
  2. scripts/.devops/main-cuda.Dockerfile +0 -40
  3. scripts/.devops/main.Dockerfile +0 -19
  4. scripts/.github/workflows/bindings-go.yml +0 -22
  5. scripts/.github/workflows/bindings-ruby.yml.disabled +0 -23
  6. scripts/.github/workflows/build.yml +0 -669
  7. scripts/.github/workflows/docker.yml +0 -57
  8. scripts/.github/workflows/examples.yml +0 -48
  9. scripts/.gitignore +0 -54
  10. scripts/.gitmodules +0 -0
  11. scripts/AUTHORS +0 -301
  12. scripts/CMakeLists.txt +0 -185
  13. scripts/LICENSE +0 -21
  14. scripts/Makefile +0 -1167
  15. scripts/Package.swift +0 -60
  16. scripts/README.md +0 -832
  17. scripts/README_sycl.md +0 -249
  18. scripts/bindings/CMakeLists.txt +0 -19
  19. scripts/bindings/go/.gitignore +0 -2
  20. scripts/bindings/go/LICENSE +0 -21
  21. scripts/bindings/go/Makefile +0 -64
  22. scripts/bindings/go/README.md +0 -100
  23. scripts/bindings/go/doc.go +0 -5
  24. scripts/bindings/go/examples/go-model-download/context.go +0 -30
  25. scripts/bindings/go/examples/go-model-download/main.go +0 -208
  26. scripts/bindings/go/examples/go-whisper/color.go +0 -22
  27. scripts/bindings/go/examples/go-whisper/flags.go +0 -147
  28. scripts/bindings/go/examples/go-whisper/main.go +0 -43
  29. scripts/bindings/go/examples/go-whisper/process.go +0 -132
  30. scripts/bindings/go/go.mod +0 -16
  31. scripts/bindings/go/go.sum +0 -23
  32. scripts/bindings/go/params.go +0 -192
  33. scripts/bindings/go/pkg/whisper/consts.go +0 -28
  34. scripts/bindings/go/pkg/whisper/context.go +0 -331
  35. scripts/bindings/go/pkg/whisper/context_test.go +0 -55
  36. scripts/bindings/go/pkg/whisper/doc.go +0 -4
  37. scripts/bindings/go/pkg/whisper/interface.go +0 -102
  38. scripts/bindings/go/pkg/whisper/model.go +0 -101
  39. scripts/bindings/go/samples/jfk.wav +0 -0
  40. scripts/bindings/go/whisper.go +0 -468
  41. scripts/bindings/go/whisper_test.go +0 -113
  42. scripts/bindings/java/.idea/uiDesigner.xml +0 -124
  43. scripts/bindings/java/README.md +0 -71
  44. scripts/bindings/java/build.gradle +0 -133
  45. scripts/bindings/java/gradle.properties +0 -6
  46. scripts/bindings/java/gradle/wrapper/gradle-wrapper.jar +0 -0
  47. scripts/bindings/java/gradle/wrapper/gradle-wrapper.properties +0 -6
  48. scripts/bindings/java/gradlew +0 -244
  49. scripts/bindings/java/gradlew.bat +0 -92
  50. scripts/bindings/java/settings.gradle +0 -1
scripts/.devops/cublas.Dockerfile DELETED
@@ -1,28 +0,0 @@
- ARG UBUNTU_VERSION=22.04
-
- # This needs to generally match the container host's environment.
- ARG CUDA_VERSION=11.7.1
-
- # Target the CUDA build image
- ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
-
- FROM ${BASE_CUDA_DEV_CONTAINER} as build
-
- # Unless otherwise specified, we make a fat build.
- ARG CUDA_DOCKER_ARCH=all
-
- RUN apt-get update && \
- apt-get install -y build-essential git cmake
-
- WORKDIR /app
-
- COPY . .
-
- # Set nvcc architecture
- ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
- # Enable cuBLAS
- ENV GGML_CUDA=1
-
- RUN make
-
- ENTRYPOINT ["/app/main"]
scripts/.devops/main-cuda.Dockerfile DELETED
@@ -1,40 +0,0 @@
- ARG UBUNTU_VERSION=22.04
- # This needs to generally match the container host's environment.
- ARG CUDA_VERSION=12.3.1
- # Target the CUDA build image
- ARG BASE_CUDA_DEV_CONTAINER=nvidia/cuda:${CUDA_VERSION}-devel-ubuntu${UBUNTU_VERSION}
- # Target the CUDA runtime image
- ARG BASE_CUDA_RUN_CONTAINER=nvidia/cuda:${CUDA_VERSION}-runtime-ubuntu${UBUNTU_VERSION}
-
- FROM ${BASE_CUDA_DEV_CONTAINER} AS build
- WORKDIR /app
-
- # Unless otherwise specified, we make a fat build.
- ARG CUDA_DOCKER_ARCH=all
- # Set nvcc architecture
- ENV CUDA_DOCKER_ARCH=${CUDA_DOCKER_ARCH}
- # Enable cuBLAS
- ENV GGML_CUDA=1
-
- RUN apt-get update && \
- apt-get install -y build-essential \
- && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
-
- # Ref: https://stackoverflow.com/a/53464012
- ENV CUDA_MAIN_VERSION=12.3
- ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
-
- COPY .. .
- RUN make
-
- FROM ${BASE_CUDA_RUN_CONTAINER} AS runtime
- ENV CUDA_MAIN_VERSION=12.3
- ENV LD_LIBRARY_PATH /usr/local/cuda-${CUDA_MAIN_VERSION}/compat:$LD_LIBRARY_PATH
- WORKDIR /app
-
- RUN apt-get update && \
- apt-get install -y curl ffmpeg \
- && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
-
- COPY --from=build /app /app
- ENTRYPOINT [ "bash", "-c" ]
scripts/.devops/main.Dockerfile DELETED
@@ -1,19 +0,0 @@
- FROM ubuntu:22.04 AS build
- WORKDIR /app
-
- RUN apt-get update && \
- apt-get install -y build-essential \
- && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
-
- COPY .. .
- RUN make
-
- FROM ubuntu:22.04 AS runtime
- WORKDIR /app
-
- RUN apt-get update && \
- apt-get install -y curl ffmpeg \
- && rm -rf /var/lib/apt/lists/* /var/cache/apt/archives/*
-
- COPY --from=build /app /app
- ENTRYPOINT [ "bash", "-c" ]
scripts/.github/workflows/bindings-go.yml DELETED
@@ -1,22 +0,0 @@
- name: Bindings Tests (Go)
- on:
- push:
- paths:
- - bindings/go/**
- - whisper.h
- pull_request:
- paths:
- - bindings/go/**
- - whisper.h
-
- jobs:
- ubuntu-latest:
- runs-on: ubuntu-latest
- steps:
- - uses: actions/setup-go@v3
- with:
- go-version: '^1.19'
- - uses: actions/checkout@v1
- - run: |
- cd bindings/go
- make test
scripts/.github/workflows/bindings-ruby.yml.disabled DELETED
@@ -1,23 +0,0 @@
- # TODO: fix this workflow file, disabled for now
- name: Bindings Tests (Ruby)
- on:
- push:
- paths:
- - bindings/ruby/**
- - whisper.h
- pull_request:
- paths:
- - bindings/ruby/**
- - whisper.h
-
- jobs:
- ubuntu-latest:
- runs-on: ubuntu-latest
- steps:
- - uses: ruby/setup-ruby@v1
- with:
- ruby-version: '3.0'
- - uses: actions/checkout@v1
- - run: |
- cd bindings/ruby/ext
- ruby extconf.rb && make
scripts/.github/workflows/build.yml DELETED
@@ -1,669 +0,0 @@
1
- name: CI
2
- on: [push, pull_request]
3
-
4
- env:
5
- ubuntu_image: "ubuntu:22.04"
6
-
7
- jobs:
8
- ubuntu-latest:
9
- runs-on: ubuntu-latest
10
-
11
- strategy:
12
- fail-fast: false
13
- matrix:
14
- arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
15
-
16
- steps:
17
- - name: Clone
18
- uses: actions/checkout@v4
19
-
20
- - name: Set up QEMU
21
- uses: docker/setup-qemu-action@v3
22
-
23
- - name: Build ${{ matrix.arch }}
24
- run: |
25
- docker run --platform ${{ matrix.arch }} --rm \
26
- -v ${{ github.workspace }}:/workspace \
27
- -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
28
- set -e
29
- apt update
30
- apt install -y build-essential libsdl2-dev
31
- make
32
- make stream'
33
-
34
- macOS-latest:
35
- runs-on: macOS-latest
36
-
37
- steps:
38
- - name: Clone
39
- uses: actions/checkout@v4
40
-
41
- - name: Dependencies
42
- run: |
43
- brew update
44
- brew install sdl2
45
-
46
- - name: Build
47
- run: |
48
- make
49
- make stream
50
-
51
- freeBSD-latest:
52
- runs-on: macos-12
53
-
54
- steps:
55
- - name: Clone
56
- uses: actions/checkout@v4
57
-
58
- - name: Build
59
- uses: cross-platform-actions/[email protected]
60
- with:
61
- operating_system: freebsd
62
- version: '13.3'
63
- run: |
64
- sudo pkg update
65
- sudo pkg install -y gmake sdl2
66
- gmake
67
- gmake stream
68
-
69
- ubuntu-latest-gcc:
70
- runs-on: ubuntu-latest
71
-
72
- strategy:
73
- fail-fast: false
74
- matrix:
75
- build: [Debug, Release]
76
- arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
77
-
78
- steps:
79
- - name: Clone
80
- uses: actions/checkout@v4
81
-
82
- - name: Set up QEMU
83
- uses: docker/setup-qemu-action@v3
84
-
85
- - name: Build ${{ matrix.arch }}
86
- run: |
87
- docker run --platform ${{ matrix.arch }} --rm \
88
- -v ${{ github.workspace }}:/workspace \
89
- -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
90
- set -e
91
- apt update
92
- apt install -y build-essential cmake libsdl2-dev
93
- cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }}
94
- make
95
- ctest -L gh --output-on-failure'
96
-
97
- ubuntu-latest-clang:
98
- runs-on: ubuntu-latest
99
-
100
- strategy:
101
- fail-fast: false
102
- matrix:
103
- build: [Debug, Release]
104
- #arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
105
- # TODO: arm/v7 disabled due to clang bug
106
- # https://github.com/ggerganov/whisper.cpp/actions/runs/9657764109/job/26637633042?pr=2256#step:4:1990
107
- arch: [linux/amd64, linux/arm64, linux/ppc64le]
108
-
109
- steps:
110
- - name: Clone
111
- uses: actions/checkout@v4
112
-
113
- - name: Set up QEMU
114
- uses: docker/setup-qemu-action@v3
115
-
116
- - name: Build ${{ matrix.arch }}
117
- run: |
118
- docker run --platform ${{ matrix.arch }} --rm \
119
- -v ${{ github.workspace }}:/workspace \
120
- -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
121
- set -e
122
- apt update
123
- apt install -y clang build-essential cmake libsdl2-dev
124
- cmake . -DWHISPER_SDL2=ON -DCMAKE_BUILD_TYPE=${{ matrix.build }} -DCMAKE_CXX_COMPILER=clang++ -DCMAKE_C_COMPILER=clang
125
- make
126
- ctest -L gh --output-on-failure'
127
-
128
- ubuntu-latest-gcc-sanitized:
129
- runs-on: ubuntu-latest
130
-
131
- strategy:
132
- fail-fast: false
133
- matrix:
134
- sanitizer: [ADDRESS, THREAD, UNDEFINED]
135
- arch: [linux/amd64]
136
-
137
- steps:
138
- - name: Clone
139
- uses: actions/checkout@v4
140
-
141
- - name: Set up QEMU
142
- uses: docker/setup-qemu-action@v3
143
-
144
- - name: Build ${{ matrix.arch }}
145
- run: |
146
- docker run --platform ${{ matrix.arch }} --rm \
147
- -v ${{ github.workspace }}:/workspace \
148
- -w /workspace ${{ env.ubuntu_image }} /bin/sh -c '
149
- set -e
150
- apt update
151
- apt install -y build-essential cmake
152
- cmake . -DCMAKE_BUILD_TYPE=Debug -DWHISPER_SANITIZE_${{ matrix.sanitizer }}=ON
153
- make
154
- ctest -L gh --output-on-failure'
155
-
156
- ubuntu-22-cmake-sycl:
157
- runs-on: ubuntu-22.04
158
-
159
- strategy:
160
- fail-fast: false
161
- matrix:
162
- dwhisper_sycl: [ON]
163
- dcmake_c_compiler: [icx]
164
- dcmake_cxx_compiler: [icpx]
165
- arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
166
-
167
- continue-on-error: true
168
-
169
- steps:
170
- - name: Clone
171
- uses: actions/checkout@v4
172
-
173
- - name: add oneAPI to apt
174
- shell: bash
175
- run: |
176
- cd /tmp
177
- wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
178
- sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
179
- rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
180
- sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
181
-
182
- - name: install oneAPI dpcpp compiler
183
- shell: bash
184
- run: |
185
- sudo apt update
186
- sudo apt install intel-oneapi-compiler-dpcpp-cpp
187
-
188
- - name: install oneAPI MKL library
189
- shell: bash
190
- run: |
191
- sudo apt install intel-oneapi-mkl-devel
192
-
193
- - name: Clone
194
- id: checkout
195
- uses: actions/checkout@v4
196
-
197
- - name: Build
198
- id: cmake_build
199
- run: |
200
- source /opt/intel/oneapi/setvars.sh
201
- mkdir build
202
- cd build
203
- cmake -DGGML_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
204
- cmake --build . --config Release -j $(nproc)
205
-
206
- ubuntu-22-cmake-sycl-fp16:
207
- runs-on: ubuntu-22.04
208
-
209
- strategy:
210
- fail-fast: false
211
- matrix:
212
- dwhisper_sycl: [ON]
213
- dcmake_c_compiler: [icx]
214
- dcmake_cxx_compiler: [icpx]
215
- arch: [linux/amd64, linux/arm64, linux/arm/v7, linux/ppc64le]
216
-
217
- continue-on-error: true
218
-
219
- steps:
220
- - name: Clone
221
- uses: actions/checkout@v4
222
-
223
- - name: add oneAPI to apt
224
- shell: bash
225
- run: |
226
- cd /tmp
227
- wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
228
- sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
229
- rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
230
- sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
231
-
232
- - name: install oneAPI dpcpp compiler
233
- shell: bash
234
- run: |
235
- sudo apt update
236
- sudo apt install intel-oneapi-compiler-dpcpp-cpp
237
-
238
- - name: install oneAPI MKL library
239
- shell: bash
240
- run: |
241
- sudo apt install intel-oneapi-mkl-devel
242
-
243
- - name: Clone
244
- id: checkout
245
- uses: actions/checkout@v4
246
-
247
- - name: Build
248
- id: cmake_build
249
- run: |
250
- source /opt/intel/oneapi/setvars.sh
251
- mkdir build
252
- cd build
253
- cmake -DGGML_SYCL_F16=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
254
- cmake --build . --config Release -j $(nproc)
255
-
256
- windows-msys2:
257
- runs-on: windows-latest
258
-
259
- strategy:
260
- fail-fast: false
261
- matrix:
262
- include:
263
- - { sys: UCRT64, env: ucrt-x86_64, build: Release }
264
- - { sys: CLANG64, env: clang-x86_64, build: Release }
265
-
266
- steps:
267
- - name: Clone
268
- uses: actions/checkout@v4
269
-
270
- - name: Setup ${{ matrix.sys }}
271
- uses: msys2/setup-msys2@v2
272
- with:
273
- update: true
274
- msystem: ${{matrix.sys}}
275
- install: >-
276
- base-devel
277
- mingw-w64-${{matrix.env}}-toolchain
278
- mingw-w64-${{matrix.env}}-cmake
279
- mingw-w64-${{matrix.env}}-SDL2
280
- mingw-w64-${{matrix.env}}-openblas
281
-
282
- - name: Build using make
283
- shell: msys2 {0}
284
- run: |
285
- make -j $(nproc)
286
-
287
- - name: Clean after building using make
288
- shell: msys2 {0}
289
- run: |
290
- make clean
291
-
292
- - name: Build using make w/ OpenBLAS
293
- shell: msys2 {0}
294
- run: |
295
- make GGML_OPENBLAS=1 -j $(nproc)
296
-
297
- - name: Build using CMake
298
- shell: msys2 {0}
299
- run: |
300
- cmake -B build
301
- cmake --build build --config ${{ matrix.build }} -j $(nproc)
302
-
303
- - name: Clean after building using CMake
304
- shell: msys2 {0}
305
- run: |
306
- rm -rf build
307
-
308
- - name: Build using CMake w/ OpenBLAS
309
- shell: msys2 {0}
310
- run: |
311
- cmake -B build -DGGML_OPENBLAS=ON
312
- cmake --build build --config ${{ matrix.build }} -j $(nproc)
313
-
314
- windows:
315
- runs-on: windows-latest
316
-
317
- strategy:
318
- matrix:
319
- build: [Release]
320
- arch: [Win32, x64]
321
- sdl2: [ON]
322
- include:
323
- - arch: Win32
324
- s2arc: x86
325
- jnaPath: win32-x86
326
- - arch: x64
327
- s2arc: x64
328
- jnaPath: win32-x86-64
329
- - sdl2: ON
330
- s2ver: 2.28.5
331
-
332
- steps:
333
- - name: Clone
334
- uses: actions/checkout@v4
335
-
336
- - name: Add msbuild to PATH
337
- uses: microsoft/setup-msbuild@v2
338
-
339
- - name: Fetch SDL2 and set SDL2_DIR
340
- if: matrix.sdl2 == 'ON'
341
- run: |
342
- C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip
343
- 7z x sdl2.zip
344
- echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
345
-
346
- - name: Configure
347
- run: >
348
- cmake -S . -B ./build -A ${{ matrix.arch }}
349
- -DCMAKE_BUILD_TYPE=${{ matrix.build }}
350
- -DWHISPER_SDL2=${{ matrix.sdl2 }}
351
-
352
- - name: Build
353
- run: |
354
- cd ./build
355
- msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
356
-
357
- - name: Copy SDL2.dll
358
- if: matrix.sdl2 == 'ON'
359
- run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
360
-
361
- - name: Upload dll
362
- uses: actions/upload-artifact@v4
363
- with:
364
- name: ${{ matrix.jnaPath }}_whisper.dll
365
- path: build/bin/${{ matrix.build }}/whisper.dll
366
-
367
- - name: Upload binaries
368
- if: matrix.sdl2 == 'ON'
369
- uses: actions/upload-artifact@v4
370
- with:
371
- name: whisper-bin-${{ matrix.arch }}
372
- path: build/bin/${{ matrix.build }}
373
-
374
- windows-blas:
375
- runs-on: windows-latest
376
-
377
- strategy:
378
- matrix:
379
- build: [Release]
380
- arch: [Win32, x64]
381
- blas: [ON]
382
- sdl2: [ON]
383
- include:
384
- - arch: Win32
385
- obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.25/OpenBLAS-0.3.25-x86.zip
386
- s2arc: x86
387
- - arch: x64
388
- obzip: https://github.com/OpenMathLib/OpenBLAS/releases/download/v0.3.25/OpenBLAS-0.3.25-x64.zip
389
- s2arc: x64
390
- - sdl2: ON
391
- s2ver: 2.28.5
392
-
393
- steps:
394
- - name: Clone
395
- uses: actions/checkout@v4
396
-
397
- - name: Add msbuild to PATH
398
- uses: microsoft/setup-msbuild@v2
399
-
400
- - name: Fetch OpenBLAS
401
- if: matrix.blas == 'ON'
402
- run: |
403
- C:/msys64/usr/bin/wget.exe -qO blas.zip ${{ matrix.obzip }}
404
- 7z x blas.zip -oblas -y
405
- copy blas/include/cblas.h .
406
- copy blas/include/openblas_config.h .
407
- echo "OPENBLAS_PATH=$env:GITHUB_WORKSPACE/blas" >> $env:GITHUB_ENV
408
-
409
- - name: Fetch SDL2 and set SDL2_DIR
410
- if: matrix.sdl2 == 'ON'
411
- run: |
412
- C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip
413
- 7z x sdl2.zip
414
- echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
415
-
416
- - name: Configure
417
- run: >
418
- cmake -S . -B ./build -A ${{ matrix.arch }}
419
- -DCMAKE_BUILD_TYPE=${{ matrix.build }}
420
- -DGGML_OPENBLAS=${{ matrix.blas }}
421
- -DCMAKE_LIBRARY_PATH="$env:OPENBLAS_PATH/lib"
422
- -DWHISPER_SDL2=${{ matrix.sdl2 }}
423
-
424
- - name: Build
425
- run: |
426
- cd ./build
427
- msbuild ALL_BUILD.vcxproj -t:build -p:configuration=${{ matrix.build }} -p:platform=${{ matrix.arch }}
428
-
429
- - name: Copy libopenblas.dll
430
- if: matrix.blas == 'ON'
431
- run: copy "$env:OPENBLAS_PATH/bin/libopenblas.dll" build/bin/${{ matrix.build }}
432
-
433
- - name: Copy SDL2.dll
434
- if: matrix.sdl2 == 'ON'
435
- run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
436
-
437
- - name: Upload binaries
438
- if: matrix.blas == 'ON' && matrix.sdl2 == 'ON'
439
- uses: actions/upload-artifact@v4
440
- with:
441
- name: whisper-blas-bin-${{ matrix.arch }}
442
- path: build/bin/${{ matrix.build }}
443
-
444
- windows-cublas:
445
- runs-on: windows-2019
446
-
447
- strategy:
448
- matrix:
449
- build: [Release]
450
- arch: [x64]
451
- cublas: [ON]
452
- sdl2: [ON]
453
- cuda-toolkit: [12.2.0, 11.8.0]
454
- include:
455
- - arch: x64
456
- s2arc: x64
457
- - sdl2: ON
458
- s2ver: 2.28.5
459
-
460
- steps:
461
- - name: Clone
462
- uses: actions/checkout@v4
463
-
464
- - name: Add msbuild to PATH
465
- uses: microsoft/setup-msbuild@v2
466
-
467
- - name: Install CUDA Toolkit
468
- id: cuda-toolkit
469
- uses: Jimver/[email protected]
470
- with:
471
- cuda: '${{ matrix.cuda-toolkit }}'
472
-
473
- - name: Fetch SDL2 and set SDL2_DIR
474
- if: matrix.sdl2 == 'ON'
475
- run: |
476
- C:/msys64/usr/bin/wget.exe -qO sdl2.zip https://github.com/libsdl-org/SDL/releases/download/release-${{ matrix.s2ver }}/SDL2-devel-${{ matrix.s2ver }}-VC.zip
477
- 7z x sdl2.zip
478
- echo "SDL2_DIR=$env:GITHUB_WORKSPACE/SDL2-${{ matrix.s2ver }}/cmake" >> $env:GITHUB_ENV
479
-
480
- - name: Configure
481
- run: >
482
- cmake -S . -B ./build -A ${{ matrix.arch }}
483
- -DCMAKE_BUILD_TYPE=${{ matrix.build }}
484
- -DGGML_CUDA=${{ matrix.cublas }}
485
- -DWHISPER_SDL2=${{ matrix.sdl2 }}
486
-
487
- - name: Build ${{ matrix.cuda-toolkit }}
488
- run: |
489
- cd ./build
490
- cmake --build . --config ${{ matrix.build }}
491
-
492
- - name: Copy CUDA DLLs
493
- run: >
494
- Copy-Item -PassThru
495
- -Path "${{ steps.cuda-toolkit.outputs.CUDA_PATH }}/bin/*.dll"
496
- -Include cudart64_*,cublas64_*,cublasLt64_*
497
- -Destination build/bin/${{ matrix.build }}
498
-
499
- - name: Copy SDL2.dll
500
- if: matrix.sdl2 == 'ON'
501
- run: copy "$env:SDL2_DIR/../lib/${{ matrix.s2arc }}/SDL2.dll" build/bin/${{ matrix.build }}
502
-
503
- - name: Upload binaries
504
- if: matrix.sdl2 == 'ON'
505
- uses: actions/upload-artifact@v4
506
- with:
507
- name: whisper-cublas-${{ matrix.cuda-toolkit }}-bin-${{ matrix.arch }}
508
- path: build/bin/${{ matrix.build }}
509
-
510
- emscripten:
511
- runs-on: ubuntu-latest
512
-
513
- strategy:
514
- matrix:
515
- build: [Release]
516
-
517
- steps:
518
- - name: Clone
519
- uses: actions/checkout@v4
520
-
521
- - name: Setup emsdk
522
- uses: mymindstorm/setup-emsdk@v14
523
-
524
- - name: Verify
525
- run: emcc -v
526
-
527
- - name: Build
528
- run: |
529
- emcmake cmake . -DCMAKE_BUILD_TYPE=${{ matrix.build }}
530
- make
531
-
532
- ios:
533
- runs-on: macos-latest
534
-
535
- strategy:
536
- matrix:
537
- build: [Release]
538
-
539
- steps:
540
- - name: Clone
541
- uses: actions/checkout@v4
542
-
543
- - name: Configure
544
- run: |
545
- cp models/for-tests-ggml-base.en.bin models/ggml-base.en.bin
546
- mkdir models/ggml-base.en-encoder.mlmodelc
547
-
548
- - name: Build objc example
549
- run: xcodebuild -project examples/whisper.objc/whisper.objc.xcodeproj -scheme whisper.objc -configuration ${{ matrix.build }} -sdk iphonesimulator build
550
-
551
- - name: Build swiftui example
552
- run: xcodebuild -project examples/whisper.swiftui/whisper.swiftui.xcodeproj -scheme WhisperCppDemo -configuration ${{ matrix.build }} -sdk iphonesimulator build
553
-
554
- android:
555
- runs-on: ubuntu-latest
556
-
557
- steps:
558
- - name: Clone
559
- uses: actions/checkout@v4
560
- with:
561
- path: whisper
562
-
563
- - name: Clone
564
- uses: actions/checkout@v4
565
- with:
566
- repository: ggerganov/ggml
567
- path: ggml
568
-
569
- - name: Install Java
570
- uses: actions/setup-java@v4
571
- with:
572
- distribution: zulu
573
- java-version: 21
574
-
575
- - name: Setup Android SDK
576
- uses: android-actions/setup-android@v3
577
-
578
- - name: Build
579
- run: |
580
- cd whisper/examples/whisper.android
581
- ./gradlew assembleRelease --no-daemon
582
-
583
- - name: Build with external ggml
584
- run: |
585
- export PATH_TO_GGML=$PWD/ggml
586
- cd whisper/examples/whisper.android
587
- ./gradlew assembleRelease --no-daemon -PGGML_HOME=$PATH_TO_GGML
588
-
589
- android_java:
590
- runs-on: ubuntu-latest
591
-
592
- steps:
593
- - name: Clone
594
- uses: actions/checkout@v4
595
-
596
- - name: set up JDK 11
597
- uses: actions/setup-java@v4
598
- with:
599
- java-version: '11'
600
- distribution: 'temurin'
601
- cache: gradle
602
-
603
- - name: Setup Android SDK
604
- uses: android-actions/setup-android@v3
605
- with:
606
- cmdline-tools-version: 9.0
607
-
608
- - name: Build
609
- run: |
610
- cd examples/whisper.android.java
611
- chmod +x ./gradlew
612
- ./gradlew assembleRelease
613
-
614
- # TODO: disabled because of following fail: https://github.com/ggerganov/whisper.cpp/actions/runs/9686220096/job/26735899598
615
- # java:
616
- # needs: [ 'windows' ]
617
- # runs-on: windows-latest
618
- # steps:
619
- # - uses: actions/checkout@v4
620
- #
621
- # - name: Install Java
622
- # uses: actions/setup-java@v4
623
- # with:
624
- # distribution: zulu
625
- # java-version: 20
626
- #
627
- # - name: Download Windows lib
628
- # uses: actions/download-artifact@v4
629
- # with:
630
- # name: win32-x86-64_whisper.dll
631
- # path: bindings/java/build/generated/resources/main/win32-x86-64
632
- #
633
- # - name: Build
634
- # run: |
635
- # models\download-ggml-model.cmd tiny.en
636
- # cd bindings/java
637
- # chmod +x ./gradlew
638
- # ./gradlew build
639
- #
640
- # - name: Upload jar
641
- # uses: actions/upload-artifact@v4
642
- # with:
643
- # name: whispercpp.jar
644
- # path: bindings/java/build/libs/whispercpp-*.jar
645
- #
646
- # - name: Publish package
647
- # if: ${{ github.ref == 'refs/heads/master' }}
648
- # uses: gradle/[email protected]
649
- # with:
650
- # arguments: publish
651
- # build-root-directory: bindings/java
652
- # env:
653
- # MAVEN_USERNAME: ${{ secrets.JIRA_USER }}
654
- # MAVEN_PASSWORD: ${{ secrets.JIRA_PASS }}
655
- # PGP_SECRET: ${{ secrets.GPG_PRIVATE_KEY }}
656
- # PGP_PASSPHRASE: ${{ secrets.GPG_PASSPHRASE }}
657
-
658
- quantize:
659
- runs-on: ubuntu-latest
660
-
661
- steps:
662
- - name: Clone
663
- uses: actions/checkout@v4
664
-
665
- - name: Test quantize
666
- run: |
667
- ./models/download-ggml-model.sh tiny.en
668
- make quantize
669
- ./quantize models/ggml-tiny.en.bin models/ggml-tiny.en-q4_0.bin q4_0
scripts/.github/workflows/docker.yml DELETED
@@ -1,57 +0,0 @@
- name: Publish Docker image
-
- on:
- pull_request:
- push:
- branches:
- - master
-
- jobs:
- push_to_registry:
- name: Push Docker image to Docker Hub
- if: github.event.pull_request.draft == false
-
- runs-on: ubuntu-latest
- env:
- COMMIT_SHA: ${{ github.sha }}
- strategy:
- matrix:
- config:
- - { tag: "main", dockerfile: ".devops/main.Dockerfile", platform: "linux/amd64,linux/arm64" }
- - { tag: "main-cuda", dockerfile: ".devops/main-cuda.Dockerfile", platform: "linux/amd64" }
-
- steps:
- - name: Check out the repo
- uses: actions/checkout@v3
-
- - name: Set up QEMU
- uses: docker/setup-qemu-action@v3
-
- - name: Set up Docker Buildx
- uses: docker/setup-buildx-action@v3
-
- - name: Log in to Docker Hub
- uses: docker/login-action@v3
- with:
- registry: ghcr.io
- username: ${{ github.repository_owner }}
- password: ${{ secrets.GITHUB_TOKEN }}
-
- - name: Build and push Docker image (versioned)
- if: github.event_name == 'push'
- uses: docker/build-push-action@v5
- with:
- context: .
- push: true
- platforms: ${{ matrix.config.platforms }}
- tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}-${{ env.COMMIT_SHA }}"
- file: ${{ matrix.config.dockerfile }}
-
- - name: Build and push Docker image (tagged)
- uses: docker/build-push-action@v4
- with:
- context: .
- push: ${{ github.event_name == 'push' }}
- platforms: ${{ matrix.config.platforms }}
- tags: "ghcr.io/${{ github.repository }}:${{ matrix.config.tag }}"
- file: ${{ matrix.config.dockerfile }}
scripts/.github/workflows/examples.yml DELETED
@@ -1,48 +0,0 @@
- name: Examples Tests
- on:
- push:
- paths:
- - examples/addon.node/**
- - whisper.h
- pull_request:
- paths:
- - examples/addon.node/**
- - whisper.h
-
- jobs:
- addon_node-ubuntu-latest:
- runs-on: ubuntu-latest
- strategy:
- matrix:
- node-version: [ 16.x, 18.x ]
- steps:
- - name: Clone
- uses: actions/checkout@v1
-
- - name: Dependencies
- run: |
- sudo apt-get update
- sudo apt-get install build-essential
- sudo apt-get install cmake
- sudo apt-get install libsdl2-dev
-
- - name: Use Node.js ${{ matrix.node-version }}
- uses: actions/setup-node@v1
- with:
- node-version: ${{ matrix.node-version }}
- cache: 'npm'
-
- - name: Install package.json dependencies
- working-directory: ./examples/addon.node
- run: npm install
-
- - name: Compile addon.node
- run: npx cmake-js compile -T addon.node -B Release
-
- - name: Download test model
- run: |
- bash ./models/download-ggml-model.sh base.en
- - name: Test
- run: |
- cd examples/addon.node
- npm run test
scripts/.gitignore DELETED
@@ -1,54 +0,0 @@
- *.o
- *.a
- .cache/
- .coreml/
- .test/
- .vs/
- .vscode/
- .DS_Store
- .vimspector.json
- /CMakeSettings.json
-
- build/
- build-*/
-
- # SPM
- .build/
- .swiftpm
- *.metallib
-
- /main
- /stream
- /command
- /talk
- /talk-llama
- /bench
- /quantize
- /server
- /lsp
-
- arm_neon.h
- sync.sh
- libwhisper.a
- libwhisper.so
- compile_commands.json
-
- examples/arm_neon.h
- examples/whisper.objc/whisper.objc.xcodeproj/xcshareddata
- examples/whisper.objc/whisper.objc.xcodeproj/xcuserdata/
- examples/whisper.objc/whisper.objc.xcodeproj/project.xcworkspace/xcuserdata
-
- extra/bench-gg.txt
-
- models/*.mlmodel
- models/*.mlmodelc
- models/*.mlpackage
- bindings/java/.gradle/
- bindings/java/.idea/
- .idea/
-
- benchmark_results.csv
- cmake-build-debug/
- .cxx/
- .gradle/
- local.properties
scripts/.gitmodules DELETED
File without changes
scripts/AUTHORS DELETED
@@ -1,301 +0,0 @@
1
- # date: Tue Apr 9 20:27:03 EEST 2024
2
- # this file is auto-generated by scripts/gen-authors.sh
3
-
4
5
- 0cc4m <[email protected]>
6
- 0xsourcecode <[email protected]>
7
8
- Aarni Koskela <[email protected]>
9
- Aaron Pham <[email protected]>
10
- Aaron Taylor <[email protected]>
11
- Abhilash Majumder <[email protected]>
12
- Abitofevrything <[email protected]>
13
- AfryMask <[email protected]>
14
- Ahmad Bilal <[email protected]>
15
- AidanBeltonS <[email protected]>
16
- Akash Mahajan <[email protected]>
17
- Akash Mahajan <[email protected]>
18
- Al Hoang <[email protected]>
19
- Alan <unknown>
20
- Aleksander Andrzejewski <[email protected]>
21
- Alex Azarov <[email protected]>
22
- Alex Bacart <[email protected]>
23
- Alex Evgrashin <[email protected]>
24
- Alexandr Graschenkov <[email protected]>
25
- Alexandru Mariuti <[email protected]>
26
- Alexey Kharlamov <[email protected]>
27
- Alfredo Montesinos <[email protected]>
28
- Ali Alameh <[email protected]>
29
- Ananta Bastola <[email protected]>
30
- Andreu Huguet <[email protected]>
31
- Andrew Huynh <[email protected]>
32
- Andrew S <[email protected]>
33
- Andy Maloney <[email protected]>
34
- Anton Kostin <[email protected]>
35
- Artyom Mezin <[email protected]>
36
- Asad Memon <[email protected]>
37
- Ashraful Islam <[email protected]>
38
- AsukaMinato <[email protected]>
39
- AustinMroz <[email protected]>
40
- Avik Sengupta <[email protected]>
41
- Bader-eddine Ouaich <[email protected]>
42
- Baffin Lee <[email protected]>
43
- Ben Nortier <[email protected]>
44
- Benjamin Heiniger <[email protected]>
45
- Bo-Yi Wu <[email protected]>
46
- Boris Bliznioukov <[email protected]>
47
- Borislav Stanimirov <[email protected]>
48
- Brad Murray <[email protected]>
49
- Brian Murray <[email protected]>
50
- CRD716 <[email protected]>
51
- Canis Lupus <[email protected]>
52
- Carolinabanana <[email protected]>
53
- ChangSeok Oh <[email protected]>
54
- Chaoqun <[email protected]>
55
- Chia-Hsiang Cheng <[email protected]>
56
- Chidi Williams <[email protected]>
57
- Christian <[email protected]>
58
- Clifford Heath <[email protected]>
59
- Colin <[email protected]>
60
- DGdev91 <[email protected]>
61
- Damian Czaja <[email protected]>
62
- Daniel Bevenius <[email protected]>
63
- David <[email protected]>
64
- David Thorpe <[email protected]>
65
- Davidson Francis <[email protected]>
66
- Dener Stassun <[email protected]>
67
- Didzis Gosko <[email protected]>
68
- Digipom <[email protected]>
69
70
- Dody Suria Wijaya <[email protected]>
71
- Dr. Tom Murphy VII Ph.D <[email protected]>
72
- Duncan McConnell <[email protected]>
73
- Egor Egorov <[email protected]>
74
- Elkana Bardugo <[email protected]>
75
- Emmanuel Schmidbauer <[email protected]>
76
- Engininja2 <[email protected]>
77
- Eric Swanson <[email protected]>
78
- Eric Tendian <[email protected]>
79
- Erik Scholz <[email protected]>
80
- Evan Jones <[email protected]>
81
- Evan Martin <[email protected]>
82
83
- Evgeny Kuznetsov <[email protected]>
84
- F1L1P <[email protected]>
85
- Fangjun Kuang <[email protected]>
86
- Felix <[email protected]>
87
- Finn Voorhees <[email protected]>
88
- FlippFuzz <[email protected]>
89
- Gang Chen <[email protected]>
90
- Gavin Cai <[email protected]>
91
- George Hindle <[email protected]>
92
- Georgi Gerganov <[email protected]>
93
- GitAritron <[email protected]>
94
- GiviMAD <[email protected]>
95
- Gleicon Moraes <[email protected]>
96
- Gregor Jasny <[email protected]>
97
- Guillaume Wenzek <[email protected]>
98
- HY. Kelvin Lee <[email protected]>
99
- Halalaluyafail3 <[email protected]>
100
101
- Herman Semenov <[email protected]>
102
- Hrishikesh Barman <[email protected]>
103
- Ian Bicking <[email protected]>
104
- Ian Bull <[email protected]>
105
- Ikko Ashimine <[email protected]>
106
- InconsolableCellist <[email protected]>
107
- Ismatulla Mansurov <[email protected]>
108
- Ivan Gorin <[email protected]>
109
110
- Jack Mousseau <[email protected]>
111
- JacobLinCool <[email protected]>
112
- Jakub Ráček <[email protected]>
113
- Jared Van Bortel <[email protected]>
114
- Jay Binks <[email protected]>
115
- Jhen-Jie Hong <[email protected]>
116
- Jhen-Jie Hong <[email protected]>
117
- JidongZhang-THU <[email protected]>
118
- Jo Liss <[email protected]>
119
- Johan <[email protected]>
120
- Johannes Gäßler <[email protected]>
121
- John Balis <[email protected]>
122
- Jonathan Soo <[email protected]>
123
- Jonno <[email protected]>
124
- Joonas Pihlajamaa <[email protected]>
125
126
- Josh Bleecher Snyder <[email protected]>
127
128
- Jumper775 <[email protected]>
129
- Justine Tunney <[email protected]>
130
- KP Kaiser <[email protected]>
131
- Kamilake <[email protected]>
132
- Kartik Saranathan <[email protected]>
133
- Kasumi <[email protected]>
134
- Kawrakow <[email protected]>
135
- Kevin Brothaler <[email protected]>
136
- Konstantin Zhuravlyov <[email protected]>
137
- Kreijstal <[email protected]>
138
- Kylin <[email protected]>
139
- LBlue <[email protected]>
140
- Larry Battle <[email protected]>
141
- Laytan Laats <[email protected]>
142
- Leo Moll <[email protected]>
143
- Lexevolution <[email protected]>
144
- LittleLoli <[email protected]>
145
- Lucas Zanek <[email protected]>
146
- Luis Herrera <[email protected]>
147
- Lukas Rist <[email protected]>
148
- M. A. Ali <[email protected]>
149
- M. Eren Akbiyik <[email protected]>
150
- Maciek <[email protected]>
151
- Marcin Mielniczuk <[email protected]>
152
- Martin Warnaar <[email protected]>
153
- Matheus de Sousa <[email protected]>
154
- Mathijs de Bruin <[email protected]>
155
- Matija Pevec <[email protected]>
156
- Maximiliano Levi <[email protected]>
157
- Meng, Hengyu <[email protected]>
158
- Michael Podvitskiy <[email protected]>
159
- Michael Rienstra <[email protected]>
160
- Mikhail Grigorev <[email protected]>
161
- Mohammadreza Hendiani <[email protected]>
162
- Mohit Agarwal <[email protected]>
163
- Murilo Santana <[email protected]>
164
- Neil Chudleigh <[email protected]>
165
- Neo Zhang Jianyu <[email protected]>
166
- Neuman Vong <[email protected]>
167
- Nicholas Albion <[email protected]>
168
- Niels Mayer <[email protected]>
169
- Okabintaro <[email protected]>
170
- Oleg Sidorov <[email protected]>
171
- Oleg Sidorov <[email protected]>
172
- Ondrej Kokes <[email protected]>
173
- Ouadie EL FAROUKI <[email protected]>
174
- Paul Tsochantaris <[email protected]>
175
- Philipp Zabel <[email protected]>
176
- Philippe Normand <[email protected]>
177
- Przemysław Pawełczyk <[email protected]>
178
- Qianhe Chen <[email protected]>
179
- Radosław Gryta <[email protected]>
180
- Reinforce-II <[email protected]>
181
- Reinis Muiznieks <[email protected]>
182
- RelatedTitle <[email protected]>
183
- RhinoDevel <[email protected]>
184
- Rich Jones <[email protected]>
185
- Robin <[email protected]>
186
- Roddur Dasgupta <[email protected]>
187
- Roland Rabien <[email protected]>
188
- Rotem Dan <[email protected]>
189
- Ryan Hitchman <[email protected]>
190
- Ryan Metcalfe <[email protected]>
191
- RyanChang <[email protected]>
192
193
- Sam Pullara <[email protected]>
194
- Sanchit Gandhi <[email protected]>
195
- Sergio López <[email protected]>
196
- Siddharth Ramakrishnan <[email protected]>
197
- Simon Moisselin <[email protected]>
198
- Sindre Sorhus <[email protected]>
199
- Slava Primenko <[email protected]>
200
- Syahmi Azhar <[email protected]>
201
- Syed Jafri <[email protected]>
202
- Sơn Phan Trung <[email protected]>
203
- Taisei Mima <[email protected]>
204
- Takeshi Inoue <[email protected]>
205
- Tamotsu Takahashi <[email protected]>
206
- Taras Glek <[email protected]>
207
- Tauseef Mohiuddin <[email protected]>
208
- Thijs Raymakers <[email protected]>
209
- Thomas Fitzsimmons <[email protected]>
210
- Tiago Fassoni <[email protected]>
211
- Tienshiao Ma <[email protected]>
212
- Timothy Cronin <[email protected]>
213
- Tobrun <[email protected]>
214
215
- Tong Li <[email protected]>
216
- Topping1 <[email protected]>
217
- Travis Cline <[email protected]>
218
- UEXTM.com <[email protected]>
219
- Vadim Peretokin <[email protected]>
220
- Valentin Gosu <[email protected]>
221
- Vulcan <[email protected]>
222
- WhiteOlivierus <[email protected]>
223
- Xiang (Kevin) Li <[email protected]>
224
- Xiao-Yong Jin <[email protected]>
225
- XiaotaoChen <[email protected]>
226
- Yajing Tang <[email protected]>
227
- Yang Shen <[email protected]>
228
- Yunès <[email protected]>
229
- ZaBlazzingZephyrus <[email protected]>
230
- Zigfrid Zvezdin <[email protected]>
231
- Zollner <[email protected]>
232
- ai-at-home <[email protected]>
233
- alonfaraj <[email protected]>
234
- andypayne <[email protected]>
235
- ardfork <[email protected]>
236
- automaticcat <[email protected]>
237
- be-next <[email protected]>
238
- bert hubert <[email protected]>
239
240
- bobqianic <[email protected]>
241
- bocytko <[email protected]>
242
- boolemancer <[email protected]>
243
- boolemancer <[email protected]>
244
- bradmit <[email protected]>
245
- brunofaustino <[email protected]>
246
- bssrdf <[email protected]>
247
- byte-6174 <[email protected]>
248
- cdosoftei <[email protected]>
249
- clach04 <[email protected]>
250
- compilade <[email protected]>
251
- conradg <[email protected]>
252
- ddpasa <[email protected]>
253
- denersc <[email protected]>
254
- dscripka <[email protected]>
255
- duthils <[email protected]>
256
- ecneladis <[email protected]>
257
- faker <[email protected]>
258
- fitzsim <[email protected]>
259
- fraxy-v <[email protected]>
260
- genevera (she/her) <[email protected]>
261
- geniusnut <[email protected]>
262
- greeshmay <[email protected]>
263
- hydai <[email protected]>
264
- iamthad <[email protected]>
265
- james wolf <[email protected]>
266
- joecryptotoo <[email protected]>
267
- jorismertz <[email protected]>
268
- junkfood <[email protected]>
269
- jwijffels <[email protected]>
270
- kamranjon <[email protected]>
271
- katsu560 <[email protected]>
272
- kennethge <[email protected]>
273
- keyehzy <[email protected]>
274
- leejet <[email protected]>
275
- litong <[email protected]>
276
- lnyan <[email protected]>
277
- m.bell <[email protected]>
278
- mkiol <[email protected]>
279
- novag <[email protected]>
280
- pajowu <[email protected]>
281
- polarmoon <[email protected]>
282
- rlapray <[email protected]>
283
- sandrohanea <[email protected]>
284
- semiformal-net <[email protected]>
285
- shibukazu <[email protected]>
286
- shikokuchuo <[email protected]>
287
- slaren <[email protected]>
288
- slashlib <[email protected]>
289
- snadampal <[email protected]>
290
- st-gr <[email protected]>
291
- texmex76 <[email protected]>
292
- thefinaldegree <[email protected]>
293
- trixirt <[email protected]>
294
- ulatekh <[email protected]>
295
- undef <[email protected]>
296
- venkr <[email protected]>
297
- vicalloy <[email protected]>
298
- xdrudis <[email protected]>
299
- zhouwg <[email protected]>
300
- 布客飞龙 <[email protected]>
301
- Артём Земляк <[email protected]>
scripts/CMakeLists.txt DELETED
@@ -1,185 +0,0 @@
- cmake_minimum_required(VERSION 3.5) # for add_link_options and implicit target directories.
- project("whisper.cpp" C CXX)
- project("whisper.cpp" VERSION 1.6.2)
- include(CheckIncludeFileCXX)
-
- set(SOVERSION 1)
-
- #set(CMAKE_WARN_DEPRECATED YES)
- set(CMAKE_WARN_UNUSED_CLI YES)
-
- set(CMAKE_EXPORT_COMPILE_COMMANDS ON)
-
- if (NOT XCODE AND NOT MSVC AND NOT CMAKE_BUILD_TYPE)
- set(CMAKE_BUILD_TYPE Release CACHE STRING "Build type" FORCE)
- set_property(CACHE CMAKE_BUILD_TYPE PROPERTY STRINGS "Debug" "Release" "MinSizeRel" "RelWithDebInfo")
- endif()
-
- # Add path to modules
- list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/")
-
- set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)
-
- if (CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR)
- set(WHISPER_STANDALONE ON)
-
- include(git-vars)
-
- # configure project version
- configure_file(${CMAKE_SOURCE_DIR}/bindings/javascript/package-tmpl.json ${CMAKE_SOURCE_DIR}/bindings/javascript/package.json @ONLY)
- else()
- set(WHISPER_STANDALONE OFF)
- endif()
-
- if (EMSCRIPTEN)
- set(BUILD_SHARED_LIBS_DEFAULT OFF)
-
- option(WHISPER_WASM_SINGLE_FILE "whisper: embed WASM inside the generated whisper.js" ON)
-
- # TODO: without these, we get the following error:
- # wasm-ld: error: --shared-memory is disallowed by whisper.cpp.o because it was not compiled with 'atomics' or 'bulk-memory' features.
- set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -pthread -s TOTAL_STACK=5242880")
- set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -pthread -s TOTAL_STACK=5242880")
- else()
- if (MINGW)
- set(BUILD_SHARED_LIBS_DEFAULT OFF)
- else()
- set(BUILD_SHARED_LIBS_DEFAULT ON)
- endif()
- endif()
-
- option(BUILD_SHARED_LIBS "build shared libraries" ${BUILD_SHARED_LIBS_DEFAULT})
-
- #
- # option list
- #
-
- # general
- option(WHISPER_CCACHE "whisper: use ccache if available" ON)
-
- # debug
- option(WHISPER_ALL_WARNINGS "whisper: enable all compiler warnings" ON)
- option(WHISPER_ALL_WARNINGS_3RD_PARTY "whisper: enable all compiler warnings in 3rd party libs" OFF)
-
- # build
- option(WHISPER_FATAL_WARNINGS "whisper: enable -Werror flag" OFF)
-
- # sanitizers
- option(WHISPER_SANITIZE_THREAD "whisper: enable thread sanitizer" OFF)
- option(WHISPER_SANITIZE_ADDRESS "whisper: enable address sanitizer" OFF)
- option(WHISPER_SANITIZE_UNDEFINED "whisper: enable undefined sanitizer" OFF)
-
- # extra artifacts
- option(WHISPER_BUILD_TESTS "whisper: build tests" ${WHISPER_STANDALONE})
- option(WHISPER_BUILD_EXAMPLES "whisper: build examples" ${WHISPER_STANDALONE})
- option(WHISPER_BUILD_SERVER "whisper: build server example" ${WHISPER_STANDALONE})
-
- # 3rd party libs
- option(WHISPER_CURL "whisper: use libcurl to download model from an URL" OFF)
- option(WHISPER_SDL2 "whisper: support for libSDL2" OFF)
-
- if (CMAKE_SYSTEM_NAME MATCHES "Linux")
- option(WHISPER_FFMPEG "whisper: support building and linking with ffmpeg libs (avcodec, swresample, ...)" OFF)
- endif()
-
- option(WHISPER_COREML "whisper: enable Core ML framework" OFF)
- option(WHISPER_COREML_ALLOW_FALLBACK "whisper: allow non-CoreML fallback" OFF)
- option(WHISPER_OPENVINO "whisper: support for OpenVINO" OFF)
-
- # Required for relocatable CMake package
- include(${CMAKE_CURRENT_SOURCE_DIR}/cmake/build-info.cmake)
-
- # override ggml options
- set(GGML_CCACHE ${WHISPER_CCACHE})
- set(GGML_SANITIZE_THREAD ${WHISPER_SANITIZE_THREAD})
- set(GGML_SANITIZE_ADDRESS ${WHISPER_SANITIZE_ADDRESS})
- set(GGML_SANITIZE_UNDEFINED ${WHISPER_SANITIZE_UNDEFINED})
- set(GGML_ALL_WARNINGS ${WHISPER_ALL_WARNINGS})
- set(GGML_FATAL_WARNINGS ${WHISPER_FATAL_WARNINGS})
-
- # transition helpers
- function (whisper_option_depr TYPE OLD NEW)
- if (${OLD})
- message(${TYPE} "${OLD} is deprecated and will be removed in the future.\nUse ${NEW} instead\n")
- set(${NEW} ON)
- endif()
- endfunction()
-
- whisper_option_depr(FATAL_ERROR WHISPER_CUBLAS GGML_CUDA)
- whisper_option_depr(WARNING WHISPER_CUDA GGML_CUDA)
- whisper_option_depr(WARNING WHISPER_KOMPUTE GGML_KOMPUTE)
- whisper_option_depr(WARNING WHISPER_METAL GGML_METAL)
- whisper_option_depr(WARNING WHISPER_METAL_EMBED_LIBRARY GGML_METAL_EMBED_LIBRARY)
- whisper_option_depr(WARNING WHISPER_NATIVE GGML_NATIVE)
- whisper_option_depr(WARNING WHISPER_OPENMP GGML_OPENMP)
- whisper_option_depr(WARNING WHISPER_RPC GGML_RPC)
- whisper_option_depr(WARNING WHISPER_SYCL GGML_SYCL)
- whisper_option_depr(WARNING WHISPER_SYCL_F16 GGML_SYCL_F16)
-
- #
- # build the library
- #
-
- if (NOT TARGET ggml)
- add_subdirectory(ggml)
- # ... otherwise assume ggml is added by a parent CMakeLists.txt
- endif()
- add_subdirectory(src)
-
- #
- # install
- #
-
- include(GNUInstallDirs)
- include(CMakePackageConfigHelpers)
-
- set(WHISPER_BUILD_NUMBER ${BUILD_NUMBER})
- set(WHISPER_BUILD_COMMIT ${BUILD_COMMIT})
- set(WHISPER_INSTALL_VERSION ${CMAKE_PROJECT_VERSION})
-
- set(WHISPER_INCLUDE_INSTALL_DIR ${CMAKE_INSTALL_INCLUDEDIR} CACHE PATH "Location of header files")
- set(WHISPER_LIB_INSTALL_DIR ${CMAKE_INSTALL_LIBDIR} CACHE PATH "Location of library files")
- set(WHISPER_BIN_INSTALL_DIR ${CMAKE_INSTALL_BINDIR} CACHE PATH "Location of binary files")
-
- get_directory_property(WHISPER_TRANSIENT_DEFINES COMPILE_DEFINITIONS)
-
- set_target_properties(whisper PROPERTIES PUBLIC_HEADER ${CMAKE_CURRENT_SOURCE_DIR}/include/whisper.h)
- install(TARGETS whisper LIBRARY PUBLIC_HEADER)
-
- configure_package_config_file(
- ${CMAKE_CURRENT_SOURCE_DIR}/cmake/whisper-config.cmake.in
- ${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake
- INSTALL_DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper
- PATH_VARS
- WHISPER_INCLUDE_INSTALL_DIR
- WHISPER_LIB_INSTALL_DIR
- WHISPER_BIN_INSTALL_DIR )
-
- write_basic_package_version_file(
- ${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake
- VERSION ${WHISPER_INSTALL_VERSION}
- COMPATIBILITY SameMajorVersion)
-
- install(FILES ${CMAKE_CURRENT_BINARY_DIR}/whisper-config.cmake
- ${CMAKE_CURRENT_BINARY_DIR}/whisper-version.cmake
- DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake/whisper)
-
- configure_file(cmake/whisper.pc.in
- "${CMAKE_CURRENT_BINARY_DIR}/whisper.pc"
- @ONLY)
-
- install(FILES "${CMAKE_CURRENT_BINARY_DIR}/whisper.pc"
- DESTINATION lib/pkgconfig)
-
- #
- # programs, examples and tests
- #
-
- if (WHISPER_BUILD_TESTS AND NOT CMAKE_JS_VERSION)
- #include(CTest)
- #add_subdirectory(tests)
- endif ()
-
- if (WHISPER_BUILD_EXAMPLES)
- add_subdirectory(examples)
- endif()
scripts/LICENSE DELETED
@@ -1,21 +0,0 @@
- MIT License
-
- Copyright (c) 2023-2024 The ggml authors
-
- Permission is hereby granted, free of charge, to any person obtaining a copy
- of this software and associated documentation files (the "Software"), to deal
- in the Software without restriction, including without limitation the rights
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
- copies of the Software, and to permit persons to whom the Software is
- furnished to do so, subject to the following conditions:
-
- The above copyright notice and this permission notice shall be included in all
- copies or substantial portions of the Software.
-
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- SOFTWARE.
scripts/Makefile DELETED
@@ -1,1167 +0,0 @@
1
- # Define the default target now so that it is always the first target
2
- BUILD_TARGETS = \
3
- main \
4
- bench \
5
- quantize \
6
- server \
7
- tests/test-c.o
8
-
9
- # Binaries only useful for tests
10
- TEST_TARGETS = \
11
- tests/test-backend-ops
12
-
13
- # Deprecation aliases
14
- ifdef WHISPER_CUBLAS
15
- $(error WHISPER_CUBLAS is removed. Use GGML_CUDA instead.)
16
- endif
17
-
18
- ifdef WHISPER_CUDA
19
- GGML_CUDA := 1
20
- DEPRECATE_WARNING := 1
21
- endif
22
-
23
- ifdef WHISPER_KOMPUTE
24
- GGML_KOMPUTE := 1
25
- DEPRECATE_WARNING := 1
26
- endif
27
-
28
- ifdef WHISPER_METAL
29
- GGML_METAL := 1
30
- DEPRECATE_WARNING := 1
31
- endif
32
-
33
- ifdef WHISPER_OPENMP
34
- GGML_OPENMP := 1
35
- DEPRECATE_WARNING := 1
36
- endif
37
-
38
- ifdef WHISPER_RPC
39
- GGML_RPC := 1
40
- DEPRECATE_WARNING := 1
41
- endif
42
-
43
- ifdef WHISPER_SYCL
44
- GGML_SYCL := 1
45
- DEPRECATE_WARNING := 1
46
- endif
47
-
48
- ifdef WHISPER_SYCL_F16
49
- GGML_SYCL_F16 := 1
50
- DEPRECATE_WARNING := 1
51
- endif
52
-
53
- ifdef WHISPER_OPENBLAS
54
- GGML_OPENBLAS := 1
55
- DEPRECATE_WARNING := 1
56
- endif
57
-
58
- ifdef WHISPER_OPENBLAS64
59
- GGML_OPENBLAS64 := 1
60
- DEPRECATE_WARNING := 1
61
- endif
62
-
63
- ifdef WHISPER_BLIS
64
- GGML_BLIS := 1
65
- DEPRECATE_WARNING := 1
66
- endif
67
-
68
- ifdef WHISPER_NO_WHISPERFILE
69
- GGML_NO_WHISPERFILE := 1
70
- DEPRECATE_WARNING := 1
71
- endif
72
-
73
- ifdef WHISPER_NO_ACCELERATE
74
- GGML_NO_ACCELERATE := 1
75
- DEPRECATE_WARNING := 1
76
- endif
77
-
78
- ifdef WHISPER_NO_OPENMP
79
- GGML_NO_OPENMP := 1
80
- DEPRECATE_WARNING := 1
81
- endif
82
-
83
- ifdef WHISPER_NO_METAL
84
- GGML_NO_METAL := 1
85
- DEPRECATE_WARNING := 1
86
- endif
87
-
88
- ifndef UNAME_S
89
- UNAME_S := $(shell uname -s)
90
- endif
91
-
92
- ifndef UNAME_P
93
- UNAME_P := $(shell uname -p)
94
- endif
95
-
96
- ifndef UNAME_M
97
- UNAME_M := $(shell uname -m)
98
- endif
99
-
100
- # In GNU make default CXX is g++ instead of c++. Let's fix that so that users
101
- # of non-gcc compilers don't have to provide g++ alias or wrapper.
102
- DEFCC := cc
103
- DEFCXX := c++
104
- ifeq ($(origin CC),default)
105
- CC := $(DEFCC)
106
- endif
107
- ifeq ($(origin CXX),default)
108
- CXX := $(DEFCXX)
109
- endif
110
-
111
- # Mac OS + Arm can report x86_64
112
- # ref: https://github.com/ggerganov/whisper.cpp/issues/66#issuecomment-1282546789
113
- ifeq ($(UNAME_S),Darwin)
114
- ifndef GGML_NO_METAL
115
- GGML_METAL := 1
116
- endif
117
-
118
- GGML_NO_OPENMP := 1
119
-
120
- ifneq ($(UNAME_P),arm)
121
- SYSCTL_M := $(shell sysctl -n hw.optional.arm64 2>/dev/null)
122
- ifeq ($(SYSCTL_M),1)
123
- # UNAME_P := arm
124
- # UNAME_M := arm64
125
- warn := $(warning Your arch is announced as x86_64, but it seems to actually be ARM64. Not fixing that can lead to bad performance. For more info see: https://github.com/ggerganov/whisper.cpp/issues/66\#issuecomment-1282546789)
126
- endif
127
- endif
128
- endif
129
-
130
- ifdef GGML_METAL
131
- GGML_METAL_EMBED_LIBRARY := 1
132
- endif
133
-
134
- ifdef GGML_RPC
135
- BUILD_TARGETS += rpc-server
136
- endif
137
-
138
- ifeq ($(shell sdl2-config --cflags --libs 2>/dev/null),)
139
- else
140
- BUILD_TARGETS += \
141
- command \
142
- stream \
143
- lsp \
144
- talk \
145
- talk-llama
146
- endif
147
-
148
- default: $(BUILD_TARGETS)
149
-
150
- test: $(TEST_TARGETS)
151
- @failures=0; \
152
- for test_target in $(TEST_TARGETS); do \
153
- echo "Running test $$test_target..."; \
154
- ./$$test_target; \
155
- if [ $$? -ne 0 ]; then \
156
- printf 'Test %s FAILED!\n\n' $$test_target; \
157
- failures=$$(( failures + 1 )); \
158
- else \
159
- printf 'Test %s passed.\n\n' $$test_target; \
160
- fi; \
161
- done; \
162
- failures=$$(( failures + $$? )); \
163
- if [ $$failures -gt 0 ]; then \
164
- printf '\n%s tests failed.\n' $$failures; \
165
- exit 1; \
166
- fi
167
- @echo 'All tests passed.'
168
-
169
- all: $(BUILD_TARGETS) $(TEST_TARGETS)
170
-
171
- ifdef RISCV_CROSS_COMPILE
172
- CC := riscv64-unknown-linux-gnu-gcc
173
- CXX := riscv64-unknown-linux-gnu-g++
174
- endif
175
-
176
- #
177
- # Compile flags
178
- #
179
-
180
- # keep standard at C11 and C++11
181
- MK_CPPFLAGS = -Iggml/include -Iggml/src -Iinclude -Isrc -Iexamples
182
- MK_CFLAGS = -std=c11 -fPIC
183
- MK_CXXFLAGS = -std=c++11 -fPIC
184
- MK_NVCCFLAGS = -std=c++11
185
-
186
- ifndef WHISPER_NO_CCACHE
187
- CCACHE := $(shell which ccache)
188
- ifdef CCACHE
189
- export CCACHE_SLOPPINESS = time_macros
190
- $(info I ccache found, compilation results will be cached. Disable with WHISPER_NO_CCACHE.)
191
- CC := $(CCACHE) $(CC)
192
- CXX := $(CCACHE) $(CXX)
193
- else
194
- $(info I ccache not found. Consider installing it for faster compilation.)
195
- endif # CCACHE
196
- endif # WHISPER_NO_CCACHE
197
-
198
- # clock_gettime came in POSIX.1b (1993)
199
- # CLOCK_MONOTONIC came in POSIX.1-2001 / SUSv3 as optional
200
- # posix_memalign came in POSIX.1-2001 / SUSv3
201
- # M_PI is an XSI extension since POSIX.1-2001 / SUSv3, came in XPG1 (1985)
202
- MK_CPPFLAGS += -D_XOPEN_SOURCE=600
203
-
204
- # Somehow in OpenBSD whenever POSIX conformance is specified
205
- # some string functions rely on locale_t availability,
206
- # which was introduced in POSIX.1-2008, forcing us to go higher
207
- ifeq ($(UNAME_S),OpenBSD)
208
- MK_CPPFLAGS += -U_XOPEN_SOURCE -D_XOPEN_SOURCE=700
209
- endif
210
-
211
- # Data types, macros and functions related to controlling CPU affinity and
212
- # some memory allocation are available on Linux through GNU extensions in libc
213
- ifeq ($(UNAME_S),Linux)
214
- MK_CPPFLAGS += -D_GNU_SOURCE
215
- endif
216
-
217
- # RLIMIT_MEMLOCK came in BSD, is not specified in POSIX.1,
218
- # and on macOS its availability depends on enabling Darwin extensions
219
- # similarly on DragonFly, enabling BSD extensions is necessary
220
- ifeq ($(UNAME_S),Darwin)
221
- MK_CPPFLAGS += -D_DARWIN_C_SOURCE
222
- endif
223
- ifeq ($(UNAME_S),DragonFly)
224
- MK_CPPFLAGS += -D__BSD_VISIBLE
225
- endif
226
-
227
- # alloca is a non-standard interface that is not visible on BSDs when
228
- # POSIX conformance is specified, but not all of them provide a clean way
229
- # to enable it in such cases
230
- ifeq ($(UNAME_S),FreeBSD)
231
- MK_CPPFLAGS += -D__BSD_VISIBLE
232
- endif
233
- ifeq ($(UNAME_S),NetBSD)
234
- MK_CPPFLAGS += -D_NETBSD_SOURCE
235
- endif
236
- ifeq ($(UNAME_S),OpenBSD)
237
- MK_CPPFLAGS += -D_BSD_SOURCE
238
- endif
239
-
240
- ifdef GGML_SCHED_MAX_COPIES
241
- MK_CPPFLAGS += -DGGML_SCHED_MAX_COPIES=$(GGML_SCHED_MAX_COPIES)
242
- endif
243
-
244
- ifdef WHISPER_DEBUG
245
- MK_CFLAGS += -O0 -g
246
- MK_CXXFLAGS += -O0 -g
247
- MK_LDFLAGS += -g
248
- MK_NVCCFLAGS += -O0 -g
249
-
250
- ifeq ($(UNAME_S),Linux)
251
- MK_CPPFLAGS += -D_GLIBCXX_ASSERTIONS
252
- endif
253
- else
254
- MK_CPPFLAGS += -DNDEBUG
255
- MK_CFLAGS += -O3
256
- MK_CXXFLAGS += -O3
257
- MK_NVCCFLAGS += -O3
258
- endif
259
-
260
- ifdef WHISPER_SANITIZE_THREAD
261
- MK_CFLAGS += -fsanitize=thread -g
262
- MK_CXXFLAGS += -fsanitize=thread -g
263
- MK_LDFLAGS += -fsanitize=thread -g
264
- endif
265
-
266
- ifdef WHISPER_SANITIZE_ADDRESS
267
- MK_CFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
268
- MK_CXXFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
269
- MK_LDFLAGS += -fsanitize=address -fno-omit-frame-pointer -g
270
- endif
271
-
272
- ifdef WHISPER_SANITIZE_UNDEFINED
273
- MK_CFLAGS += -fsanitize=undefined -g
274
- MK_CXXFLAGS += -fsanitize=undefined -g
275
- MK_LDFLAGS += -fsanitize=undefined -g
276
- endif
277
-
278
- ifdef WHISPER_SERVER_VERBOSE
279
- MK_CPPFLAGS += -DSERVER_VERBOSE=$(WHISPER_SERVER_VERBOSE)
280
- endif
281
-
282
- ifdef WHISPER_SERVER_SSL
283
- MK_CPPFLAGS += -DCPPHTTPLIB_OPENSSL_SUPPORT
284
- MK_LDFLAGS += -lssl -lcrypto
285
- endif
286
-
287
- ifdef WHISPER_DISABLE_LOGS
288
- MK_CPPFLAGS += -DLOG_DISABLE_LOGS
289
- endif # WHISPER_DISABLE_LOGS
290
-
291
- # warnings
292
- WARN_FLAGS = \
293
- -Wall \
294
- -Wextra \
295
- -Wpedantic \
296
- -Wcast-qual \
297
- -Wno-unused-function
298
-
299
- MK_CFLAGS += \
300
- $(WARN_FLAGS) \
301
- -Wshadow \
302
- -Wstrict-prototypes \
303
- -Wpointer-arith \
304
- -Wmissing-prototypes \
305
- -Werror=implicit-int \
306
- -Werror=implicit-function-declaration
307
-
308
- MK_CXXFLAGS += \
309
- $(WARN_FLAGS) \
310
- -Wmissing-declarations \
311
- -Wmissing-noreturn
312
-
313
- ifeq ($(WHISPER_FATAL_WARNINGS),1)
314
- MK_CFLAGS += -Werror
315
- MK_CXXFLAGS += -Werror
316
- endif
317
-
318
- # this version of Apple ld64 is buggy
319
- ifneq '' '$(findstring dyld-1015.7,$(shell $(CC) $(LDFLAGS) -Wl,-v 2>&1))'
320
- MK_CPPFLAGS += -DHAVE_BUGGY_APPLE_LINKER
321
- endif
322
-
323
- # OS specific
324
- # TODO: support Windows
325
- ifneq '' '$(filter $(UNAME_S),Linux Darwin FreeBSD NetBSD OpenBSD Haiku)'
326
- MK_CFLAGS += -pthread
327
- MK_CXXFLAGS += -pthread
328
- endif
329
-
330
- # detect Windows
331
- ifneq ($(findstring _NT,$(UNAME_S)),)
332
- _WIN32 := 1
333
- endif
334
-
335
- # library name prefix
336
- ifneq ($(_WIN32),1)
337
- LIB_PRE := lib
338
- endif
339
-
340
- # Dynamic Shared Object extension
341
- ifneq ($(_WIN32),1)
342
- DSO_EXT := .so
343
- else
344
- DSO_EXT := .dll
345
- endif
346
-
347
- # Windows Sockets 2 (Winsock) for network-capable apps
348
- ifeq ($(_WIN32),1)
349
- LWINSOCK2 := -lws2_32
350
- endif
351
-
352
- ifdef WHISPER_GPROF
353
- MK_CFLAGS += -pg
354
- MK_CXXFLAGS += -pg
355
- endif
356
-
357
- # Architecture specific
358
- # TODO: probably these flags need to be tweaked on some architectures
359
- # feel free to update the Makefile for your architecture and send a pull request or issue
360
-
361
- ifndef RISCV
362
-
363
- ifeq ($(UNAME_M),$(filter $(UNAME_M),x86_64 i686 amd64))
364
- # Use all CPU extensions that are available:
365
- MK_CFLAGS += -march=native -mtune=native
366
- HOST_CXXFLAGS += -march=native -mtune=native
367
-
368
- # Use AVX only
369
- #MK_CFLAGS += -mfma -mf16c -mavx
370
- #MK_CXXFLAGS += -mfma -mf16c -mavx
371
-
372
- # Use SSSE3 only (not SSE3!)
373
- #MK_CFLAGS += -mssse3
374
- #MK_CXXFLAGS += -mssse3
375
- endif
376
-
377
- ifneq '' '$(findstring mingw,$(shell $(CC) -dumpmachine))'
378
- # The stack is only 16-byte aligned on Windows, so don't let gcc emit aligned moves.
379
- # https://gcc.gnu.org/bugzilla/show_bug.cgi?id=54412
380
- # https://github.com/ggerganov/llama.cpp/issues/2922
381
- MK_CFLAGS += -Xassembler -muse-unaligned-vector-move
382
- MK_CXXFLAGS += -Xassembler -muse-unaligned-vector-move
383
-
384
- # Target Windows 8 for PrefetchVirtualMemory
385
- MK_CPPFLAGS += -D_WIN32_WINNT=0x602
386
- endif
387
-
388
- ifneq ($(filter aarch64%,$(UNAME_M)),)
389
- # Apple M1, M2, etc.
390
- # Raspberry Pi 3, 4, Zero 2 (64-bit)
391
- # Nvidia Jetson
392
- MK_CFLAGS += -mcpu=native
393
- MK_CXXFLAGS += -mcpu=native
394
- JETSON_RELEASE_INFO = $(shell jetson_release)
395
- ifdef JETSON_RELEASE_INFO
396
- ifneq ($(filter TX2%,$(JETSON_RELEASE_INFO)),)
397
- JETSON_EOL_MODULE_DETECT = 1
398
- CC = aarch64-unknown-linux-gnu-gcc
399
- CXX = aarch64-unknown-linux-gnu-g++
400
- endif
401
- endif
402
- endif
403
-
404
- ifneq ($(filter armv6%,$(UNAME_M)),)
405
- # Raspberry Pi 1, Zero
406
- MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
407
- MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access
408
- endif
409
-
410
- ifneq ($(filter armv7%,$(UNAME_M)),)
411
- # Raspberry Pi 2
412
- MK_CFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
413
- MK_CXXFLAGS += -mfpu=neon-fp-armv8 -mfp16-format=ieee -mno-unaligned-access -funsafe-math-optimizations
414
- endif
415
-
416
- ifneq ($(filter armv8%,$(UNAME_M)),)
417
- # Raspberry Pi 3, 4, Zero 2 (32-bit)
418
- MK_CFLAGS += -mfp16-format=ieee -mno-unaligned-access
419
- MK_CXXFLAGS += -mfp16-format=ieee -mno-unaligned-access
420
- endif
421
-
422
- ifneq ($(filter ppc64%,$(UNAME_M)),)
423
- POWER9_M := $(shell grep "POWER9" /proc/cpuinfo)
424
- ifneq (,$(findstring POWER9,$(POWER9_M)))
425
- MK_CFLAGS += -mcpu=power9
426
- MK_CXXFLAGS += -mcpu=power9
427
- endif
428
- endif
429
-
430
- ifneq ($(filter ppc64le%,$(UNAME_M)),)
431
- MK_CFLAGS += -mcpu=powerpc64le
432
- MK_CXXFLAGS += -mcpu=powerpc64le
433
- CUDA_POWER_ARCH = 1
434
- endif
435
-
436
- ifneq ($(filter loongarch64%,$(UNAME_M)),)
437
- MK_CFLAGS += -mlasx
438
- MK_CXXFLAGS += -mlasx
439
- endif
440
-
441
- else
442
- MK_CFLAGS += -march=rv64gcv -mabi=lp64d
443
- MK_CXXFLAGS += -march=rv64gcv -mabi=lp64d
444
- endif
445
-
446
- ifndef GGML_NO_ACCELERATE
447
- # Mac OS - include Accelerate framework.
448
- # `-framework Accelerate` works both with Apple Silicon and Mac Intel
449
- ifeq ($(UNAME_S),Darwin)
450
- MK_CPPFLAGS += -DGGML_USE_ACCELERATE -DGGML_USE_BLAS
451
- MK_CPPFLAGS += -DACCELERATE_NEW_LAPACK
452
- MK_CPPFLAGS += -DACCELERATE_LAPACK_ILP64
453
- MK_LDFLAGS += -framework Accelerate
454
- OBJ_GGML += ggml/src/ggml-blas.o
455
- endif
456
- endif # GGML_NO_ACCELERATE
457
-
458
- ifndef GGML_NO_OPENMP
459
- MK_CPPFLAGS += -DGGML_USE_OPENMP
460
- MK_CFLAGS += -fopenmp
461
- MK_CXXFLAGS += -fopenmp
462
- endif # GGML_NO_OPENMP
463
-
464
- ifdef GGML_OPENBLAS
465
- MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas)
466
- MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas)
467
- MK_LDFLAGS += $(shell pkg-config --libs openblas)
468
- OBJ_GGML += ggml/src/ggml-blas.o
469
- endif # GGML_OPENBLAS
470
-
471
- ifdef GGML_OPENBLAS64
472
- MK_CPPFLAGS += -DGGML_USE_BLAS $(shell pkg-config --cflags-only-I openblas64)
473
- MK_CFLAGS += $(shell pkg-config --cflags-only-other openblas64)
474
- MK_LDFLAGS += $(shell pkg-config --libs openblas64)
475
- OBJ_GGML += ggml/src/ggml-blas.o
476
- endif # GGML_OPENBLAS64
477
-
478
- ifdef GGML_BLIS
479
- MK_CPPFLAGS += -DGGML_USE_BLAS -I/usr/local/include/blis -I/usr/include/blis
480
- MK_LDFLAGS += -lblis -L/usr/local/lib
481
- OBJ_GGML += ggml/src/ggml-blas.o
482
- endif # GGML_BLIS
483
-
484
- ifdef GGML_RPC
485
- MK_CPPFLAGS += -DGGML_USE_RPC
486
- OBJ_GGML += ggml/src/ggml-rpc.o
487
- endif # GGML_RPC
488
-
489
- OBJ_CUDA_TMPL = $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-wmma*.cu))
490
- OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/mmq*.cu))
491
-
492
- ifdef GGML_CUDA_FA_ALL_QUANTS
493
- OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*.cu))
494
- else
495
- OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q4_0-q4_0.cu))
496
- OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*q8_0-q8_0.cu))
497
- OBJ_CUDA_TMPL += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/template-instances/fattn-vec*f16-f16.cu))
498
- endif # GGML_CUDA_FA_ALL_QUANTS
499
-
500
- ifdef GGML_CUDA
501
- ifneq ('', '$(wildcard /opt/cuda)')
502
- CUDA_PATH ?= /opt/cuda
503
- else
504
- CUDA_PATH ?= /usr/local/cuda
505
- endif
506
-
507
- #MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include -DGGML_CUDA_USE_GRAPHS
508
- #MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcufft -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
509
- MK_CPPFLAGS += -DGGML_USE_CUDA -I$(CUDA_PATH)/include -I$(CUDA_PATH)/targets/$(UNAME_M)-linux/include
510
- MK_LDFLAGS += -lcuda -lcublas -lculibos -lcudart -lcublasLt -lpthread -ldl -lrt -L$(CUDA_PATH)/lib64 -L/usr/lib64 -L$(CUDA_PATH)/targets/$(UNAME_M)-linux/lib -L$(CUDA_PATH)/lib64/stubs -L/usr/lib/wsl/lib
511
- MK_NVCCFLAGS += -use_fast_math
512
-
513
- OBJ_GGML += ggml/src/ggml-cuda.o
514
- OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
515
- OBJ_GGML += $(OBJ_CUDA_TMPL)
516
-
517
- #OBJ_WHISPER += src/whisper-mel-cuda.o
518
-
519
- ifdef WHISPER_FATAL_WARNINGS
520
- MK_NVCCFLAGS += -Werror all-warnings
521
- endif # WHISPER_FATAL_WARNINGS
522
-
523
- ifndef JETSON_EOL_MODULE_DETECT
524
- MK_NVCCFLAGS += --forward-unknown-to-host-compiler
525
- endif # JETSON_EOL_MODULE_DETECT
526
-
527
- ifdef WHISPER_DEBUG
528
- MK_NVCCFLAGS += -lineinfo
529
- endif # WHISPER_DEBUG
530
-
531
- ifdef GGML_CUDA_DEBUG
532
- MK_NVCCFLAGS += --device-debug
533
- endif # GGML_CUDA_DEBUG
534
-
535
- ifdef GGML_CUDA_NVCC
536
- NVCC = $(CCACHE) $(GGML_CUDA_NVCC)
537
- else
538
- NVCC = $(CCACHE) nvcc
539
- endif #GGML_CUDA_NVCC
540
-
541
- ifdef CUDA_DOCKER_ARCH
542
- MK_NVCCFLAGS += -Wno-deprecated-gpu-targets -arch=$(CUDA_DOCKER_ARCH)
543
- else ifndef CUDA_POWER_ARCH
544
- MK_NVCCFLAGS += -arch=native
545
- endif # CUDA_DOCKER_ARCH
546
-
547
- ifdef GGML_CUDA_FORCE_DMMV
548
- MK_NVCCFLAGS += -DGGML_CUDA_FORCE_DMMV
549
- endif # GGML_CUDA_FORCE_DMMV
550
-
551
- ifdef GGML_CUDA_FORCE_MMQ
552
- MK_NVCCFLAGS += -DGGML_CUDA_FORCE_MMQ
553
- endif # GGML_CUDA_FORCE_MMQ
554
-
555
- ifdef GGML_CUDA_DMMV_X
556
- MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=$(GGML_CUDA_DMMV_X)
557
- else
558
- MK_NVCCFLAGS += -DGGML_CUDA_DMMV_X=32
559
- endif # GGML_CUDA_DMMV_X
560
-
561
- ifdef GGML_CUDA_MMV_Y
562
- MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(GGML_CUDA_MMV_Y)
563
- else ifdef GGML_CUDA_DMMV_Y
564
- MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=$(GGML_CUDA_DMMV_Y) # for backwards compatibility
565
- else
566
- MK_NVCCFLAGS += -DGGML_CUDA_MMV_Y=1
567
- endif # GGML_CUDA_MMV_Y
568
-
569
- ifdef GGML_CUDA_F16
570
- MK_NVCCFLAGS += -DGGML_CUDA_F16
571
- endif # GGML_CUDA_F16
572
-
573
- ifdef GGML_CUDA_DMMV_F16
574
- MK_NVCCFLAGS += -DGGML_CUDA_F16
575
- endif # GGML_CUDA_DMMV_F16
576
-
577
- ifdef GGML_CUDA_KQUANTS_ITER
578
- MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=$(GGML_CUDA_KQUANTS_ITER)
579
- else
580
- MK_NVCCFLAGS += -DK_QUANTS_PER_ITERATION=2
581
- endif
582
-
583
- ifdef GGML_CUDA_PEER_MAX_BATCH_SIZE
584
- MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=$(GGML_CUDA_PEER_MAX_BATCH_SIZE)
585
- else
586
- MK_NVCCFLAGS += -DGGML_CUDA_PEER_MAX_BATCH_SIZE=128
587
- endif # GGML_CUDA_PEER_MAX_BATCH_SIZE
588
-
589
- ifdef GGML_CUDA_NO_PEER_COPY
590
- MK_NVCCFLAGS += -DGGML_CUDA_NO_PEER_COPY
591
- endif # GGML_CUDA_NO_PEER_COPY
592
-
593
- ifdef GGML_CUDA_CCBIN
594
- MK_NVCCFLAGS += -ccbin $(GGML_CUDA_CCBIN)
595
- endif # GGML_CUDA_CCBIN
596
-
597
- ifdef GGML_CUDA_FA_ALL_QUANTS
598
- MK_NVCCFLAGS += -DGGML_CUDA_FA_ALL_QUANTS
599
- endif # GGML_CUDA_FA_ALL_QUANTS
600
-
601
- ifdef JETSON_EOL_MODULE_DETECT
602
- define NVCC_COMPILE
603
- $(NVCC) -I. -Icommon -D_XOPEN_SOURCE=600 -D_GNU_SOURCE -DNDEBUG -DGGML_USE_CUDA -I/usr/local/cuda/include -I/opt/cuda/include -I/usr/local/cuda/targets/aarch64-linux/include -std=c++11 -O3 $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
604
- endef # NVCC_COMPILE
605
- else
606
- define NVCC_COMPILE
607
- $(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
608
- endef # NVCC_COMPILE
609
- endif # JETSON_EOL_MODULE_DETECT
610
-
611
- ggml/src/ggml-cuda/%.o: \
612
- ggml/src/ggml-cuda/%.cu \
613
- ggml/include/ggml.h \
614
- ggml/src/ggml-common.h \
615
- ggml/src/ggml-cuda/common.cuh
616
- $(NVCC_COMPILE)
617
-
618
- ggml/src/ggml-cuda.o: \
619
- ggml/src/ggml-cuda.cu \
620
- ggml/include/ggml.h \
621
- ggml/include/ggml-backend.h \
622
- ggml/include/ggml-cuda.h \
623
- ggml/src/ggml-backend-impl.h \
624
- ggml/src/ggml-common.h \
625
- $(wildcard ggml/src/ggml-cuda/*.cuh)
626
- $(NVCC_COMPILE)
627
-
628
- #src/whisper-mel-cuda.o: src/whisper-mel-cuda.cu src/whisper-mel-cuda.hpp
629
- # $(NVCC) $(NVCCFLAGS) $(CPPFLAGS) -Xcompiler "$(CUDA_CXXFLAGS)" -c $< -o $@
630
-
631
- endif # GGML_CUDA
632
-
633
- ifdef GGML_VULKAN
634
- MK_CPPFLAGS += -DGGML_USE_VULKAN
635
- MK_LDFLAGS += -lvulkan
636
- OBJ_GGML += ggml/src/ggml-vulkan.o
637
-
638
- ifdef GGML_VULKAN_CHECK_RESULTS
639
- MK_CPPFLAGS += -DGGML_VULKAN_CHECK_RESULTS
640
- endif
641
-
642
- ifdef GGML_VULKAN_DEBUG
643
- MK_CPPFLAGS += -DGGML_VULKAN_DEBUG
644
- endif
645
-
646
- ifdef GGML_VULKAN_MEMORY_DEBUG
647
- MK_CPPFLAGS += -DGGML_VULKAN_MEMORY_DEBUG
648
- endif
649
-
650
- ifdef GGML_VULKAN_VALIDATE
651
- MK_CPPFLAGS += -DGGML_VULKAN_VALIDATE
652
- endif
653
-
654
- ifdef GGML_VULKAN_RUN_TESTS
655
- MK_CPPFLAGS += -DGGML_VULKAN_RUN_TESTS
656
- endif
657
-
658
- ggml/src/ggml-vulkan.o: \
659
- ggml/src/ggml-vulkan.cpp \
660
- ggml/include/ggml-vulkan.h
661
- $(CXX) $(CXXFLAGS) -c $< -o $@
662
- endif # GGML_VULKAN
663
-
664
- ifdef GGML_HIPBLAS
665
- ifeq ($(wildcard /opt/rocm),)
666
- ROCM_PATH ?= /usr
667
- AMDGPU_TARGETS ?= $(shell $(shell which amdgpu-arch))
668
- else
669
- ROCM_PATH ?= /opt/rocm
670
- AMDGPU_TARGETS ?= $(shell $(ROCM_PATH)/llvm/bin/amdgpu-arch)
671
- endif
672
-
673
- GGML_CUDA_DMMV_X ?= 32
674
- GGML_CUDA_MMV_Y ?= 1
675
- GGML_CUDA_KQUANTS_ITER ?= 2
676
-
677
- MK_CPPFLAGS += -DGGML_USE_HIPBLAS -DGGML_USE_CUDA
678
-
679
- ifdef GGML_HIP_UMA
680
- MK_CPPFLAGS += -DGGML_HIP_UMA
681
- endif # GGML_HIP_UMA
682
-
683
- MK_LDFLAGS += -L$(ROCM_PATH)/lib -Wl,-rpath=$(ROCM_PATH)/lib
684
- MK_LDFLAGS += -L$(ROCM_PATH)/lib64 -Wl,-rpath=$(ROCM_PATH)/lib64
685
- MK_LDFLAGS += -lhipblas -lamdhip64 -lrocblas
686
-
687
- HIPCC ?= $(CCACHE) $(ROCM_PATH)/bin/hipcc
688
-
689
- HIPFLAGS += $(addprefix --offload-arch=,$(AMDGPU_TARGETS))
690
- HIPFLAGS += -DGGML_CUDA_DMMV_X=$(GGML_CUDA_DMMV_X)
691
- HIPFLAGS += -DGGML_CUDA_MMV_Y=$(GGML_CUDA_MMV_Y)
692
- HIPFLAGS += -DK_QUANTS_PER_ITERATION=$(GGML_CUDA_KQUANTS_ITER)
693
-
694
- ifdef GGML_CUDA_FORCE_DMMV
695
- HIPFLAGS += -DGGML_CUDA_FORCE_DMMV
696
- endif # GGML_CUDA_FORCE_DMMV
697
-
698
- ifdef GGML_CUDA_NO_PEER_COPY
699
- HIPFLAGS += -DGGML_CUDA_NO_PEER_COPY
700
- endif # GGML_CUDA_NO_PEER_COPY
701
-
702
- OBJ_GGML += ggml/src/ggml-cuda.o
703
- OBJ_GGML += $(patsubst %.cu,%.o,$(wildcard ggml/src/ggml-cuda/*.cu))
704
- OBJ_GGML += $(OBJ_CUDA_TMPL)
705
-
706
- ggml/src/ggml-cuda.o: \
707
- ggml/src/ggml-cuda.cu \
708
- ggml/include/ggml.h \
709
- ggml/include/ggml-backend.h \
710
- ggml/include/ggml-cuda.h \
711
- ggml/src/ggml-backend-impl.h \
712
- ggml/src/ggml-common.h \
713
- $(wildcard ggml/src/ggml-cuda/*.cuh)
714
- $(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
715
-
716
- ggml/src/ggml-cuda/%.o: \
717
- ggml/src/ggml-cuda/%.cu \
718
- ggml/include/ggml.h \
719
- ggml/src/ggml-common.h \
720
- ggml/src/ggml-cuda/common.cuh
721
- $(HIPCC) $(CXXFLAGS) $(HIPFLAGS) -x hip -c -o $@ $<
722
- endif # GGML_HIPBLAS
723
-
724
- ifdef GGML_METAL
725
- MK_CPPFLAGS += -DGGML_USE_METAL
726
- MK_LDFLAGS += -framework Foundation -framework Metal -framework MetalKit
727
- OBJ_GGML += ggml/src/ggml-metal.o
728
- ifdef GGML_METAL_NDEBUG
729
- MK_CPPFLAGS += -DGGML_METAL_NDEBUG
730
- endif
731
-
732
- ifdef GGML_METAL_EMBED_LIBRARY
733
- MK_CPPFLAGS += -DGGML_METAL_EMBED_LIBRARY
734
- OBJ_GGML += ggml/src/ggml-metal-embed.o
735
- endif
736
- endif # GGML_METAL
737
-
738
- ifdef WHISPER_COREML
739
- MK_CXXFLAGS += -DWHISPER_USE_COREML
740
- LDFLAGS += -framework Foundation -framework CoreML
741
-
742
- ifdef WHISPER_COREML_ALLOW_FALLBACK
743
- MK_CXXFLAGS += -DWHISPER_COREML_ALLOW_FALLBACK
744
- endif
745
- endif
746
-
747
- # ===
748
-
749
- ifdef GGML_METAL
750
- ggml/src/ggml-metal.o: \
751
- ggml/src/ggml-metal.m \
752
- ggml/include/ggml-metal.h \
753
- ggml/include/ggml.h
754
- $(CC) $(CFLAGS) -c $< -o $@
755
-
756
- ifdef GGML_METAL_EMBED_LIBRARY
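- # The rule below inlines ggml-common.h into the Metal source and embeds the result as a binary blob via .incbin, exposing the _ggml_metallib_start/_ggml_metallib_end symbols.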
757
- ggml/src/ggml-metal-embed.o: \
758
- ggml/src/ggml-metal.metal \
759
- ggml/src/ggml-common.h
760
- @echo "Embedding Metal library"
761
- @sed -e '/#include "ggml-common.h"/r ggml/src/ggml-common.h' -e '/#include "ggml-common.h"/d' < ggml/src/ggml-metal.metal > ggml/src/ggml-metal-embed.metal
762
- $(eval TEMP_ASSEMBLY=$(shell mktemp))
763
- @echo ".section __DATA, __ggml_metallib" > $(TEMP_ASSEMBLY)
764
- @echo ".globl _ggml_metallib_start" >> $(TEMP_ASSEMBLY)
765
- @echo "_ggml_metallib_start:" >> $(TEMP_ASSEMBLY)
766
- @echo ".incbin \"ggml/src/ggml-metal-embed.metal\"" >> $(TEMP_ASSEMBLY)
767
- @echo ".globl _ggml_metallib_end" >> $(TEMP_ASSEMBLY)
768
- @echo "_ggml_metallib_end:" >> $(TEMP_ASSEMBLY)
769
- @$(AS) $(TEMP_ASSEMBLY) -o $@
770
- @rm -f ${TEMP_ASSEMBLY}
771
- endif
772
- endif # GGML_METAL
773
-
774
- ifdef WHISPER_COREML
775
- src/coreml/whisper-encoder.o: src/coreml/whisper-encoder.mm src/coreml/whisper-encoder.h
776
- $(CXX) -O3 -I . -fobjc-arc -c src/coreml/whisper-encoder.mm -o src/coreml/whisper-encoder.o
777
-
778
- src/coreml/whisper-encoder-impl.o: src/coreml/whisper-encoder-impl.m src/coreml/whisper-encoder-impl.h
779
- $(CXX) -O3 -I . -fobjc-arc -c src/coreml/whisper-encoder-impl.m -o src/coreml/whisper-encoder-impl.o
780
-
781
- OBJ_WHISPER += src/coreml/whisper-encoder.o src/coreml/whisper-encoder-impl.o
782
- endif
783
-
784
- OBJ_GGML += \
785
- ggml/src/ggml.o \
786
- ggml/src/ggml-alloc.o \
787
- ggml/src/ggml-backend.o \
788
- ggml/src/ggml-quants.o \
789
- ggml/src/ggml-aarch64.o
790
-
791
- OBJ_WHISPER += \
792
- src/whisper.o
793
-
794
- OBJ_COMMON += \
795
- examples/common.o \
796
- examples/common-ggml.o \
797
- examples/grammar-parser.o
798
-
799
- OBJ_SDL += \
800
- examples/common-sdl.o
801
-
802
- OBJ_ALL = $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
803
-
804
- LIB_GGML = $(LIB_PRE)ggml$(DSO_EXT)
805
- LIB_GGML_S = $(LIB_PRE)ggml.a
806
-
807
- LIB_WHISPER = $(LIB_PRE)whisper$(DSO_EXT)
808
- LIB_WHISPER_S = $(LIB_PRE)whisper.a
809
-
810
- LIB_COMMON = $(LIB_PRE)common$(DSO_EXT)
811
- LIB_COMMON_S = $(LIB_PRE)common.a
812
-
813
- LIB_COMMON_SDL = $(LIB_PRE)common-sdl$(DSO_EXT)
814
- LIB_COMMON_SDL_S = $(LIB_PRE)common-sdl.a
815
-
816
- LIB_ALL = $(LIB_GGML) $(LIB_WHISPER) $(LIB_COMMON) $(LIB_COMMON_SDL)
817
- LIB_ALL_S = $(LIB_GGML_S) $(LIB_WHISPER_S) $(LIB_COMMON_S) $(LIB_COMMON_SDL_S)
818
-
819
- GF_CC := $(CC)
820
- include scripts/get-flags.mk
821
-
822
- # combine build flags with cmdline overrides
823
- override CPPFLAGS := $(MK_CPPFLAGS) $(CPPFLAGS)
824
- override CFLAGS := $(CPPFLAGS) $(MK_CFLAGS) $(GF_CFLAGS) $(CFLAGS)
825
- BASE_CXXFLAGS := $(MK_CXXFLAGS) $(CXXFLAGS)
826
- override CXXFLAGS := $(BASE_CXXFLAGS) $(HOST_CXXFLAGS) $(GF_CXXFLAGS) $(CPPFLAGS)
827
- override NVCCFLAGS := $(MK_NVCCFLAGS) $(NVCCFLAGS)
828
- override LDFLAGS := $(MK_LDFLAGS) $(LDFLAGS)
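- # For example (illustrative): "make CFLAGS=-DMY_DEFINE" keeps the MK_/GF_ defaults above and appends -DMY_DEFINE at the end of the final CFLAGS.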
829
-
830
- # identify CUDA host compiler
831
- ifdef GGML_CUDA
832
- GF_CC := $(NVCC) $(NVCCFLAGS) 2>/dev/null .c -Xcompiler
833
- include scripts/get-flags.mk
834
- CUDA_CXXFLAGS := $(BASE_CXXFLAGS) $(GF_CXXFLAGS) -Wno-pedantic
835
- endif
836
-
837
- ifdef WHISPER_CURL
838
- override CXXFLAGS := $(CXXFLAGS) -DWHISPER_USE_CURL
839
- override LDFLAGS := $(LDFLAGS) -lcurl
840
- endif
841
-
842
- #
843
- # Print build information
844
- #
845
-
846
- $(info I whisper.cpp build info: )
847
- $(info I UNAME_S: $(UNAME_S))
848
- $(info I UNAME_P: $(UNAME_P))
849
- $(info I UNAME_M: $(UNAME_M))
850
- $(info I CFLAGS: $(CFLAGS))
851
- $(info I CXXFLAGS: $(CXXFLAGS))
852
- $(info I NVCCFLAGS: $(NVCCFLAGS))
853
- $(info I LDFLAGS: $(LDFLAGS))
854
- $(info I CC: $(shell $(CC) --version | head -n 1))
855
- $(info I CXX: $(shell $(CXX) --version | head -n 1))
856
- ifdef GGML_CUDA
857
- $(info I NVCC: $(shell $(NVCC) --version | tail -n 1))
858
- CUDA_VERSION := $(shell $(NVCC) --version | grep -oP 'release (\K[0-9]+\.[0-9])')
859
- ifeq ($(shell awk -v "v=$(CUDA_VERSION)" 'BEGIN { print (v < 11.7) }'),1)
860
-
861
- ifndef CUDA_DOCKER_ARCH
862
- ifndef CUDA_POWER_ARCH
863
- $(error I ERROR: For CUDA versions < 11.7 a target CUDA architecture must be explicitly provided via environment variable CUDA_DOCKER_ARCH, e.g. by running "export CUDA_DOCKER_ARCH=compute_XX" on Unix-like systems, where XX is the minimum compute capability that the code needs to run on. A list with compute capabilities can be found here: https://developer.nvidia.com/cuda-gpus )
864
- endif # CUDA_POWER_ARCH
865
- endif # CUDA_DOCKER_ARCH
866
-
867
- endif # CUDA_VERSION < 11.7
868
- endif # GGML_CUDA
869
- $(info )
870
-
871
- ifdef DEPRECATE_WARNING
872
- $(info !!! DEPRECATION WARNING !!!)
873
- $(info The following WHISPER_ options are deprecated and will be removed in the future. Use the GGML_ prefix instead)
874
- $(info - WHISPER_CUDA)
875
- $(info - WHISPER_METAL)
876
- $(info - WHISPER_OPENMP)
877
- $(info - WHISPER_RPC)
878
- $(info - WHISPER_SYCL)
879
- $(info - WHISPER_SYCL_F16)
880
- $(info - WHISPER_OPENBLAS)
881
- $(info - WHISPER_OPENBLAS64)
882
- $(info - WHISPER_BLIS)
883
- $(info - WHISPER_NO_LLAMAFILE)
884
- $(info - WHISPER_NO_ACCELERATE)
885
- $(info - WHISPER_NO_OPENMP)
886
- $(info - WHISPER_NO_METAL)
887
- $(info )
888
- endif
889
-
890
- #
891
- # Build libraries
892
- #
893
-
894
- # ggml
895
-
896
- ggml/src/ggml.o: \
897
- ggml/src/ggml.c \
898
- ggml/include/ggml.h
899
- $(CC) $(CFLAGS) -c $< -o $@
900
-
901
- ggml/src/ggml-alloc.o: \
902
- ggml/src/ggml-alloc.c \
903
- ggml/include/ggml.h \
904
- ggml/include/ggml-alloc.h
905
- $(CC) $(CFLAGS) -c $< -o $@
906
-
907
- ggml/src/ggml-backend.o: \
908
- ggml/src/ggml-backend.c \
909
- ggml/include/ggml.h \
910
- ggml/include/ggml-backend.h
911
- $(CC) $(CFLAGS) -c $< -o $@
912
-
913
- ggml/src/ggml-quants.o: \
914
- ggml/src/ggml-quants.c \
915
- ggml/include/ggml.h \
916
- ggml/src/ggml-quants.h \
917
- ggml/src/ggml-common.h
918
- $(CC) $(CFLAGS) -c $< -o $@
919
-
920
- ggml/src/ggml-aarch64.o: \
921
- ggml/src/ggml-aarch64.c \
922
- ggml/include/ggml.h \
923
- ggml/src/ggml-aarch64.h \
924
- ggml/src/ggml-common.h
925
- $(CC) $(CFLAGS) -c $< -o $@
926
-
927
- ggml/src/ggml-blas.o: \
928
- ggml/src/ggml-blas.cpp \
929
- ggml/include/ggml-blas.h
930
- $(CXX) $(CXXFLAGS) -c $< -o $@
931
-
932
- ifdef GGML_LLAMAFILE
933
- ggml/src/sgemm.o: \
934
- ggml/src/sgemm.cpp \
935
- ggml/src/sgemm.h \
936
- ggml/include/ggml.h
937
- $(CXX) $(CXXFLAGS) -c $< -o $@
938
- endif # GGML_LLAMAFILE
939
-
940
- ifdef GGML_RPC
941
- ggml/src/ggml-rpc.o: \
942
- ggml/src/ggml-rpc.cpp \
943
- ggml/include/ggml-rpc.h
944
- $(CXX) $(CXXFLAGS) -c $< -o $@
945
- endif # GGML_RPC
946
-
947
- $(LIB_GGML): \
948
- $(OBJ_GGML)
949
- $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
950
-
951
- $(LIB_GGML_S): \
952
- $(OBJ_GGML)
953
- ar rcs $(LIB_GGML_S) $^
954
-
955
- # whisper
956
-
957
- src/whisper.o: \
958
- src/whisper.cpp \
959
- src/whisper-mel.hpp \
960
- include/whisper.h \
961
- ggml/include/ggml.h \
962
- ggml/include/ggml-alloc.h \
963
- ggml/include/ggml-backend.h \
964
- ggml/include/ggml-cuda.h \
965
- ggml/include/ggml-metal.h
966
- $(CXX) $(CXXFLAGS) -c $< -o $@
967
-
968
- $(LIB_WHISPER): \
969
- $(OBJ_WHISPER) \
970
- $(LIB_GGML)
971
- $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
972
-
973
- $(LIB_WHISPER_S): \
974
- $(OBJ_WHISPER) \
975
- $(OBJ_GGML)
976
- ar rcs $(LIB_WHISPER_S) $^
977
-
978
- # common
979
-
980
- examples/common.o: \
981
- examples/common.cpp \
982
- examples/common.h
983
- $(CXX) $(CXXFLAGS) -c $< -o $@
984
-
985
- examples/common-ggml.o: \
986
- examples/common-ggml.cpp \
987
- examples/common-ggml.h
988
- $(CXX) $(CXXFLAGS) -c $< -o $@
989
-
990
- $(LIB_COMMON): \
991
- $(OBJ_COMMON)
992
- $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS)
993
-
994
- $(LIB_COMMON_S): \
995
- $(OBJ_COMMON)
996
- ar rcs $(LIB_COMMON_S) $^
997
-
998
- # common-sdl
999
-
1000
- CFLAGS_SDL=$(shell sdl2-config --cflags)
1001
- LDFLAGS_SDL=$(shell sdl2-config --libs)
1002
-
1003
- examples/common-sdl.o: \
1004
- examples/common-sdl.cpp \
1005
- examples/common-sdl.h
1006
- $(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $@
1007
-
1008
- $(LIB_COMMON_SDL): \
1009
- $(OBJ_SDL)
1010
- $(CXX) $(CXXFLAGS) -shared -fPIC -o $@ $^ $(LDFLAGS) $(LDFLAGS_SDL)
1011
-
1012
- $(LIB_COMMON_SDL_S): \
1013
- $(OBJ_SDL)
1014
- ar rcs $(LIB_COMMON_SDL_S) $^
1015
-
1016
- clean:
1017
- rm -vrf *.dot $(BUILD_TARGETS) $(TEST_TARGETS)
1018
- rm -rvf src/*.o
1019
- rm -rvf src/coreml/*.o
1020
- rm -rvf tests/*.o
1021
- rm -rvf examples/*.o
1022
- rm -rvf *.a
1023
- rm -rvf *.dll
1024
- rm -rvf *.so
1025
- rm -rvf *.dot
1026
- rm -rvf ggml/*.a
1027
- rm -rvf ggml/*.dll
1028
- rm -rvf ggml/*.so
1029
- rm -vrf ggml/src/*.o
1030
- rm -vrf ggml/src/ggml-metal-embed.metal
1031
- rm -vrf ggml/src/ggml-cuda/*.o
1032
- rm -vrf ggml/src/ggml-cuda/template-instances/*.o
1033
- rm -rvf $(BUILD_TARGETS)
1034
- rm -rvf $(TEST_TARGETS)
1035
- find examples -type f -name "*.o" -delete
1036
-
1037
- #
1038
- # Examples
1039
- #
1040
-
1041
- # $< is the first prerequisite, i.e. the source file.
1042
- # Explicitly compile this to an object file so that it can be cached with ccache.
1043
- # The source file is then filtered out from $^ (the list of all prerequisites) and the object file is added instead.
1044
-
1045
- # Helper function that replaces .c, .cpp, and .cu file endings with .o:
1046
- GET_OBJ_FILE = $(patsubst %.c,%.o,$(patsubst %.cpp,%.o,$(patsubst %.cu,%.o,$(1))))
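- # For example: $(call GET_OBJ_FILE, examples/main/main.cpp) expands to examples/main/main.o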
1047
-
1048
- main: examples/main/main.cpp \
1049
- $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON)
1050
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1051
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1052
-
1053
- bench: examples/bench/bench.cpp \
1054
- $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON)
1055
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1056
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1057
-
1058
- quantize: examples/quantize/quantize.cpp \
1059
- $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON)
1060
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1061
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1062
-
1063
- server: examples/server/server.cpp \
1064
- $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON)
1065
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1066
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LWINSOCK2)
1067
-
1068
- command: examples/command/command.cpp \
1069
- $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
1070
- $(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(call GET_OBJ_FILE, $<)
1071
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)
1072
-
1073
- stream: examples/stream/stream.cpp \
1074
- $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
1075
- $(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(call GET_OBJ_FILE, $<)
1076
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)
1077
-
1078
- lsp: examples/lsp/lsp.cpp \
1079
- $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
1080
- $(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(call GET_OBJ_FILE, $<)
1081
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)
1082
-
1083
- talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp \
1084
- $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
1085
- $(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(call GET_OBJ_FILE, $<)
1086
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)
1087
-
1088
- talk-llama: examples/talk-llama/talk-llama.cpp examples/talk-llama/llama.cpp examples/talk-llama/llama-vocab.cpp examples/talk-llama/llama-grammar.cpp examples/talk-llama/llama-sampling.cpp examples/talk-llama/unicode.cpp examples/talk-llama/unicode-data.cpp \
1089
- $(OBJ_GGML) $(OBJ_WHISPER) $(OBJ_COMMON) $(OBJ_SDL)
1090
- $(CXX) $(CXXFLAGS) $(CFLAGS_SDL) -c $< -o $(call GET_OBJ_FILE, $<)
1091
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS) $(LDFLAGS_SDL)
1092
-
1093
- #
1094
- # Tests
1095
- #
1096
-
1097
- tests: $(TEST_TARGETS)
1098
-
1099
- tests/test-c.o: tests/test-c.c include/whisper.h
1100
- $(CC) $(CFLAGS) -c $(filter-out %.h,$^) -o $@
1101
-
1102
- tests/test-backend-ops: tests/test-backend-ops.cpp \
1103
- $(OBJ_GGML)
1104
- $(CXX) $(CXXFLAGS) -c $< -o $(call GET_OBJ_FILE, $<)
1105
- $(CXX) $(CXXFLAGS) $(filter-out %.h $<,$^) $(call GET_OBJ_FILE, $<) -o $@ $(LDFLAGS)
1106
-
1107
- #
1108
- # Audio samples
1109
- #
1110
-
1111
- # download a few audio samples into folder "./samples":
1112
- .PHONY: samples
1113
- samples:
1114
- @echo "Downloading samples..."
1115
- @mkdir -p samples
1116
- @wget --quiet --show-progress -O samples/gb0.ogg https://upload.wikimedia.org/wikipedia/commons/2/22/George_W._Bush%27s_weekly_radio_address_%28November_1%2C_2008%29.oga
1117
- @wget --quiet --show-progress -O samples/gb1.ogg https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg
1118
- @wget --quiet --show-progress -O samples/hp0.ogg https://upload.wikimedia.org/wikipedia/en/d/d4/En.henryfphillips.ogg
1119
- @wget --quiet --show-progress -O samples/mm1.wav https://cdn.openai.com/whisper/draft-20220913a/micro-machines.wav
1120
- @wget --quiet --show-progress -O samples/a13.mp3 https://upload.wikimedia.org/wikipedia/commons/transcoded/6/6f/Apollo13-wehaveaproblem.ogg/Apollo13-wehaveaproblem.ogg.mp3
1121
- @wget --quiet --show-progress -O samples/diffusion2023-07-03.flac https://archive.org/download/diffusion2023-07-03/diffusion2023-07-03.flac
1122
- @echo "Converting to 16-bit WAV ..."
1123
- @ffmpeg -loglevel -0 -y -i samples/gb0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb0.wav
1124
- @ffmpeg -loglevel -0 -y -i samples/gb1.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/gb1.wav
1125
- @ffmpeg -loglevel -0 -y -i samples/hp0.ogg -ar 16000 -ac 1 -c:a pcm_s16le samples/hp0.wav
1126
- @rm samples/*.ogg
1127
- @ffmpeg -loglevel -0 -y -i samples/mm1.wav -ar 16000 -ac 1 -c:a pcm_s16le samples/mm0.wav
1128
- @rm samples/mm1.wav
1129
- @ffmpeg -loglevel -0 -y -i samples/a13.mp3 -ar 16000 -ac 1 -c:a pcm_s16le -ss 00:00:00 -to 00:00:30 samples/a13.wav
1130
- @rm samples/a13.mp3
1131
- @ffmpeg -loglevel -0 -y -i samples/diffusion2023-07-03.flac -ar 16000 -ac 1 -c:a pcm_s16le samples/diffusion2023-07-03.wav
1132
- @rm samples/diffusion2023-07-03.flac
1133
-
1134
- #
1135
- # Models
1136
- #
1137
-
1138
- # if not already downloaded, the following targets download the specified model and
1139
- # run it on all samples in the folder "./samples":
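- # e.g. "make base.en" downloads models/ggml-base.en.bin (if not already present) and then transcribes every samples/*.wav with ./main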
1140
-
1141
- .PHONY: tiny.en
1142
- .PHONY: tiny
1143
- .PHONY: base.en
1144
- .PHONY: base
1145
- .PHONY: small.en
1146
- .PHONY: small
1147
- .PHONY: medium.en
1148
- .PHONY: medium
1149
- .PHONY: large-v1
1150
- .PHONY: large-v2
1151
- .PHONY: large-v3
1152
-
1153
- tiny.en tiny base.en base small.en small medium.en medium large-v1 large-v2 large-v3: main
1154
- bash ./models/download-ggml-model.sh $@
1155
- @echo ""
1156
- @echo "==============================================="
1157
- @echo "Running $@ on all samples in ./samples ..."
1158
- @echo "==============================================="
1159
- @echo ""
1160
- @for f in samples/*.wav; do \
1161
- echo "----------------------------------------------" ; \
1162
- echo "[+] Running $@ on $$f ... (run 'ffplay $$f' to listen)" ; \
1163
- echo "----------------------------------------------" ; \
1164
- echo "" ; \
1165
- ./main -m models/[email protected] -f $$f ; \
1166
- echo "" ; \
1167
- done
 
scripts/Package.swift DELETED
@@ -1,60 +0,0 @@
1
- // swift-tools-version:5.5
2
-
3
- import PackageDescription
4
-
5
- let package = Package(
6
- name: "whisper",
7
- platforms: [
8
- .macOS(.v12),
9
- .iOS(.v14),
10
- .watchOS(.v4),
11
- .tvOS(.v14)
12
- ],
13
- products: [
14
- .library(name: "whisper", targets: ["whisper"]),
15
- ],
16
- targets: [
17
- .target(
18
- name: "whisper",
19
- path: ".",
20
- exclude: [
21
- "bindings",
22
- "cmake",
23
- "coreml",
24
- "examples",
25
- "extra",
26
- "models",
27
- "samples",
28
- "tests",
29
- "CMakeLists.txt",
30
- "Makefile"
31
- ],
32
- sources: [
33
- "ggml/src/ggml.c",
34
- "src/whisper.cpp",
35
- "ggml/src/ggml-aarch64.c",
36
- "ggml/src/ggml-alloc.c",
37
- "ggml/src/ggml-backend.c",
38
- "ggml/src/ggml-quants.c",
39
- "ggml/src/ggml-metal.m"
40
- ],
41
- resources: [.process("ggml-metal.metal")],
42
- publicHeadersPath: "spm-headers",
43
- cSettings: [
44
- .unsafeFlags(["-Wno-shorten-64-to-32", "-O3", "-DNDEBUG"]),
45
- .define("GGML_USE_ACCELERATE"),
46
- .unsafeFlags(["-fno-objc-arc"]),
47
- .define("GGML_USE_METAL")
48
- // NOTE: NEW_LAPACK will require iOS version 16.4+
49
- // We should consider adding this in the future when we drop support for iOS 14
50
- // (ref: https://developer.apple.com/documentation/accelerate/1513264-cblas_sgemm?language=objc)
51
- // .define("ACCELERATE_NEW_LAPACK"),
52
- // .define("ACCELERATE_LAPACK_ILP64")
53
- ],
54
- linkerSettings: [
55
- .linkedFramework("Accelerate")
56
- ]
57
- )
58
- ],
59
- cxxLanguageStandard: .cxx11
60
- )
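-
- // Example (illustrative only): a downstream SwiftPM project could declare a dependency on this package with:
- //
- //   .package(url: "https://github.com/ggerganov/whisper.cpp", branch: "master")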
 
scripts/README.md DELETED
@@ -1,832 +0,0 @@
1
- # whisper.cpp
2
-
3
- ![whisper.cpp](https://user-images.githubusercontent.com/1991296/235238348-05d0f6a4-da44-4900-a1de-d0707e75b763.jpeg)
4
-
5
- [![Actions Status](https://github.com/ggerganov/whisper.cpp/workflows/CI/badge.svg)](https://github.com/ggerganov/whisper.cpp/actions)
6
- [![License: MIT](https://img.shields.io/badge/license-MIT-blue.svg)](https://opensource.org/licenses/MIT)
7
- [![Conan Center](https://shields.io/conan/v/whisper-cpp)](https://conan.io/center/whisper-cpp)
8
- [![npm](https://img.shields.io/npm/v/whisper.cpp.svg)](https://www.npmjs.com/package/whisper.cpp/)
9
-
10
- Stable: [v1.6.2](https://github.com/ggerganov/whisper.cpp/releases/tag/v1.6.2) / [Roadmap | F.A.Q.](https://github.com/ggerganov/whisper.cpp/discussions/126)
11
-
12
- High-performance inference of [OpenAI's Whisper](https://github.com/openai/whisper) automatic speech recognition (ASR) model:
13
-
14
- - Plain C/C++ implementation without dependencies
15
- - Apple Silicon first-class citizen - optimized via ARM NEON, Accelerate framework, Metal and [Core ML](https://github.com/ggerganov/whisper.cpp#core-ml-support)
16
- - AVX intrinsics support for x86 architectures
17
- - VSX intrinsics support for POWER architectures
18
- - Mixed F16 / F32 precision
19
- - [4-bit and 5-bit integer quantization support](https://github.com/ggerganov/whisper.cpp#quantization)
20
- - Zero memory allocations at runtime
21
- - Support for CPU-only inference
22
- - [Efficient GPU support for NVIDIA](https://github.com/ggerganov/whisper.cpp#nvidia-gpu-support-via-cublas)
23
- - [OpenVINO Support](https://github.com/ggerganov/whisper.cpp#openvino-support)
24
- - [C-style API](https://github.com/ggerganov/whisper.cpp/blob/master/whisper.h)
25
-
26
- Supported platforms:
27
-
28
- - [x] Mac OS (Intel and Arm)
29
- - [x] [iOS](examples/whisper.objc)
30
- - [x] [Android](examples/whisper.android)
31
- - [x] [Java](bindings/java/README.md)
32
- - [x] Linux / [FreeBSD](https://github.com/ggerganov/whisper.cpp/issues/56#issuecomment-1350920264)
33
- - [x] [WebAssembly](examples/whisper.wasm)
34
- - [x] Windows ([MSVC](https://github.com/ggerganov/whisper.cpp/blob/master/.github/workflows/build.yml#L117-L144) and [MinGW](https://github.com/ggerganov/whisper.cpp/issues/168))
35
- - [x] [Raspberry Pi](https://github.com/ggerganov/whisper.cpp/discussions/166)
36
- - [x] [Docker](https://github.com/ggerganov/whisper.cpp/pkgs/container/whisper.cpp)
37
-
38
- The entire high-level implementation of the model is contained in [whisper.h](include/whisper.h) and [whisper.cpp](src/whisper.cpp).
39
- The rest of the code is part of the [`ggml`](https://github.com/ggerganov/ggml) machine learning library.
40
-
41
- Having such a lightweight implementation of the model makes it easy to integrate it into different platforms and applications.
42
- As an example, here is a video of running the model on an iPhone 13 device - fully offline, on-device: [whisper.objc](examples/whisper.objc)
43
-
44
- https://user-images.githubusercontent.com/1991296/197385372-962a6dea-bca1-4d50-bf96-1d8c27b98c81.mp4
45
-
46
- You can also easily make your own offline voice assistant application: [command](examples/command)
47
-
48
- https://user-images.githubusercontent.com/1991296/204038393-2f846eae-c255-4099-a76d-5735c25c49da.mp4
49
-
50
- On Apple Silicon, the inference runs fully on the GPU via Metal:
51
-
52
- https://github.com/ggerganov/whisper.cpp/assets/1991296/c82e8f86-60dc-49f2-b048-d2fdbd6b5225
53
-
54
- Or you can even run it straight in the browser: [talk.wasm](examples/talk.wasm)
55
-
56
- ## Implementation details
57
-
58
- - The core tensor operations are implemented in C ([ggml.h](ggml/include/ggml.h) / [ggml.c](ggml/src/ggml.c))
59
- - The transformer model and the high-level C-style API are implemented in C++ ([whisper.h](include/whisper.h) / [whisper.cpp](src/whisper.cpp))
60
- - Sample usage is demonstrated in [main.cpp](examples/main)
61
- - Sample real-time audio transcription from the microphone is demonstrated in [stream.cpp](examples/stream)
62
- - Various other examples are available in the [examples](examples) folder
63
-
64
- The tensor operators are optimized heavily for Apple silicon CPUs. Depending on the computation size, Arm Neon SIMD intrinsics or CBLAS Accelerate framework routines are used. The latter are especially effective for bigger sizes since the Accelerate framework utilizes the special-purpose AMX coprocessor available in modern Apple products.
65
-
66
- ## Quick start
67
-
68
- First clone the repository:
69
-
70
- ```bash
71
- git clone https://github.com/ggerganov/whisper.cpp.git
72
- ```
73
-
74
- Then, download one of the Whisper [models](models/README.md) converted in [`ggml` format](#ggml-format). For example:
75
-
76
- ```bash
77
- bash ./models/download-ggml-model.sh base.en
78
- ```
79
-
80
- Now build the [main](examples/main) example and transcribe an audio file like this:
81
-
82
- ```bash
83
- # build the main example
84
- make
85
-
86
- # transcribe an audio file
87
- ./main -f samples/jfk.wav
88
- ```
89
-
90
- ---
91
-
92
- For a quick demo, simply run `make base.en`:
93
-
94
- ```text
95
- $ make base.en
96
-
97
- cc -I. -O3 -std=c11 -pthread -DGGML_USE_ACCELERATE -c ggml.c -o ggml.o
98
- c++ -I. -I./examples -O3 -std=c++11 -pthread -c whisper.cpp -o whisper.o
99
- c++ -I. -I./examples -O3 -std=c++11 -pthread examples/main/main.cpp whisper.o ggml.o -o main -framework Accelerate
100
- ./main -h
101
-
102
- usage: ./main [options] file0.wav file1.wav ...
103
-
104
- options:
105
- -h, --help [default] show this help message and exit
106
- -t N, --threads N [4 ] number of threads to use during computation
107
- -p N, --processors N [1 ] number of processors to use during computation
108
- -ot N, --offset-t N [0 ] time offset in milliseconds
109
- -on N, --offset-n N [0 ] segment index offset
110
- -d N, --duration N [0 ] duration of audio to process in milliseconds
111
- -mc N, --max-context N [-1 ] maximum number of text context tokens to store
112
- -ml N, --max-len N [0 ] maximum segment length in characters
113
- -sow, --split-on-word [false ] split on word rather than on token
114
- -bo N, --best-of N [5 ] number of best candidates to keep
115
- -bs N, --beam-size N [5 ] beam size for beam search
116
- -wt N, --word-thold N [0.01 ] word timestamp probability threshold
117
- -et N, --entropy-thold N [2.40 ] entropy threshold for decoder fail
118
- -lpt N, --logprob-thold N [-1.00 ] log probability threshold for decoder fail
119
- -debug, --debug-mode [false ] enable debug mode (eg. dump log_mel)
120
- -tr, --translate [false ] translate from source language to english
121
- -di, --diarize [false ] stereo audio diarization
122
- -tdrz, --tinydiarize [false ] enable tinydiarize (requires a tdrz model)
123
- -nf, --no-fallback [false ] do not use temperature fallback while decoding
124
- -otxt, --output-txt [false ] output result in a text file
125
- -ovtt, --output-vtt [false ] output result in a vtt file
126
- -osrt, --output-srt [false ] output result in a srt file
127
- -olrc, --output-lrc [false ] output result in a lrc file
128
- -owts, --output-words [false ] output script for generating karaoke video
129
- -fp, --font-path [/System/Library/Fonts/Supplemental/Courier New Bold.ttf] path to a monospace font for karaoke video
130
- -ocsv, --output-csv [false ] output result in a CSV file
131
- -oj, --output-json [false ] output result in a JSON file
132
- -ojf, --output-json-full [false ] include more information in the JSON file
133
- -of FNAME, --output-file FNAME [ ] output file path (without file extension)
134
- -ps, --print-special [false ] print special tokens
135
- -pc, --print-colors [false ] print colors
136
- -pp, --print-progress [false ] print progress
137
- -nt, --no-timestamps [false ] do not print timestamps
138
- -l LANG, --language LANG [en ] spoken language ('auto' for auto-detect)
139
- -dl, --detect-language [false ] exit after automatically detecting language
140
- --prompt PROMPT [ ] initial prompt
141
- -m FNAME, --model FNAME [models/ggml-base.en.bin] model path
142
- -f FNAME, --file FNAME [ ] input WAV file path
143
- -oved D, --ov-e-device DNAME [CPU ] the OpenVINO device used for encode inference
144
- -ls, --log-score [false ] log best decoder scores of tokens
145
- -ng, --no-gpu [false ] disable GPU
146
-
147
-
148
- bash ./models/download-ggml-model.sh base.en
149
- Downloading ggml model base.en ...
150
- ggml-base.en.bin 100%[========================>] 141.11M 6.34MB/s in 24s
151
- Done! Model 'base.en' saved in 'models/ggml-base.en.bin'
152
- You can now use it like this:
153
-
154
- $ ./main -m models/ggml-base.en.bin -f samples/jfk.wav
155
-
156
-
157
- ===============================================
158
- Running base.en on all samples in ./samples ...
159
- ===============================================
160
-
161
- ----------------------------------------------
162
- [+] Running base.en on samples/jfk.wav ... (run 'ffplay samples/jfk.wav' to listen)
163
- ----------------------------------------------
164
-
165
- whisper_init_from_file: loading model from 'models/ggml-base.en.bin'
166
- whisper_model_load: loading model
167
- whisper_model_load: n_vocab = 51864
168
- whisper_model_load: n_audio_ctx = 1500
169
- whisper_model_load: n_audio_state = 512
170
- whisper_model_load: n_audio_head = 8
171
- whisper_model_load: n_audio_layer = 6
172
- whisper_model_load: n_text_ctx = 448
173
- whisper_model_load: n_text_state = 512
174
- whisper_model_load: n_text_head = 8
175
- whisper_model_load: n_text_layer = 6
176
- whisper_model_load: n_mels = 80
177
- whisper_model_load: f16 = 1
178
- whisper_model_load: type = 2
179
- whisper_model_load: mem required = 215.00 MB (+ 6.00 MB per decoder)
180
- whisper_model_load: kv self size = 5.25 MB
181
- whisper_model_load: kv cross size = 17.58 MB
182
- whisper_model_load: adding 1607 extra tokens
183
- whisper_model_load: model ctx = 140.60 MB
184
- whisper_model_load: model size = 140.54 MB
185
-
186
- system_info: n_threads = 4 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 |
187
-
188
- main: processing 'samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
189
-
190
-
191
- [00:00:00.000 --> 00:00:11.000] And so my fellow Americans, ask not what your country can do for you, ask what you can do for your country.
192
-
193
-
194
- whisper_print_timings: fallbacks = 0 p / 0 h
195
- whisper_print_timings: load time = 113.81 ms
196
- whisper_print_timings: mel time = 15.40 ms
197
- whisper_print_timings: sample time = 11.58 ms / 27 runs ( 0.43 ms per run)
198
- whisper_print_timings: encode time = 266.60 ms / 1 runs ( 266.60 ms per run)
199
- whisper_print_timings: decode time = 66.11 ms / 27 runs ( 2.45 ms per run)
200
- whisper_print_timings: total time = 476.31 ms
201
- ```
202
-
203
- The command downloads the `base.en` model converted to custom `ggml` format and runs the inference on all `.wav` samples in the folder `samples`.
204
-
205
- For detailed usage instructions, run: `./main -h`
206
-
207
- Note that the [main](examples/main) example currently runs only with 16-bit WAV files, so make sure to convert your input before running the tool.
208
- For example, you can use `ffmpeg` like this:
209
-
210
- ```bash
211
- ffmpeg -i input.mp3 -ar 16000 -ac 1 -c:a pcm_s16le output.wav
212
- ```
213
-
214
- ## More audio samples
215
-
216
- If you want some extra audio samples to play with, simply run:
217
-
218
- ```
219
- make samples
220
- ```
221
-
222
- This will download a few more audio files from Wikipedia and convert them to 16-bit WAV format via `ffmpeg`.
223
-
224
- You can download and run the other models as follows:
225
-
226
- ```
227
- make tiny.en
228
- make tiny
229
- make base.en
230
- make base
231
- make small.en
232
- make small
233
- make medium.en
234
- make medium
235
- make large-v1
236
- make large-v2
237
- make large-v3
238
- ```
239
-
240
- ## Memory usage
241
-
242
- | Model | Disk | Mem |
243
- | ------ | ------- | ------- |
244
- | tiny | 75 MiB | ~273 MB |
245
- | base | 142 MiB | ~388 MB |
246
- | small | 466 MiB | ~852 MB |
247
- | medium | 1.5 GiB | ~2.1 GB |
248
- | large | 2.9 GiB | ~3.9 GB |
249
-
250
- ## Quantization
251
-
252
- `whisper.cpp` supports integer quantization of the Whisper `ggml` models.
253
- Quantized models require less memory and disk space and depending on the hardware can be processed more efficiently.
254
-
255
- Here are the steps for creating and using a quantized model:
256
-
257
- ```bash
258
- # quantize a model with Q5_0 method
259
- make quantize
260
- ./quantize models/ggml-base.en.bin models/ggml-base.en-q5_0.bin q5_0
261
-
262
- # run the examples as usual, specifying the quantized model file
263
- ./main -m models/ggml-base.en-q5_0.bin ./samples/gb0.wav
264
- ```
265
-
266
- ## Core ML support
267
-
268
- On Apple Silicon devices, the Encoder inference can be executed on the Apple Neural Engine (ANE) via Core ML. This can result in significant
269
- speed-up (more than 3x faster compared with CPU-only execution). Here are the instructions for generating a Core ML model and using it with `whisper.cpp`:
270
-
271
- - Install Python dependencies needed for the creation of the Core ML model:
272
-
273
- ```bash
274
- pip install ane_transformers
275
- pip install openai-whisper
276
- pip install coremltools
277
- ```
278
-
279
- - To ensure `coremltools` operates correctly, please confirm that [Xcode](https://developer.apple.com/xcode/) is installed and execute `xcode-select --install` to install the command-line tools.
280
- - Python 3.10 is recommended.
281
- - MacOS Sonoma (version 14) or newer is recommended, as older versions of MacOS might experience issues with transcription hallucination.
282
- - [OPTIONAL] It is recommended to utilize a Python version management system, such as [Miniconda](https://docs.conda.io/en/latest/miniconda.html) for this step:
283
- - To create an environment, use: `conda create -n py310-whisper python=3.10 -y`
284
- - To activate the environment, use: `conda activate py310-whisper`
285
-
286
- - Generate a Core ML model. For example, to generate a `base.en` model, use:
287
-
288
- ```bash
289
- ./models/generate-coreml-model.sh base.en
290
- ```
291
-
292
- This will generate the folder `models/ggml-base.en-encoder.mlmodelc`
293
-
294
- - Build `whisper.cpp` with Core ML support:
295
-
296
- ```bash
297
- # using Makefile
298
- make clean
299
- WHISPER_COREML=1 make -j
300
-
301
- # using CMake
302
- cmake -B build -DWHISPER_COREML=1
303
- cmake --build build -j --config Release
304
- ```
305
-
306
- - Run the examples as usual. For example:
307
-
308
- ```text
309
- $ ./main -m models/ggml-base.en.bin -f samples/jfk.wav
310
-
311
- ...
312
-
313
- whisper_init_state: loading Core ML model from 'models/ggml-base.en-encoder.mlmodelc'
314
- whisper_init_state: first run on a device may take a while ...
315
- whisper_init_state: Core ML model loaded
316
-
317
- system_info: n_threads = 4 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 | COREML = 1 |
318
-
319
- ...
320
- ```
321
-
322
- The first run on a device is slow, since the ANE service compiles the Core ML model to some device-specific format.
323
- Subsequent runs are faster.
324
-
325
- For more information about the Core ML implementation please refer to PR [#566](https://github.com/ggerganov/whisper.cpp/pull/566).
326
-
327
- ## OpenVINO support
328
-
329
- On platforms that support [OpenVINO](https://github.com/openvinotoolkit/openvino), the Encoder inference can be executed
330
- on OpenVINO-supported devices including x86 CPUs and Intel GPUs (integrated & discrete).
331
-
332
- This can result in significant speedup in encoder performance. Here are the instructions for generating the OpenVINO model and using it with `whisper.cpp`:
333
-
334
- - First, set up a Python virtual environment and install the Python dependencies. Python 3.10 is recommended.
335
-
336
- Windows:
337
-
338
- ```powershell
339
- cd models
340
- python -m venv openvino_conv_env
341
- openvino_conv_env\Scripts\activate
342
- python -m pip install --upgrade pip
343
- pip install -r requirements-openvino.txt
344
- ```
345
-
346
- Linux and macOS:
347
-
348
- ```bash
349
- cd models
350
- python3 -m venv openvino_conv_env
351
- source openvino_conv_env/bin/activate
352
- python -m pip install --upgrade pip
353
- pip install -r requirements-openvino.txt
354
- ```
355
-
356
- - Generate an OpenVINO encoder model. For example, to generate a `base.en` model, use:
357
-
358
- ```
359
- python convert-whisper-to-openvino.py --model base.en
360
- ```
361
-
362
- This will produce ggml-base.en-encoder-openvino.xml/.bin IR model files. It's recommended to relocate these to the same folder as `ggml` models, as that
363
- is the default location that the OpenVINO extension will search at runtime.
364
-
365
- - Build `whisper.cpp` with OpenVINO support:
366
-
367
- Download OpenVINO package from [release page](https://github.com/openvinotoolkit/openvino/releases). The recommended version to use is [2023.0.0](https://github.com/openvinotoolkit/openvino/releases/tag/2023.0.0).
368
-
369
- After downloading and extracting the package onto your development system, set up the required environment by sourcing the setupvars script. For example:
370
-
371
- Linux:
372
-
373
- ```bash
374
- source /path/to/l_openvino_toolkit_ubuntu22_2023.0.0.10926.b4452d56304_x86_64/setupvars.sh
375
- ```
376
-
377
- Windows (cmd):
378
-
379
- ```powershell
380
- C:\Path\To\w_openvino_toolkit_windows_2023.0.0.10926.b4452d56304_x86_64\setupvars.bat
381
- ```
382
-
383
- And then build the project using cmake:
384
-
385
- ```bash
386
- cmake -B build -DWHISPER_OPENVINO=1
387
- cmake --build build -j --config Release
388
- ```
389
-
390
- - Run the examples as usual. For example:
391
-
392
- ```text
393
- $ ./main -m models/ggml-base.en.bin -f samples/jfk.wav
394
-
395
- ...
396
-
397
- whisper_ctx_init_openvino_encoder: loading OpenVINO model from 'models/ggml-base.en-encoder-openvino.xml'
398
- whisper_ctx_init_openvino_encoder: first run on a device may take a while ...
399
- whisper_openvino_init: path_model = models/ggml-base.en-encoder-openvino.xml, device = GPU, cache_dir = models/ggml-base.en-encoder-openvino-cache
400
- whisper_ctx_init_openvino_encoder: OpenVINO model loaded
401
-
402
- system_info: n_threads = 4 / 8 | AVX = 1 | AVX2 = 1 | AVX512 = 0 | FMA = 1 | NEON = 0 | ARM_FMA = 0 | F16C = 1 | FP16_VA = 0 | WASM_SIMD = 0 | BLAS = 0 | SSE3 = 1 | VSX = 0 | COREML = 0 | OPENVINO = 1 |
403
-
404
- ...
405
- ```
406
-
407
- The first run on an OpenVINO device is slow, since the OpenVINO framework will compile the IR (Intermediate Representation) model to a device-specific 'blob'. This device-specific blob will get
408
- cached for the next run.
409
-
410
- For more information about the OpenVINO implementation please refer to PR [#1037](https://github.com/ggerganov/whisper.cpp/pull/1037).
411
-
412
- ## NVIDIA GPU support
413
-
414
- With NVIDIA cards the processing of the models is done efficiently on the GPU via cuBLAS and custom CUDA kernels.
415
- First, make sure you have installed `cuda`: https://developer.nvidia.com/cuda-downloads
416
-
417
- Now build `whisper.cpp` with CUDA support:
418
-
419
- ```
420
- make clean
421
- GGML_CUDA=1 make -j
422
- ```
423
-
424
- ## BLAS CPU support via OpenBLAS
425
-
426
- Encoder processing can be accelerated on the CPU via OpenBLAS.
427
- First, make sure you have installed `openblas`: https://www.openblas.net/
428
-
429
- Now build `whisper.cpp` with OpenBLAS support:
430
-
431
- ```
432
- make clean
433
- GGML_OPENBLAS=1 make -j
434
- ```
435
-
436
- ## BLAS CPU support via Intel MKL
437
-
438
- Encoder processing can be accelerated on the CPU via the BLAS-compatible interface of Intel's Math Kernel Library.
439
- First, make sure you have installed Intel's MKL runtime and development packages: https://www.intel.com/content/www/us/en/developer/tools/oneapi/onemkl-download.html
440
-
441
- Now build `whisper.cpp` with Intel MKL BLAS support:
442
-
443
- ```
444
- source /opt/intel/oneapi/setvars.sh
445
- mkdir build
446
- cd build
447
- cmake -DWHISPER_MKL=ON ..
448
- WHISPER_MKL=1 make -j
449
- ```
450
-
451
- ## Docker
452
-
453
- ### Prerequisites
454
-
455
- - Docker must be installed and running on your system.
456
- - Create a folder to store big models & intermediate files (ex. /whisper/models)
457
-
458
- ### Images
459
-
460
- We have two Docker images available for this project:
461
-
462
- 1. `ghcr.io/ggerganov/whisper.cpp:main`: This image includes the main executable file as well as `curl` and `ffmpeg`. (platforms: `linux/amd64`, `linux/arm64`)
463
- 2. `ghcr.io/ggerganov/whisper.cpp:main-cuda`: Same as `main` but compiled with CUDA support. (platforms: `linux/amd64`)
464
-
465
- ### Usage
466
-
467
- ```shell
468
- # download model and persist it in a local folder
469
- docker run -it --rm \
470
- -v path/to/models:/models \
471
- whisper.cpp:main "./models/download-ggml-model.sh base /models"
472
- # transcribe an audio file
473
- docker run -it --rm \
474
- -v path/to/models:/models \
475
- -v path/to/audios:/audios \
476
- whisper.cpp:main "./main -m /models/ggml-base.bin -f /audios/jfk.wav"
477
- # transcribe an audio file in samples folder
478
- docker run -it --rm \
479
- -v path/to/models:/models \
480
- whisper.cpp:main "./main -m /models/ggml-base.bin -f ./samples/jfk.wav"
481
- ```
482
-
483
- ## Installing with Conan
484
-
485
- You can install pre-built binaries for whisper.cpp or build it from source using [Conan](https://conan.io/). Use the following command:
486
-
487
- ```
488
- conan install --requires="whisper-cpp/[*]" --build=missing
489
- ```
490
-
491
- For detailed instructions on how to use Conan, please refer to the [Conan documentation](https://docs.conan.io/2/).
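As a rough sketch of a typical consumer workflow (this assumes Conan 2 with its default `CMakeDeps`/`CMakeToolchain` generators; the exact CMake targets exported by the `whisper-cpp` package are not documented here):

```bash
# Sketch: resolve the package and generate the CMake integration files,
# then point CMake at the generated toolchain file
conan install --requires="whisper-cpp/[*]" --build=missing -g CMakeDeps -g CMakeToolchain
cmake -B build -DCMAKE_TOOLCHAIN_FILE="$(pwd)/conan_toolchain.cmake" -DCMAKE_BUILD_TYPE=Release
cmake --build build -j
```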
492
-
493
- ## Limitations
494
-
495
- - Inference only
496
-
497
- ## Another example
498
-
499
- Here is another example of transcribing a [3:24 min speech](https://upload.wikimedia.org/wikipedia/commons/1/1f/George_W_Bush_Columbia_FINAL.ogg)
500
- in about half a minute on a MacBook M1 Pro, using `medium.en` model:
501
-
502
- <details>
503
- <summary>Expand to see the result</summary>
504
-
505
- ```text
506
- $ ./main -m models/ggml-medium.en.bin -f samples/gb1.wav -t 8
507
-
508
- whisper_init_from_file: loading model from 'models/ggml-medium.en.bin'
509
- whisper_model_load: loading model
510
- whisper_model_load: n_vocab = 51864
511
- whisper_model_load: n_audio_ctx = 1500
512
- whisper_model_load: n_audio_state = 1024
513
- whisper_model_load: n_audio_head = 16
514
- whisper_model_load: n_audio_layer = 24
515
- whisper_model_load: n_text_ctx = 448
516
- whisper_model_load: n_text_state = 1024
517
- whisper_model_load: n_text_head = 16
518
- whisper_model_load: n_text_layer = 24
519
- whisper_model_load: n_mels = 80
520
- whisper_model_load: f16 = 1
521
- whisper_model_load: type = 4
522
- whisper_model_load: mem required = 1720.00 MB (+ 43.00 MB per decoder)
523
- whisper_model_load: kv self size = 42.00 MB
524
- whisper_model_load: kv cross size = 140.62 MB
525
- whisper_model_load: adding 1607 extra tokens
526
- whisper_model_load: model ctx = 1462.35 MB
527
- whisper_model_load: model size = 1462.12 MB
528
-
529
- system_info: n_threads = 8 / 10 | AVX = 0 | AVX2 = 0 | AVX512 = 0 | FMA = 0 | NEON = 1 | ARM_FMA = 1 | F16C = 0 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 | SSE3 = 0 | VSX = 0 |
530
-
531
- main: processing 'samples/gb1.wav' (3179750 samples, 198.7 sec), 8 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
532
-
533
-
534
- [00:00:00.000 --> 00:00:08.000] My fellow Americans, this day has brought terrible news and great sadness to our country.
535
- [00:00:08.000 --> 00:00:17.000] At nine o'clock this morning, Mission Control in Houston lost contact with our Space Shuttle Columbia.
536
- [00:00:17.000 --> 00:00:23.000] A short time later, debris was seen falling from the skies above Texas.
537
- [00:00:23.000 --> 00:00:29.000] The Columbia's lost. There are no survivors.
538
- [00:00:29.000 --> 00:00:32.000] On board was a crew of seven.
539
- [00:00:32.000 --> 00:00:39.000] Colonel Rick Husband, Lieutenant Colonel Michael Anderson, Commander Laurel Clark,
540
- [00:00:39.000 --> 00:00:48.000] Captain David Brown, Commander William McCool, Dr. Kultna Shavla, and Ilan Ramon,
541
- [00:00:48.000 --> 00:00:52.000] a colonel in the Israeli Air Force.
542
- [00:00:52.000 --> 00:00:58.000] These men and women assumed great risk in the service to all humanity.
543
- [00:00:58.000 --> 00:01:03.000] In an age when space flight has come to seem almost routine,
544
- [00:01:03.000 --> 00:01:07.000] it is easy to overlook the dangers of travel by rocket
545
- [00:01:07.000 --> 00:01:12.000] and the difficulties of navigating the fierce outer atmosphere of the Earth.
546
- [00:01:12.000 --> 00:01:18.000] These astronauts knew the dangers, and they faced them willingly,
547
- [00:01:18.000 --> 00:01:23.000] knowing they had a high and noble purpose in life.
548
- [00:01:23.000 --> 00:01:31.000] Because of their courage and daring and idealism, we will miss them all the more.
549
- [00:01:31.000 --> 00:01:36.000] All Americans today are thinking as well of the families of these men and women
550
- [00:01:36.000 --> 00:01:40.000] who have been given this sudden shock and grief.
551
- [00:01:40.000 --> 00:01:45.000] You're not alone. Our entire nation grieves with you,
552
- [00:01:45.000 --> 00:01:52.000] and those you love will always have the respect and gratitude of this country.
553
- [00:01:52.000 --> 00:01:56.000] The cause in which they died will continue.
554
- [00:01:56.000 --> 00:02:04.000] Mankind is led into the darkness beyond our world by the inspiration of discovery
555
- [00:02:04.000 --> 00:02:11.000] and the longing to understand. Our journey into space will go on.
556
- [00:02:11.000 --> 00:02:16.000] In the skies today, we saw destruction and tragedy.
557
- [00:02:16.000 --> 00:02:22.000] Yet farther than we can see, there is comfort and hope.
558
- [00:02:22.000 --> 00:02:29.000] In the words of the prophet Isaiah, "Lift your eyes and look to the heavens
559
- [00:02:29.000 --> 00:02:35.000] who created all these. He who brings out the starry hosts one by one
560
- [00:02:35.000 --> 00:02:39.000] and calls them each by name."
561
- [00:02:39.000 --> 00:02:46.000] Because of His great power and mighty strength, not one of them is missing.
562
- [00:02:46.000 --> 00:02:55.000] The same Creator who names the stars also knows the names of the seven souls we mourn today.
563
- [00:02:55.000 --> 00:03:01.000] The crew of the shuttle Columbia did not return safely to earth,
564
- [00:03:01.000 --> 00:03:05.000] yet we can pray that all are safely home.
565
- [00:03:05.000 --> 00:03:13.000] May God bless the grieving families, and may God continue to bless America.
566
- [00:03:13.000 --> 00:03:19.000] [Silence]
567
-
568
-
569
- whisper_print_timings: fallbacks = 1 p / 0 h
570
- whisper_print_timings: load time = 569.03 ms
571
- whisper_print_timings: mel time = 146.85 ms
572
- whisper_print_timings: sample time = 238.66 ms / 553 runs ( 0.43 ms per run)
573
- whisper_print_timings: encode time = 18665.10 ms / 9 runs ( 2073.90 ms per run)
574
- whisper_print_timings: decode time = 13090.93 ms / 549 runs ( 23.85 ms per run)
575
- whisper_print_timings: total time = 32733.52 ms
576
- ```
577
-
578
- </details>
579
-
580
- ## Real-time audio input example
581
-
582
- This is a naive example of performing real-time inference on audio from your microphone.
583
- The [stream](examples/stream) tool samples the audio every half a second and runs the transcription continuously.
584
- More info is available in [issue #10](https://github.com/ggerganov/whisper.cpp/issues/10).
585
-
586
- ```bash
587
- make stream
588
- ./stream -m ./models/ggml-base.en.bin -t 8 --step 500 --length 5000
589
- ```
590
-
591
- https://user-images.githubusercontent.com/1991296/194935793-76afede7-cfa8-48d8-a80f-28ba83be7d09.mp4
592
-
593
- ## Confidence color-coding
594
-
595
- Adding the `--print-colors` argument will print the transcribed text using an experimental color coding strategy
596
- to highlight words with high or low confidence:
597
-
598
- ```bash
599
- ./main -m models/ggml-base.en.bin -f samples/gb0.wav --print-colors
600
- ```
601
-
602
- <img width="965" alt="image" src="https://user-images.githubusercontent.com/1991296/197356445-311c8643-9397-4e5e-b46e-0b4b4daa2530.png">
603
-
604
- ## Controlling the length of the generated text segments (experimental)
605
-
606
- For example, to limit the line length to a maximum of 16 characters, simply add `-ml 16`:
607
-
608
- ```text
609
- $ ./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 16
610
-
611
- whisper_model_load: loading model from './models/ggml-base.en.bin'
612
- ...
613
- system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 |
614
-
615
- main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
616
-
617
- [00:00:00.000 --> 00:00:00.850] And so my
618
- [00:00:00.850 --> 00:00:01.590] fellow
619
- [00:00:01.590 --> 00:00:04.140] Americans, ask
620
- [00:00:04.140 --> 00:00:05.660] not what your
621
- [00:00:05.660 --> 00:00:06.840] country can do
622
- [00:00:06.840 --> 00:00:08.430] for you, ask
623
- [00:00:08.430 --> 00:00:09.440] what you can do
624
- [00:00:09.440 --> 00:00:10.020] for your
625
- [00:00:10.020 --> 00:00:11.000] country.
626
- ```
627
-
628
- ## Word-level timestamp (experimental)
629
-
630
- The `--max-len` argument can be used to obtain word-level timestamps. Simply use `-ml 1`:
631
-
632
- ```text
633
- $ ./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -ml 1
634
-
635
- whisper_model_load: loading model from './models/ggml-base.en.bin'
636
- ...
637
- system_info: n_threads = 4 / 10 | AVX2 = 0 | AVX512 = 0 | NEON = 1 | FP16_VA = 1 | WASM_SIMD = 0 | BLAS = 1 |
638
-
639
- main: processing './samples/jfk.wav' (176000 samples, 11.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, timestamps = 1 ...
640
-
641
- [00:00:00.000 --> 00:00:00.320]
642
- [00:00:00.320 --> 00:00:00.370] And
643
- [00:00:00.370 --> 00:00:00.690] so
644
- [00:00:00.690 --> 00:00:00.850] my
645
- [00:00:00.850 --> 00:00:01.590] fellow
646
- [00:00:01.590 --> 00:00:02.850] Americans
647
- [00:00:02.850 --> 00:00:03.300] ,
648
- [00:00:03.300 --> 00:00:04.140] ask
649
- [00:00:04.140 --> 00:00:04.990] not
650
- [00:00:04.990 --> 00:00:05.410] what
651
- [00:00:05.410 --> 00:00:05.660] your
652
- [00:00:05.660 --> 00:00:06.260] country
653
- [00:00:06.260 --> 00:00:06.600] can
654
- [00:00:06.600 --> 00:00:06.840] do
655
- [00:00:06.840 --> 00:00:07.010] for
656
- [00:00:07.010 --> 00:00:08.170] you
657
- [00:00:08.170 --> 00:00:08.190] ,
658
- [00:00:08.190 --> 00:00:08.430] ask
659
- [00:00:08.430 --> 00:00:08.910] what
660
- [00:00:08.910 --> 00:00:09.040] you
661
- [00:00:09.040 --> 00:00:09.320] can
662
- [00:00:09.320 --> 00:00:09.440] do
663
- [00:00:09.440 --> 00:00:09.760] for
664
- [00:00:09.760 --> 00:00:10.020] your
665
- [00:00:10.020 --> 00:00:10.510] country
666
- [00:00:10.510 --> 00:00:11.000] .
667
- ```
668
-
669
- ## Speaker segmentation via tinydiarize (experimental)
670
-
671
- More information about this approach is available here: https://github.com/ggerganov/whisper.cpp/pull/1058
672
-
673
- Sample usage:
674
-
675
- ```bash
676
- # download a tinydiarize compatible model
677
- ./models/download-ggml-model.sh small.en-tdrz
678
-
679
- # run as usual, adding the "-tdrz" command-line argument
680
- ./main -f ./samples/a13.wav -m ./models/ggml-small.en-tdrz.bin -tdrz
681
- ...
682
- main: processing './samples/a13.wav' (480000 samples, 30.0 sec), 4 threads, 1 processors, lang = en, task = transcribe, tdrz = 1, timestamps = 1 ...
683
- ...
684
- [00:00:00.000 --> 00:00:03.800] Okay Houston, we've had a problem here. [SPEAKER_TURN]
685
- [00:00:03.800 --> 00:00:06.200] This is Houston. Say again please. [SPEAKER_TURN]
686
- [00:00:06.200 --> 00:00:08.260] Uh Houston we've had a problem.
687
- [00:00:08.260 --> 00:00:11.320] We've had a main beam up on a volt. [SPEAKER_TURN]
688
- [00:00:11.320 --> 00:00:13.820] Roger main beam interval. [SPEAKER_TURN]
689
- [00:00:13.820 --> 00:00:15.100] Uh uh [SPEAKER_TURN]
690
- [00:00:15.100 --> 00:00:18.020] So okay stand, by thirteen we're looking at it. [SPEAKER_TURN]
691
- [00:00:18.020 --> 00:00:25.740] Okay uh right now uh Houston the uh voltage is uh is looking good um.
692
- [00:00:27.620 --> 00:00:29.940] And we had a a pretty large bank or so.
693
- ```
694
-
695
- ## Karaoke-style movie generation (experimental)
696
-
697
- The [main](examples/main) example provides support for output of karaoke-style movies, where the
698
- currently pronounced word is highlighted. Use the `-owts` argument and run the generated bash script.
699
- This requires `ffmpeg` to be installed.
700
-
701
- Here are a few _"typical"_ examples:
702
-
703
- ```bash
704
- ./main -m ./models/ggml-base.en.bin -f ./samples/jfk.wav -owts
705
- source ./samples/jfk.wav.wts
706
- ffplay ./samples/jfk.wav.mp4
707
- ```
708
-
709
- https://user-images.githubusercontent.com/1991296/199337465-dbee4b5e-9aeb-48a3-b1c6-323ac4db5b2c.mp4
710
-
711
- ---
712
-
713
- ```bash
714
- ./main -m ./models/ggml-base.en.bin -f ./samples/mm0.wav -owts
715
- source ./samples/mm0.wav.wts
716
- ffplay ./samples/mm0.wav.mp4
717
- ```
718
-
719
- https://user-images.githubusercontent.com/1991296/199337504-cc8fd233-0cb7-4920-95f9-4227de3570aa.mp4
720
-
721
- ---
722
-
723
- ```bash
724
- ./main -m ./models/ggml-base.en.bin -f ./samples/gb0.wav -owts
725
- source ./samples/gb0.wav.wts
726
- ffplay ./samples/gb0.wav.mp4
727
- ```
728
-
729
- https://user-images.githubusercontent.com/1991296/199337538-b7b0c7a3-2753-4a88-a0cd-f28a317987ba.mp4
730
-
731
- ---
732
-
733
- ## Video comparison of different models
734
-
735
- Use the [scripts/bench-wts.sh](https://github.com/ggerganov/whisper.cpp/blob/master/scripts/bench-wts.sh) script to generate a video in the following format:
736
-
737
- ```bash
738
- ./scripts/bench-wts.sh samples/jfk.wav
739
- ffplay ./samples/jfk.wav.all.mp4
740
- ```
741
-
742
- https://user-images.githubusercontent.com/1991296/223206245-2d36d903-cf8e-4f09-8c3b-eb9f9c39d6fc.mp4
743
-
744
- ---
745
-
746
- ## Benchmarks
747
-
748
- In order to have an objective comparison of the inference performance across different system configurations,
749
- use the [bench](examples/bench) tool. The tool simply runs the Encoder part of the model and prints how much time it
750
- took to execute it. The results are summarized in the following Github issue:
751
-
752
- [Benchmark results](https://github.com/ggerganov/whisper.cpp/issues/89)
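For example, a typical invocation looks like this (a sketch; the `-m` and `-t` flags mirror the ones used by the other examples in this README):

```bash
# Build the bench tool and time the Encoder using 4 threads
make bench
./bench -m ./models/ggml-base.en.bin -t 4
```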
753
-
754
- Additionally, a script to run whisper.cpp with different models and audio files is provided: [bench.py](scripts/bench.py).
755
-
756
- You can run it with the following command; by default it will run against any standard model in the models folder.
757
-
758
- ```bash
759
- python3 scripts/bench.py -f samples/jfk.wav -t 2,4,8 -p 1,2
760
- ```
761
-
762
- It is written in Python with the intention of being easy to modify and extend for your benchmarking use case.
763
-
764
- It outputs a CSV file with the benchmarking results.
765
-
766
- ## `ggml` format
767
-
768
- The original models are converted to a custom binary format. This allows packing everything needed into a single file:
769
-
770
- - model parameters
771
- - mel filters
772
- - vocabulary
773
- - weights
774
-
775
- You can download the converted models using the [models/download-ggml-model.sh](models/download-ggml-model.sh) script
776
- or manually from here:
777
-
778
- - https://huggingface.co/ggerganov/whisper.cpp
779
- - https://ggml.ggerganov.com
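For example, to fetch the converted `base.en` model with the script and try it out (a sketch reusing commands shown elsewhere in this README):

```bash
# Download the converted base.en model into ./models and transcribe a sample with it
./models/download-ggml-model.sh base.en
./main -m models/ggml-base.en.bin -f samples/jfk.wav
```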
780
-
781
- For more details, see the conversion script [models/convert-pt-to-ggml.py](models/convert-pt-to-ggml.py) or [models/README.md](models/README.md).
782
-
783
- ## [Bindings](https://github.com/ggerganov/whisper.cpp/discussions/categories/bindings)
784
-
785
- - [x] Rust: [tazz4843/whisper-rs](https://github.com/tazz4843/whisper-rs) | [#310](https://github.com/ggerganov/whisper.cpp/discussions/310)
786
- - [x] JavaScript: [bindings/javascript](bindings/javascript) | [#309](https://github.com/ggerganov/whisper.cpp/discussions/309)
787
- - React Native (iOS / Android): [whisper.rn](https://github.com/mybigday/whisper.rn)
788
- - [x] Go: [bindings/go](bindings/go) | [#312](https://github.com/ggerganov/whisper.cpp/discussions/312)
789
- - [x] Java:
790
- - [GiviMAD/whisper-jni](https://github.com/GiviMAD/whisper-jni)
791
- - [x] Ruby: [bindings/ruby](bindings/ruby) | [#507](https://github.com/ggerganov/whisper.cpp/discussions/507)
792
- - [x] Objective-C / Swift: [ggerganov/whisper.spm](https://github.com/ggerganov/whisper.spm) | [#313](https://github.com/ggerganov/whisper.cpp/discussions/313)
793
- - [exPHAT/SwiftWhisper](https://github.com/exPHAT/SwiftWhisper)
794
- - [x] .NET: | [#422](https://github.com/ggerganov/whisper.cpp/discussions/422)
795
- - [sandrohanea/whisper.net](https://github.com/sandrohanea/whisper.net)
796
- - [NickDarvey/whisper](https://github.com/NickDarvey/whisper)
797
- - [x] Python: | [#9](https://github.com/ggerganov/whisper.cpp/issues/9)
798
- - [stlukey/whispercpp.py](https://github.com/stlukey/whispercpp.py) (Cython)
799
- - [AIWintermuteAI/whispercpp](https://github.com/AIWintermuteAI/whispercpp) (Updated fork of aarnphm/whispercpp)
800
- - [aarnphm/whispercpp](https://github.com/aarnphm/whispercpp) (Pybind11)
801
- - [x] R: [bnosac/audio.whisper](https://github.com/bnosac/audio.whisper)
802
- - [x] Unity: [macoron/whisper.unity](https://github.com/Macoron/whisper.unity)
803
-
804
- ## Examples
805
-
806
- There are various examples of using the library for different projects in the [examples](examples) folder.
807
- Some of the examples are even ported to run in the browser using WebAssembly. Check them out!
808
-
809
- | Example | Web | Description |
810
- | --------------------------------------------------- | ------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------- |
811
- | [main](examples/main) | [whisper.wasm](examples/whisper.wasm) | Tool for translating and transcribing audio using Whisper |
812
- | [bench](examples/bench) | [bench.wasm](examples/bench.wasm) | Benchmark the performance of Whisper on your machine |
813
- | [stream](examples/stream) | [stream.wasm](examples/stream.wasm) | Real-time transcription of raw microphone capture |
814
- | [command](examples/command) | [command.wasm](examples/command.wasm) | Basic voice assistant example for receiving voice commands from the mic |
815
- | [wchess](examples/wchess) | [wchess.wasm](examples/wchess) | Voice-controlled chess |
816
- | [talk](examples/talk) | [talk.wasm](examples/talk.wasm) | Talk with a GPT-2 bot |
817
- | [talk-llama](examples/talk-llama) | | Talk with a LLaMA bot |
818
- | [whisper.objc](examples/whisper.objc) | | iOS mobile application using whisper.cpp |
819
- | [whisper.swiftui](examples/whisper.swiftui) | | SwiftUI iOS / macOS application using whisper.cpp |
820
- | [whisper.android](examples/whisper.android) | | Android mobile application using whisper.cpp |
821
- | [whisper.nvim](examples/whisper.nvim) | | Speech-to-text plugin for Neovim |
822
- | [generate-karaoke.sh](examples/generate-karaoke.sh) | | Helper script to easily [generate a karaoke video](https://youtu.be/uj7hVta4blM) of raw audio capture |
823
- | [livestream.sh](examples/livestream.sh) | | [Livestream audio transcription](https://github.com/ggerganov/whisper.cpp/issues/185) |
824
- | [yt-wsp.sh](examples/yt-wsp.sh) | | Download + transcribe and/or translate any VOD [(original)](https://gist.github.com/DaniruKun/96f763ec1a037cc92fe1a059b643b818) |
825
- | [server](examples/server) | | HTTP transcription server with OAI-like API |
826
-
827
- ## [Discussions](https://github.com/ggerganov/whisper.cpp/discussions)
828
-
829
- If you have any kind of feedback about this project, feel free to use the Discussions section and open a new topic.
830
- You can use the [Show and tell](https://github.com/ggerganov/whisper.cpp/discussions/categories/show-and-tell) category
831
- to share your own projects that use `whisper.cpp`. If you have a question, make sure to check the
832
- [Frequently asked questions (#126)](https://github.com/ggerganov/whisper.cpp/discussions/126) discussion.
scripts/README_sycl.md DELETED
@@ -1,249 +0,0 @@
1
- # whisper.cpp for SYCL
2
-
3
- [Background](#background)
4
-
5
- [OS](#os)
6
-
7
- [Intel GPU](#intel-gpu)
8
-
9
- [Linux](#linux)
10
-
11
- [Environment Variable](#environment-variable)
12
-
13
- [Known Issue](#known-issue)
14
-
15
- [Todo](#todo)
16
-
17
- ## Background
18
-
19
- SYCL is a higher-level programming model to improve programming productivity on various hardware accelerators, such as CPUs, GPUs, and FPGAs. It is a single-source embedded domain-specific language based on pure C++17.
20
-
21
- oneAPI is a specification that is open and standards-based, supporting multiple architecture types including but not limited to GPU, CPU, and FPGA. The spec has both direct programming and API-based programming paradigms.
22
-
23
- Intel uses SYCL as the direct programming language to support CPUs, GPUs, and FPGAs.
24
-
25
- To avoid re-inventing the wheel, this code refers to other code paths in llama.cpp (like OpenBLAS, cuBLAS, CLBlast). We use the open-source tool [SYCLomatic](https://github.com/oneapi-src/SYCLomatic) (commercial release: [Intel® DPC++ Compatibility Tool](https://www.intel.com/content/www/us/en/developer/tools/oneapi/dpc-compatibility-tool.html)) to migrate to SYCL.
26
-
27
- The whisper.cpp for SYCL is used to support Intel GPUs.
28
-
29
- For Intel CPUs, we recommend using whisper.cpp for x86 (Intel MKL build).
30
-
31
- ## OS
32
-
33
- |OS|Status|Verified|
34
- |-|-|-|
35
- |Linux|Support|Ubuntu 22.04|
36
- |Windows|Ongoing| |
37
-
38
-
39
- ## Intel GPU
40
-
41
- |Intel GPU| Status | Verified Model|
42
- |-|-|-|
43
- |Intel Data Center Max Series| Support| Max 1550|
44
- |Intel Data Center Flex Series| Support| Flex 170|
45
- |Intel Arc Series| Support| Arc 770|
46
- |Intel built-in Arc GPU| Support| built-in Arc GPU in Meteor Lake|
47
- |Intel iGPU| Support| iGPU in i5-1250P, i7-1165G7|
48
-
49
-
50
- ## Linux
51
-
52
- ### Setup Environment
53
-
54
- 1. Install Intel GPU driver.
55
-
56
- a. Please install Intel GPU driver by official guide: [Install GPU Drivers](https://dgpu-docs.intel.com/driver/installation.html).
57
-
58
- Note: for iGPU, please install the client GPU driver.
59
-
60
- b. Add user to group: video, render.
61
-
62
- ```
63
- sudo usermod -aG render username
64
- sudo usermod -aG video username
65
- ```
66
-
67
- Note: re-login to enable it.
68
-
69
- c. Check
70
-
71
- ```
72
- sudo apt install clinfo
73
- sudo clinfo -l
74
- ```
75
-
76
- Output (example):
77
-
78
- ```
79
- Platform #0: Intel(R) OpenCL Graphics
80
- `-- Device #0: Intel(R) Arc(TM) A770 Graphics
81
-
82
-
83
- Platform #0: Intel(R) OpenCL HD Graphics
84
- `-- Device #0: Intel(R) Iris(R) Xe Graphics [0x9a49]
85
- ```
86
-
87
- 2. Install the Intel® oneAPI Base Toolkit.
88
-
89
-
90
- a. Please follow the procedure in [Get the Intel® oneAPI Base Toolkit](https://www.intel.com/content/www/us/en/developer/tools/oneapi/base-toolkit.html).
91
-
92
- We recommend installing to the default folder: **/opt/intel/oneapi**.
93
-
94
- The following guide uses the default folder as an example. If you use a different folder, please adjust the paths below accordingly.
95
-
96
- b. Check
97
-
98
- ```
99
- source /opt/intel/oneapi/setvars.sh
100
-
101
- sycl-ls
102
- ```
103
-
104
- There should be one or more level-zero devices, such as **[ext_oneapi_level_zero:gpu:0]**.
105
-
106
- Output (example):
107
- ```
108
- [opencl:acc:0] Intel(R) FPGA Emulation Platform for OpenCL(TM), Intel(R) FPGA Emulation Device OpenCL 1.2 [2023.16.10.0.17_160000]
109
- [opencl:cpu:1] Intel(R) OpenCL, 13th Gen Intel(R) Core(TM) i7-13700K OpenCL 3.0 (Build 0) [2023.16.10.0.17_160000]
110
- [opencl:gpu:2] Intel(R) OpenCL Graphics, Intel(R) Arc(TM) A770 Graphics OpenCL 3.0 NEO [23.30.26918.50]
111
- [ext_oneapi_level_zero:gpu:0] Intel(R) Level-Zero, Intel(R) Arc(TM) A770 Graphics 1.3 [1.3.26918]
112
-
113
- ```
114
-
115
- 3. Build locally:
116
-
117
- ```
118
- mkdir -p build
119
- cd build
120
- source /opt/intel/oneapi/setvars.sh
121
-
122
- #for FP16
123
- #cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DWHISPER_SYCL_F16=ON
124
-
125
- #for FP32
126
- cmake .. -DWHISPER_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx
127
-
128
- #build example/main only
129
- #cmake --build . --config Release --target main
130
-
131
- #build all binary
132
- cmake --build . --config Release -v
133
-
134
- ```
135
-
136
- or
137
-
138
- ```
139
- ./examples/sycl/build.sh
140
- ```
141
-
142
- Note:
143
-
144
- - By default, it builds all binaries, which takes more time. To reduce the build time, we recommend building **example/main** only.
145
-
146
- ### Run
147
-
148
- 1. Put the model file in the **models** folder
149
-
150
- 2. Enable oneAPI running environment
151
-
152
- ```
153
- source /opt/intel/oneapi/setvars.sh
154
- ```
155
-
156
- 3. List device ID
157
-
158
- Run without parameter:
159
-
160
- ```
161
- ./build/bin/ls-sycl-device
162
-
163
- or
164
-
165
- ./build/bin/main
166
- ```
167
-
168
- Check the ID in startup log, like:
169
-
170
- ```
171
- found 4 SYCL devices:
172
- Device 0: Intel(R) Arc(TM) A770 Graphics, compute capability 1.3,
173
- max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
174
- Device 1: Intel(R) FPGA Emulation Device, compute capability 1.2,
175
- max compute_units 24, max work group size 67108864, max sub group size 64, global mem size 67065057280
176
- Device 2: 13th Gen Intel(R) Core(TM) i7-13700K, compute capability 3.0,
177
- max compute_units 24, max work group size 8192, max sub group size 64, global mem size 67065057280
178
- Device 3: Intel(R) Arc(TM) A770 Graphics, compute capability 3.0,
179
- max compute_units 512, max work group size 1024, max sub group size 32, global mem size 16225243136
180
-
181
- ```
182
-
183
- |Attribute|Note|
184
- |-|-|
185
- |compute capability 1.3|Level-zero runtime, recommended |
186
- |compute capability 3.0|OpenCL runtime, slower than level-zero in most cases|
187
-
188
- 4. Set device ID and execute whisper.cpp
189
-
190
- Set device ID = 0 by **GGML_SYCL_DEVICE=0**
191
-
192
- ```
193
- GGML_SYCL_DEVICE=0 ./build/bin/main -m models/ggml-base.en.bin -f samples/jfk.wav
194
- ```
195
- or run by script:
196
-
197
- ```
198
- ./examples/sycl/run_whisper.sh
199
- ```
200
-
201
-
202
-
203
- 5. Check the device ID in output
204
-
205
- Like:
206
- ```
207
- Using device **0** (Intel(R) Arc(TM) A770 Graphics) as main device
208
- ```
209
-
210
-
211
- ## Environment Variable
212
-
213
- #### Build
214
-
215
- |Name|Value|Function|
216
- |-|-|-|
217
- |WHISPER_SYCL|ON (mandatory)|Enable build with SYCL code path. <br>For FP32/FP16, WHISPER_SYCL=ON is mandatory.|
218
- |WHISPER_SYCL_F16|ON (optional)|Enable FP16 build with SYCL code path. For FP32, do not set it.|
219
- |CMAKE_C_COMPILER|icx|Use icx compiler for SYCL code path|
220
- |CMAKE_CXX_COMPILER|icpx|Use icpx for SYCL code path|
221
-
222
- #### Running
223
-
224
-
225
- |Name|Value|Function|
226
- |-|-|-|
227
- |GGML_SYCL_DEVICE|0 (default) or 1|Set the device ID to use. Check the available device IDs in the output of a default run|
228
- |GGML_SYCL_DEBUG|0 (default) or 1|Enable logging via the GGML_SYCL_DEBUG macro|
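For example, both variables can be combined on a single run (a sketch reusing the command from the section above):

```bash
# Run on device 0 with SYCL debug logging enabled
GGML_SYCL_DEBUG=1 GGML_SYCL_DEVICE=0 ./build/bin/main -m models/ggml-base.en.bin -f samples/jfk.wav
```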
229
-
230
- ## Known Issue
231
-
232
- - Error: `error while loading shared libraries: libsycl.so.7: cannot open shared object file: No such file or directory`.
233
-
234
- The oneAPI runtime environment has not been enabled.
235
-
236
- Install the oneAPI Base Toolkit and enable it with: `source /opt/intel/oneapi/setvars.sh`.
237
-
238
-
239
- - Hang during startup
240
-
241
- llama.cpp uses mmap as the default way to read the model file and copy it to the GPU. On some systems, memcpy may misbehave and block.
242
-
243
- Solution: add **--no-mmap**.
244
-
245
- ## Todo
246
-
247
- - Support building on Windows.
248
-
249
- - Support multiple cards.
scripts/bindings/CMakeLists.txt DELETED
@@ -1,19 +0,0 @@
1
- if (EMSCRIPTEN)
2
- add_subdirectory(javascript)
3
-
4
- add_custom_command(
5
- OUTPUT ${CMAKE_CURRENT_SOURCE_DIR}/javascript/publish.log
6
- DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/javascript/whisper.js
7
- DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/javascript/libwhisper.worker.js
8
- DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/javascript/package.json
9
- WORKING_DIRECTORY ${CMAKE_CURRENT_SOURCE_DIR}/javascript
10
- COMMAND npm publish
11
- COMMAND touch publish.log
12
- COMMENT "Publishing npm module v${PROJECT_VERSION}"
13
- VERBATIM
14
- )
15
-
16
- add_custom_target(publish-npm
17
- DEPENDS javascript/publish.log
18
- )
19
- endif()
scripts/bindings/go/.gitignore DELETED
@@ -1,2 +0,0 @@
1
- build
2
- models
scripts/bindings/go/LICENSE DELETED
@@ -1,21 +0,0 @@
1
- MIT License
2
-
3
- Copyright (c) 2022 David Thorpe
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy
6
- of this software and associated documentation files (the "Software"), to deal
7
- in the Software without restriction, including without limitation the rights
8
- to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
- copies of the Software, and to permit persons to whom the Software is
10
- furnished to do so, subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
- FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
- AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
- LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
- OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
- SOFTWARE.
scripts/bindings/go/Makefile DELETED
@@ -1,64 +0,0 @@
1
- ifndef UNAME_S
2
- UNAME_S := $(shell uname -s)
3
- endif
4
-
5
- ifndef UNAME_P
6
- UNAME_P := $(shell uname -p)
7
- endif
8
-
9
- ifndef UNAME_M
10
- UNAME_M := $(shell uname -m)
11
- endif
12
-
13
- GGML_METAL_PATH_RESOURCES := $(abspath ../..)
14
- BUILD_DIR := build
15
- MODELS_DIR := models
16
- EXAMPLES_DIR := $(wildcard examples/*)
17
- INCLUDE_PATH := $(abspath ../../include):$(abspath ../../ggml/include)
18
- LIBRARY_PATH := $(abspath ../..)
19
-
20
- ifeq ($(UNAME_S),Darwin)
21
- EXT_LDFLAGS := -framework Foundation -framework Metal -framework MetalKit
22
- endif
23
-
24
- all: clean whisper examples
25
-
26
- whisper: mkdir
27
- @echo Build whisper
28
- @${MAKE} -C ../.. libwhisper.a
29
-
30
- test: model-small whisper modtidy
31
- ifeq ($(UNAME_S),Darwin)
32
- @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -v .
33
- @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go test -ldflags "-extldflags '$(EXT_LDFLAGS)'" -v ./pkg/whisper/...
34
- else
35
- @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v .
36
- @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go test -v ./pkg/whisper/...
37
- endif
38
-
39
- examples: $(EXAMPLES_DIR)
40
-
41
- model-small: mkdir examples/go-model-download
42
- @${BUILD_DIR}/go-model-download -out models ggml-small.en.bin
43
-
44
- $(EXAMPLES_DIR): mkdir whisper modtidy
45
- @echo Build example $(notdir $@)
46
- ifeq ($(UNAME_S),Darwin)
47
- @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} GGML_METAL_PATH_RESOURCES=${GGML_METAL_PATH_RESOURCES} go build ${BUILD_FLAGS} -ldflags "-extldflags '$(EXT_LDFLAGS)'" -o ${BUILD_DIR}/$(notdir $@) ./$@
48
- else
49
- @C_INCLUDE_PATH=${INCLUDE_PATH} LIBRARY_PATH=${LIBRARY_PATH} go build ${BUILD_FLAGS} -o ${BUILD_DIR}/$(notdir $@) ./$@
50
- endif
51
-
52
- mkdir:
53
- @echo Mkdir ${BUILD_DIR}
54
- @install -d ${BUILD_DIR}
55
- @echo Mkdir ${MODELS_DIR}
56
- @install -d ${MODELS_DIR}
57
-
58
- modtidy:
59
- @go mod tidy
60
-
61
- clean:
62
- @echo Clean
63
- @rm -fr $(BUILD_DIR)
64
- @go clean
scripts/bindings/go/README.md DELETED
@@ -1,100 +0,0 @@
1
- # Go bindings for Whisper
2
-
3
- This package provides Go bindings for whisper.cpp. They have been tested on:
4
-
5
- * Darwin (OS X) 12.6 on x86_64
6
- * Debian Linux on arm64
7
- * Fedora Linux on x86_64
8
-
9
- The "low level" bindings are in the `bindings/go` directory and there is a more
10
- Go-style package in the `bindings/go/pkg/whisper` directory. The simplest usage
11
- is as follows:
12
-
13
- ```go
14
- import (
15
- "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
16
- )
17
-
18
- func main() {
19
- var modelpath string // Path to the model
20
- var samples []float32 // Samples to process
21
-
22
- // Load the model
23
- model, err := whisper.New(modelpath)
24
- if err != nil {
25
- panic(err)
26
- }
27
- defer model.Close()
28
-
29
- // Process samples
30
- context, err := model.NewContext()
31
- if err != nil {
32
- panic(err)
33
- }
34
- if err := context.Process(samples, nil, nil); err != nil {
35
- panic(err)
36
- }
37
-
38
- // Print out the results
39
- for {
40
- segment, err := context.NextSegment()
41
- if err != nil {
42
- break
43
- }
44
- fmt.Printf("[%6s->%6s] %s\n", segment.Start, segment.End, segment.Text)
45
- }
46
- }
47
- ```
48
-
49
- ## Building & Testing
50
-
51
- In order to build, you need to have the Go compiler installed. You can get it from [here](https://golang.org/dl/). Run the tests with:
52
-
53
- ```bash
54
- git clone https://github.com/ggerganov/whisper.cpp.git
55
- cd whisper.cpp/bindings/go
56
- make test
57
- ```
58
-
59
- This will compile a static `libwhisper.a` in a `build` folder, download a model file, then run the tests. To build the examples:
60
-
61
- ```bash
62
- make examples
63
- ```
64
-
65
- The examples are placed in the `build` directory. Once built, you can download all the models with the following command:
66
-
67
- ```bash
68
- ./build/go-model-download -out models
69
- ```
70
-
71
- And you can then test a model against samples with the following command:
72
-
73
- ```bash
74
- ./build/go-whisper -model models/ggml-tiny.en.bin samples/jfk.wav
75
- ```
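The example also accepts the flags registered in its `flags.go` (shown further down in this diff); for instance, a sketch that writes SRT output instead of plain text:

```bash
# Transcribe a sample and emit SRT-formatted output
./build/go-whisper -model models/ggml-small.en.bin -out srt samples/jfk.wav
```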
76
-
77
- ## Using the bindings
78
-
79
- To use the bindings in your own software,
80
-
81
- 1. Import `github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper` (or `github.com/ggerganov/whisper.cpp/bindings/go`) into your package;
82
- 2. Compile `libwhisper.a` (you can use `make whisper` in the `bindings/go` directory);
83
- 3. Link your go binary against whisper by setting the environment variables `C_INCLUDE_PATH` and `LIBRARY_PATH`
84
- to point to the `whisper.h` file directory and `libwhisper.a` file directory respectively.
85
-
86
- Look at the `Makefile` in the `bindings/go` directory for an example.
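For instance, a minimal sketch based on that Makefile (the include and library paths below mirror the ones it sets; adjust them for your own project layout):

```bash
# Build the static library, then point cgo at the whisper headers and libwhisper.a
git clone https://github.com/ggerganov/whisper.cpp.git
cd whisper.cpp/bindings/go
make whisper
C_INCLUDE_PATH="$(pwd)/../../include:$(pwd)/../../ggml/include" \
LIBRARY_PATH="$(pwd)/../.." \
go build ./...
```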
87
-
88
- The API Documentation:
89
-
90
- * https://pkg.go.dev/github.com/ggerganov/whisper.cpp/bindings/go
91
- * https://pkg.go.dev/github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper
92
-
93
- Getting help:
94
-
95
- * Follow the discussion for the go bindings [here](https://github.com/ggerganov/whisper.cpp/discussions/312)
96
-
97
- ## License
98
-
99
- The license for the Go bindings is the same as the license for the rest of the whisper.cpp project, which is the MIT License. See the `LICENSE` file for more details.
100
-
scripts/bindings/go/doc.go DELETED
@@ -1,5 +0,0 @@
1
- /*
2
- github.com/ggerganov/whisper.cpp/bindings/go
3
- provides a speech-to-text service bindings for the Go programming language.
4
- */
5
- package whisper
scripts/bindings/go/examples/go-model-download/context.go DELETED
@@ -1,30 +0,0 @@
1
- package main
2
-
3
- import (
4
- "context"
5
- "os"
6
- "os/signal"
7
- )
8
-
9
- // ContextForSignal returns a context object which is cancelled when a signal
10
- // is received. It returns nil if no signal parameter is provided
11
- func ContextForSignal(signals ...os.Signal) context.Context {
12
- if len(signals) == 0 {
13
- return nil
14
- }
15
-
16
- ch := make(chan os.Signal)
17
- ctx, cancel := context.WithCancel(context.Background())
18
-
19
- // Send message on channel when signal received
20
- signal.Notify(ch, signals...)
21
-
22
- // When any signal received, call cancel
23
- go func() {
24
- <-ch
25
- cancel()
26
- }()
27
-
28
- // Return success
29
- return ctx
30
- }
scripts/bindings/go/examples/go-model-download/main.go DELETED
@@ -1,208 +0,0 @@
1
- package main
2
-
3
- import (
4
- "context"
5
- "flag"
6
- "fmt"
7
- "io"
8
- "net/http"
9
- "net/url"
10
- "os"
11
- "path/filepath"
12
- "syscall"
13
- "time"
14
- )
15
-
16
- ///////////////////////////////////////////////////////////////////////////////
17
- // CONSTANTS
18
-
19
- const (
20
- srcUrl = "https://huggingface.co/ggerganov/whisper.cpp/resolve/main" // The location of the models
21
- srcExt = ".bin" // Filename extension
22
- bufSize = 1024 * 64 // Size of the buffer used for downloading the model
23
- )
24
-
25
- var (
26
- // The models which will be downloaded, if no model is specified as an argument
27
- modelNames = []string{"ggml-tiny.en", "ggml-tiny", "ggml-base.en", "ggml-base", "ggml-small.en", "ggml-small", "ggml-medium.en", "ggml-medium", "ggml-large-v1", "ggml-large-v2", "ggml-large-v3"}
28
- )
29
-
30
- var (
31
- // The output folder. When not set, use current working directory.
32
- flagOut = flag.String("out", "", "Output folder")
33
-
34
- // HTTP timeout parameter - will timeout if takes longer than this to download a model
35
- flagTimeout = flag.Duration("timeout", 30*time.Minute, "HTTP timeout")
36
-
37
- // Quiet parameter - will not print progress if set
38
- flagQuiet = flag.Bool("quiet", false, "Quiet mode")
39
- )
40
-
41
- ///////////////////////////////////////////////////////////////////////////////
42
- // MAIN
43
-
44
- func main() {
45
- flag.Usage = func() {
46
- name := filepath.Base(flag.CommandLine.Name())
47
- fmt.Fprintf(flag.CommandLine.Output(), "Usage: %s [options] <model>\n\n", name)
48
- flag.PrintDefaults()
49
- }
50
- flag.Parse()
51
-
52
- // Get output path
53
- out, err := GetOut()
54
- if err != nil {
55
- fmt.Fprintln(os.Stderr, "Error:", err)
56
- os.Exit(-1)
57
- }
58
-
59
- // Create context which quits on SIGINT or SIGQUIT
60
- ctx := ContextForSignal(os.Interrupt, syscall.SIGQUIT)
61
-
62
- // Progress filehandle
63
- progress := os.Stdout
64
- if *flagQuiet {
65
- progress, err = os.Open(os.DevNull)
66
- if err != nil {
67
- fmt.Fprintln(os.Stderr, "Error:", err)
68
- os.Exit(-1)
69
- }
70
- defer progress.Close()
71
- }
72
-
73
- // Download models - exit on error or interrupt
74
- for _, model := range GetModels() {
75
- url, err := URLForModel(model)
76
- if err != nil {
77
- fmt.Fprintln(os.Stderr, "Error:", err)
78
- continue
79
- } else if path, err := Download(ctx, progress, url, out); err == nil || err == io.EOF {
80
- continue
81
- } else if err == context.Canceled {
82
- os.Remove(path)
83
- fmt.Fprintln(progress, "\nInterrupted")
84
- break
85
- } else if err == context.DeadlineExceeded {
86
- os.Remove(path)
87
- fmt.Fprintln(progress, "Timeout downloading model")
88
- continue
89
- } else {
90
- os.Remove(path)
91
- fmt.Fprintln(os.Stderr, "Error:", err)
92
- break
93
- }
94
- }
95
- }
96
-
97
- ///////////////////////////////////////////////////////////////////////////////
98
- // PUBLIC METHODS
99
-
100
- // GetOut returns the path to the output directory
101
- func GetOut() (string, error) {
102
- if *flagOut == "" {
103
- return os.Getwd()
104
- }
105
- if info, err := os.Stat(*flagOut); err != nil {
106
- return "", err
107
- } else if !info.IsDir() {
108
- return "", fmt.Errorf("not a directory: %s", info.Name())
109
- } else {
110
- return *flagOut, nil
111
- }
112
- }
113
-
114
- // GetModels returns the list of models to download
115
- func GetModels() []string {
116
- if flag.NArg() == 0 {
117
- return modelNames
118
- } else {
119
- return flag.Args()
120
- }
121
- }
122
-
123
- // URLForModel returns the URL for the given model on huggingface.co
124
- func URLForModel(model string) (string, error) {
125
- if filepath.Ext(model) != srcExt {
126
- model += srcExt
127
- }
128
- url, err := url.Parse(srcUrl)
129
- if err != nil {
130
- return "", err
131
- } else {
132
- url.Path = filepath.Join(url.Path, model)
133
- }
134
- return url.String(), nil
135
- }
136
-
137
- // Download downloads the model from the given URL to the given output directory
138
- func Download(ctx context.Context, p io.Writer, model, out string) (string, error) {
139
- // Create HTTP client
140
- client := http.Client{
141
- Timeout: *flagTimeout,
142
- }
143
-
144
- // Initiate the download
145
- req, err := http.NewRequest("GET", model, nil)
146
- if err != nil {
147
- return "", err
148
- }
149
- resp, err := client.Do(req)
150
- if err != nil {
151
- return "", err
152
- }
153
- defer resp.Body.Close()
154
- if resp.StatusCode != http.StatusOK {
155
- return "", fmt.Errorf("%s: %s", model, resp.Status)
156
- }
157
-
158
- // If output file exists and is the same size as the model, skip
159
- path := filepath.Join(out, filepath.Base(model))
160
- if info, err := os.Stat(path); err == nil && info.Size() == resp.ContentLength {
161
- fmt.Fprintln(p, "Skipping", model, "as it already exists")
162
- return "", nil
163
- }
164
-
165
- // Create file
166
- w, err := os.Create(path)
167
- if err != nil {
168
- return "", err
169
- }
170
- defer w.Close()
171
-
172
- // Report
173
- fmt.Fprintln(p, "Downloading", model, "to", out)
174
-
175
- // Progressively download the model
176
- data := make([]byte, bufSize)
177
- count, pct := int64(0), int64(0)
178
- ticker := time.NewTicker(5 * time.Second)
179
- for {
180
- select {
181
- case <-ctx.Done():
182
- // Cancelled, return error
183
- return path, ctx.Err()
184
- case <-ticker.C:
185
- pct = DownloadReport(p, pct, count, resp.ContentLength)
186
- default:
187
- // Read body
188
- n, err := resp.Body.Read(data)
189
- if err != nil {
190
- DownloadReport(p, pct, count, resp.ContentLength)
191
- return path, err
192
- } else if m, err := w.Write(data[:n]); err != nil {
193
- return path, err
194
- } else {
195
- count += int64(m)
196
- }
197
- }
198
- }
199
- }
200
-
201
- // Report periodically reports the download progress when percentage changes
202
- func DownloadReport(w io.Writer, pct, count, total int64) int64 {
203
- pct_ := count * 100 / total
204
- if pct_ > pct {
205
- fmt.Fprintf(w, " ...%d MB written (%d%%)\n", count/1e6, pct_)
206
- }
207
- return pct_
208
- }
scripts/bindings/go/examples/go-whisper/color.go DELETED
@@ -1,22 +0,0 @@
1
- package main
2
-
3
- import "fmt"
4
-
5
- ///////////////////////////////////////////////////////////////////////////////
6
- // CONSTANTS
7
-
8
- const (
9
- Reset = "\033[0m"
10
- RGBPrefix = "\033[38;5;" // followed by RGB values in decimal format separated by colons
11
- RGBSuffix = "m"
12
- )
13
-
14
- ///////////////////////////////////////////////////////////////////////////////
15
- // PUBLIC METHODS
16
-
17
- // Colorize text with RGB values, from 0 to 23
18
- func Colorize(text string, v int) string {
19
- // https://en.wikipedia.org/wiki/ANSI_escape_code#8-bit
20
- // Grayscale colors are in the range 232-255
21
- return RGBPrefix + fmt.Sprint(v%24+232) + RGBSuffix + text + Reset
22
- }
scripts/bindings/go/examples/go-whisper/flags.go DELETED
@@ -1,147 +0,0 @@
1
- package main
2
-
3
- import (
4
- "flag"
5
- "fmt"
6
- "strings"
7
- "time"
8
-
9
- // Packages
10
- whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
11
- )
12
-
13
- ///////////////////////////////////////////////////////////////////////////////
14
- // TYPES
15
-
16
- type Flags struct {
17
- *flag.FlagSet
18
- }
19
-
20
- ///////////////////////////////////////////////////////////////////////////////
21
- // LIFECYCLE
22
-
23
- func NewFlags(name string, args []string) (*Flags, error) {
24
- flags := &Flags{
25
- FlagSet: flag.NewFlagSet(name, flag.ContinueOnError),
26
- }
27
-
28
- // Register the command line arguments
29
- registerFlags(flags)
30
-
31
- // Parse command line
32
- if err := flags.Parse(args); err != nil {
33
- return nil, err
34
- }
35
-
36
- // Return success
37
- return flags, nil
38
- }
39
-
40
- ///////////////////////////////////////////////////////////////////////////////
41
- // PUBLIC METHODS
42
-
43
- func (flags *Flags) GetModel() string {
44
- return flags.Lookup("model").Value.String()
45
- }
46
-
47
- func (flags *Flags) GetLanguage() string {
48
- return flags.Lookup("language").Value.String()
49
- }
50
-
51
- func (flags *Flags) IsTranslate() bool {
52
- return flags.Lookup("translate").Value.(flag.Getter).Get().(bool)
53
- }
54
-
55
- func (flags *Flags) GetOffset() time.Duration {
56
- return flags.Lookup("offset").Value.(flag.Getter).Get().(time.Duration)
57
- }
58
-
59
- func (flags *Flags) GetDuration() time.Duration {
60
- return flags.Lookup("duration").Value.(flag.Getter).Get().(time.Duration)
61
- }
62
-
63
- func (flags *Flags) GetThreads() uint {
64
- return flags.Lookup("threads").Value.(flag.Getter).Get().(uint)
65
- }
66
-
67
- func (flags *Flags) GetOut() string {
68
- return strings.ToLower(flags.Lookup("out").Value.String())
69
- }
70
-
71
- func (flags *Flags) IsTokens() bool {
72
- return flags.Lookup("tokens").Value.String() == "true"
73
- }
74
-
75
- func (flags *Flags) IsColorize() bool {
76
- return flags.Lookup("colorize").Value.String() == "true"
77
- }
78
-
79
- func (flags *Flags) GetMaxLen() uint {
80
- return flags.Lookup("max-len").Value.(flag.Getter).Get().(uint)
81
- }
82
-
83
- func (flags *Flags) GetMaxTokens() uint {
84
- return flags.Lookup("max-tokens").Value.(flag.Getter).Get().(uint)
85
- }
86
-
87
- func (flags *Flags) GetWordThreshold() float32 {
88
- return float32(flags.Lookup("word-thold").Value.(flag.Getter).Get().(float64))
89
- }
90
-
91
- func (flags *Flags) SetParams(context whisper.Context) error {
92
- if lang := flags.GetLanguage(); lang != "" && lang != "auto" {
93
- fmt.Fprintf(flags.Output(), "Setting language to %q\n", lang)
94
- if err := context.SetLanguage(lang); err != nil {
95
- return err
96
- }
97
- }
98
- if flags.IsTranslate() && context.IsMultilingual() {
99
- fmt.Fprintf(flags.Output(), "Setting translate to true\n")
100
- context.SetTranslate(true)
101
- }
102
- if offset := flags.GetOffset(); offset != 0 {
103
- fmt.Fprintf(flags.Output(), "Setting offset to %v\n", offset)
104
- context.SetOffset(offset)
105
- }
106
- if duration := flags.GetDuration(); duration != 0 {
107
- fmt.Fprintf(flags.Output(), "Setting duration to %v\n", duration)
108
- context.SetDuration(duration)
109
- }
110
- if threads := flags.GetThreads(); threads != 0 {
111
- fmt.Fprintf(flags.Output(), "Setting threads to %d\n", threads)
112
- context.SetThreads(threads)
113
- }
114
- if max_len := flags.GetMaxLen(); max_len != 0 {
115
- fmt.Fprintf(flags.Output(), "Setting max_segment_length to %d\n", max_len)
116
- context.SetMaxSegmentLength(max_len)
117
- }
118
- if max_tokens := flags.GetMaxTokens(); max_tokens != 0 {
119
- fmt.Fprintf(flags.Output(), "Setting max_tokens to %d\n", max_tokens)
120
- context.SetMaxTokensPerSegment(max_tokens)
121
- }
122
- if word_threshold := flags.GetWordThreshold(); word_threshold != 0 {
123
- fmt.Fprintf(flags.Output(), "Setting word_threshold to %f\n", word_threshold)
124
- context.SetTokenThreshold(word_threshold)
125
- }
126
-
127
- // Return success
128
- return nil
129
- }
130
-
131
- ///////////////////////////////////////////////////////////////////////////////
132
- // PRIVATE METHODS
133
-
134
- func registerFlags(flag *Flags) {
135
- flag.String("model", "", "Path to the model file")
136
- flag.String("language", "", "Spoken language")
137
- flag.Bool("translate", false, "Translate from source language to english")
138
- flag.Duration("offset", 0, "Time offset")
139
- flag.Duration("duration", 0, "Duration of audio to process")
140
- flag.Uint("threads", 0, "Number of threads to use")
141
- flag.Uint("max-len", 0, "Maximum segment length in characters")
142
- flag.Uint("max-tokens", 0, "Maximum tokens per segment")
143
- flag.Float64("word-thold", 0, "Maximum segment score")
144
- flag.Bool("tokens", false, "Display tokens")
145
- flag.Bool("colorize", false, "Colorize tokens")
146
- flag.String("out", "", "Output format (srt, none or leave as empty string)")
147
- }
scripts/bindings/go/examples/go-whisper/main.go DELETED
@@ -1,43 +0,0 @@
1
- package main
2
-
3
- import (
4
- "flag"
5
- "fmt"
6
- "os"
7
- "path/filepath"
8
-
9
- // Packages
10
- whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
11
- )
12
-
13
- func main() {
14
- flags, err := NewFlags(filepath.Base(os.Args[0]), os.Args[1:])
15
- if err == flag.ErrHelp {
16
- os.Exit(0)
17
- } else if err != nil {
18
- fmt.Fprintln(os.Stderr, err)
19
- os.Exit(1)
20
- } else if flags.GetModel() == "" {
21
- fmt.Fprintln(os.Stderr, "Use -model flag to specify which model file to use")
22
- os.Exit(1)
23
- } else if flags.NArg() == 0 {
24
- fmt.Fprintln(os.Stderr, "No input files specified")
25
- os.Exit(1)
26
- }
27
-
28
- // Load model
29
- model, err := whisper.New(flags.GetModel())
30
- if err != nil {
31
- fmt.Fprintln(os.Stderr, err)
32
- os.Exit(1)
33
- }
34
- defer model.Close()
35
-
36
- // Process files
37
- for _, filename := range flags.Args() {
38
- if err := Process(model, filename, flags); err != nil {
39
- fmt.Fprintln(os.Stderr, err)
40
- continue
41
- }
42
- }
43
- }
scripts/bindings/go/examples/go-whisper/process.go DELETED
@@ -1,132 +0,0 @@
1
- package main
2
-
3
- import (
4
- "fmt"
5
- "io"
6
- "os"
7
- "time"
8
-
9
- // Package imports
10
- whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
11
- wav "github.com/go-audio/wav"
12
- )
13
-
14
- func Process(model whisper.Model, path string, flags *Flags) error {
15
- var data []float32
16
-
17
- // Create processing context
18
- context, err := model.NewContext()
19
- if err != nil {
20
- return err
21
- }
22
-
23
- // Set the parameters
24
- if err := flags.SetParams(context); err != nil {
25
- return err
26
- }
27
-
28
- fmt.Printf("\n%s\n", context.SystemInfo())
29
-
30
- // Open the file
31
- fmt.Fprintf(flags.Output(), "Loading %q\n", path)
32
- fh, err := os.Open(path)
33
- if err != nil {
34
- return err
35
- }
36
- defer fh.Close()
37
-
38
- // Decode the WAV file - load the full buffer
39
- dec := wav.NewDecoder(fh)
40
- if buf, err := dec.FullPCMBuffer(); err != nil {
41
- return err
42
- } else if dec.SampleRate != whisper.SampleRate {
43
- return fmt.Errorf("unsupported sample rate: %d", dec.SampleRate)
44
- } else if dec.NumChans != 1 {
45
- return fmt.Errorf("unsupported number of channels: %d", dec.NumChans)
46
- } else {
47
- data = buf.AsFloat32Buffer().Data
48
- }
49
-
50
- // Segment callback when -tokens is specified
51
- var cb whisper.SegmentCallback
52
- if flags.IsTokens() {
53
- cb = func(segment whisper.Segment) {
54
- fmt.Fprintf(flags.Output(), "%02d [%6s->%6s] ", segment.Num, segment.Start.Truncate(time.Millisecond), segment.End.Truncate(time.Millisecond))
55
- for _, token := range segment.Tokens {
56
- if flags.IsColorize() && context.IsText(token) {
57
- fmt.Fprint(flags.Output(), Colorize(token.Text, int(token.P*24.0)), " ")
58
- } else {
59
- fmt.Fprint(flags.Output(), token.Text, " ")
60
- }
61
- }
62
- fmt.Fprintln(flags.Output(), "")
63
- fmt.Fprintln(flags.Output(), "")
64
- }
65
- }
66
-
67
- // Process the data
68
- fmt.Fprintf(flags.Output(), " ...processing %q\n", path)
69
- context.ResetTimings()
70
- if err := context.Process(data, cb, nil); err != nil {
71
- return err
72
- }
73
-
74
- context.PrintTimings()
75
-
76
- // Print out the results
77
- switch {
78
- case flags.GetOut() == "srt":
79
- return OutputSRT(os.Stdout, context)
80
- case flags.GetOut() == "none":
81
- return nil
82
- default:
83
- return Output(os.Stdout, context, flags.IsColorize())
84
- }
85
- }
86
-
87
- // Output text as SRT file
88
- func OutputSRT(w io.Writer, context whisper.Context) error {
89
- n := 1
90
- for {
91
- segment, err := context.NextSegment()
92
- if err == io.EOF {
93
- return nil
94
- } else if err != nil {
95
- return err
96
- }
97
- fmt.Fprintln(w, n)
98
- fmt.Fprintln(w, srtTimestamp(segment.Start), " --> ", srtTimestamp(segment.End))
99
- fmt.Fprintln(w, segment.Text)
100
- fmt.Fprintln(w, "")
101
- n++
102
- }
103
- }
104
-
105
- // Output text to terminal
106
- func Output(w io.Writer, context whisper.Context, colorize bool) error {
107
- for {
108
- segment, err := context.NextSegment()
109
- if err == io.EOF {
110
- return nil
111
- } else if err != nil {
112
- return err
113
- }
114
- fmt.Fprintf(w, "[%6s->%6s]", segment.Start.Truncate(time.Millisecond), segment.End.Truncate(time.Millisecond))
115
- if colorize {
116
- for _, token := range segment.Tokens {
117
- if !context.IsText(token) {
118
- continue
119
- }
120
- fmt.Fprint(w, " ", Colorize(token.Text, int(token.P*24.0)))
121
- }
122
- fmt.Fprint(w, "\n")
123
- } else {
124
- fmt.Fprintln(w, " ", segment.Text)
125
- }
126
- }
127
- }
128
-
129
- // srtTimestamp formats a duration as an SRT timestamp (HH:MM:SS,mmm)
130
- func srtTimestamp(t time.Duration) string {
131
- return fmt.Sprintf("%02d:%02d:%02d,%03d", t/time.Hour, (t%time.Hour)/time.Minute, (t%time.Minute)/time.Second, (t%time.Second)/time.Millisecond)
132
- }
scripts/bindings/go/go.mod DELETED
@@ -1,16 +0,0 @@
1
- module github.com/ggerganov/whisper.cpp/bindings/go
2
-
3
- go 1.19
4
-
5
- require (
6
- github.com/go-audio/wav v1.1.0
7
- github.com/stretchr/testify v1.8.1
8
- )
9
-
10
- require (
11
- github.com/davecgh/go-spew v1.1.1 // indirect
12
- github.com/go-audio/audio v1.0.0 // indirect
13
- github.com/go-audio/riff v1.0.0 // indirect
14
- github.com/pmezard/go-difflib v1.0.0 // indirect
15
- gopkg.in/yaml.v3 v3.0.1 // indirect
16
- )
scripts/bindings/go/go.sum DELETED
@@ -1,23 +0,0 @@
1
- github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
2
- github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
3
- github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
4
- github.com/go-audio/audio v1.0.0 h1:zS9vebldgbQqktK4H0lUqWrG8P0NxCJVqcj7ZpNnwd4=
5
- github.com/go-audio/audio v1.0.0/go.mod h1:6uAu0+H2lHkwdGsAY+j2wHPNPpPoeg5AaEFh9FlA+Zs=
6
- github.com/go-audio/riff v1.0.0 h1:d8iCGbDvox9BfLagY94fBynxSPHO80LmZCaOsmKxokA=
7
- github.com/go-audio/riff v1.0.0/go.mod h1:l3cQwc85y79NQFCRB7TiPoNiaijp6q8Z0Uv38rVG498=
8
- github.com/go-audio/wav v1.1.0 h1:jQgLtbqBzY7G+BM8fXF7AHUk1uHUviWS4X39d5rsL2g=
9
- github.com/go-audio/wav v1.1.0/go.mod h1:mpe9qfwbScEbkd8uybLuIpTgHyrISw/OTuvjUW2iGtE=
10
- github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
11
- github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
12
- github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
13
- github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw=
14
- github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo=
15
- github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
16
- github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU=
17
- github.com/stretchr/testify v1.8.1 h1:w7B6lhMri9wdJUVmEZPGGhZzrYTPvgJArz7wNPgYKsk=
18
- github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4=
19
- gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
20
- gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
21
- gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
22
- gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
23
- gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
scripts/bindings/go/params.go DELETED
@@ -1,192 +0,0 @@
1
- package whisper
2
-
3
- import (
4
- "fmt"
5
- )
6
-
7
- ///////////////////////////////////////////////////////////////////////////////
8
- // CGO
9
-
10
- /*
11
- #include <whisper.h>
12
- */
13
- import "C"
14
-
15
- ///////////////////////////////////////////////////////////////////////////////
16
- // PUBLIC METHODS
17
-
18
- func (p *Params) SetTranslate(v bool) {
19
- p.translate = toBool(v)
20
- }
21
-
22
- func (p *Params) SetSplitOnWord(v bool) {
23
- p.split_on_word = toBool(v)
24
- }
25
-
26
- func (p *Params) SetNoContext(v bool) {
27
- p.no_context = toBool(v)
28
- }
29
-
30
- func (p *Params) SetSingleSegment(v bool) {
31
- p.single_segment = toBool(v)
32
- }
33
-
34
- func (p *Params) SetPrintSpecial(v bool) {
35
- p.print_special = toBool(v)
36
- }
37
-
38
- func (p *Params) SetPrintProgress(v bool) {
39
- p.print_progress = toBool(v)
40
- }
41
-
42
- func (p *Params) SetPrintRealtime(v bool) {
43
- p.print_realtime = toBool(v)
44
- }
45
-
46
- func (p *Params) SetPrintTimestamps(v bool) {
47
- p.print_timestamps = toBool(v)
48
- }
49
-
50
- // Set language id
51
- func (p *Params) SetLanguage(lang int) error {
52
- if lang == -1 {
53
- p.language = nil
54
- return nil
55
- }
56
- str := C.whisper_lang_str(C.int(lang))
57
- if str == nil {
58
- return ErrInvalidLanguage
59
- } else {
60
- p.language = str
61
- }
62
- return nil
63
- }
64
-
65
- // Get language id
66
- func (p *Params) Language() int {
67
- if p.language == nil {
68
- return -1
69
- }
70
- return int(C.whisper_lang_id(p.language))
71
- }
72
-
73
- // Threads available
74
- func (p *Params) Threads() int {
75
- return int(p.n_threads)
76
- }
77
-
78
- // Set number of threads to use
79
- func (p *Params) SetThreads(threads int) {
80
- p.n_threads = C.int(threads)
81
- }
82
-
83
- // Set start offset in ms
84
- func (p *Params) SetOffset(offset_ms int) {
85
- p.offset_ms = C.int(offset_ms)
86
- }
87
-
88
- // Set audio duration to process in ms
89
- func (p *Params) SetDuration(duration_ms int) {
90
- p.duration_ms = C.int(duration_ms)
91
- }
92
-
93
- // Set timestamp token probability threshold (~0.01)
94
- func (p *Params) SetTokenThreshold(t float32) {
95
- p.thold_pt = C.float(t)
96
- }
97
-
98
- // Set timestamp token sum probability threshold (~0.01)
99
- func (p *Params) SetTokenSumThreshold(t float32) {
100
- p.thold_ptsum = C.float(t)
101
- }
102
-
103
- // Set max segment length in characters
104
- func (p *Params) SetMaxSegmentLength(n int) {
105
- p.max_len = C.int(n)
106
- }
107
-
108
- func (p *Params) SetTokenTimestamps(b bool) {
109
- p.token_timestamps = toBool(b)
110
- }
111
-
112
- // Set max tokens per segment (0 = no limit)
113
- func (p *Params) SetMaxTokensPerSegment(n int) {
114
- p.max_tokens = C.int(n)
115
- }
116
-
117
- // Set audio encoder context
118
- func (p *Params) SetAudioCtx(n int) {
119
- p.audio_ctx = C.int(n)
120
- }
121
-
122
- func (p *Params) SetMaxContext(n int) {
123
- p.n_max_text_ctx = C.int(n)
124
- }
125
-
126
- func (p *Params) SetBeamSize(n int) {
127
- p.beam_search.beam_size = C.int(n)
128
- }
129
-
130
- func (p *Params) SetEntropyThold(t float32) {
131
- p.entropy_thold = C.float(t)
132
- }
133
-
134
- // Set initial prompt
135
- func (p *Params) SetInitialPrompt(prompt string) {
136
- p.initial_prompt = C.CString(prompt)
137
- }
138
-
139
- ///////////////////////////////////////////////////////////////////////////////
140
- // PRIVATE METHODS
141
-
142
- func toBool(v bool) C.bool {
143
- if v {
144
- return C.bool(true)
145
- }
146
- return C.bool(false)
147
- }
148
-
149
- ///////////////////////////////////////////////////////////////////////////////
150
- // STRINGIFY
151
-
152
- func (p *Params) String() string {
153
- str := "<whisper.params"
154
- str += fmt.Sprintf(" strategy=%v", p.strategy)
155
- str += fmt.Sprintf(" n_threads=%d", p.n_threads)
156
- if p.language != nil {
157
- str += fmt.Sprintf(" language=%s", C.GoString(p.language))
158
- }
159
- str += fmt.Sprintf(" n_max_text_ctx=%d", p.n_max_text_ctx)
160
- str += fmt.Sprintf(" offset_ms=%d", p.offset_ms)
161
- str += fmt.Sprintf(" duration_ms=%d", p.duration_ms)
162
- str += fmt.Sprintf(" audio_ctx=%d", p.audio_ctx)
163
- str += fmt.Sprintf(" initial_prompt=%s", C.GoString(p.initial_prompt))
164
- str += fmt.Sprintf(" entropy_thold=%f", p.entropy_thold)
165
- str += fmt.Sprintf(" beam_size=%d", p.beam_search.beam_size)
166
- if p.translate {
167
- str += " translate"
168
- }
169
- if p.no_context {
170
- str += " no_context"
171
- }
172
- if p.single_segment {
173
- str += " single_segment"
174
- }
175
- if p.print_special {
176
- str += " print_special"
177
- }
178
- if p.print_progress {
179
- str += " print_progress"
180
- }
181
- if p.print_realtime {
182
- str += " print_realtime"
183
- }
184
- if p.print_timestamps {
185
- str += " print_timestamps"
186
- }
187
- if p.token_timestamps {
188
- str += " token_timestamps"
189
- }
190
-
191
- return str + ">"
192
- }
scripts/bindings/go/pkg/whisper/consts.go DELETED
@@ -1,28 +0,0 @@
1
- package whisper
2
-
3
- import (
4
- "errors"
5
-
6
- // Bindings
7
- whisper "github.com/ggerganov/whisper.cpp/bindings/go"
8
- )
9
-
10
- ///////////////////////////////////////////////////////////////////////////////
11
- // ERRORS
12
-
13
- var (
14
- ErrUnableToLoadModel = errors.New("unable to load model")
15
- ErrInternalAppError = errors.New("internal application error")
16
- ErrProcessingFailed = errors.New("processing failed")
17
- ErrUnsupportedLanguage = errors.New("unsupported language")
18
- ErrModelNotMultilingual = errors.New("model is not multilingual")
19
- )
20
-
21
- ///////////////////////////////////////////////////////////////////////////////
22
- // CONSTANTS
23
-
24
- // SampleRate is the sample rate of the audio data.
25
- const SampleRate = whisper.SampleRate
26
-
27
- // SampleBits is the number of bits per sample.
28
- const SampleBits = whisper.SampleBits
scripts/bindings/go/pkg/whisper/context.go DELETED
@@ -1,331 +0,0 @@
1
- package whisper
2
-
3
- import (
4
- "fmt"
5
- "io"
6
- "runtime"
7
- "strings"
8
- "time"
9
-
10
- // Bindings
11
- whisper "github.com/ggerganov/whisper.cpp/bindings/go"
12
- )
13
-
14
- ///////////////////////////////////////////////////////////////////////////////
15
- // TYPES
16
-
17
- type context struct {
18
- n int
19
- model *model
20
- params whisper.Params
21
- }
22
-
23
- // Make sure context adheres to the interface
24
- var _ Context = (*context)(nil)
25
-
26
- ///////////////////////////////////////////////////////////////////////////////
27
- // LIFECYCLE
28
-
29
- func newContext(model *model, params whisper.Params) (Context, error) {
30
- context := new(context)
31
- context.model = model
32
- context.params = params
33
-
34
- // Return success
35
- return context, nil
36
- }
37
-
38
- ///////////////////////////////////////////////////////////////////////////////
39
- // PUBLIC METHODS
40
-
41
- // Set the language to use for speech recognition.
42
- func (context *context) SetLanguage(lang string) error {
43
- if context.model.ctx == nil {
44
- return ErrInternalAppError
45
- }
46
- if !context.model.IsMultilingual() {
47
- return ErrModelNotMultilingual
48
- }
49
-
50
- if lang == "auto" {
51
- context.params.SetLanguage(-1)
52
- } else if id := context.model.ctx.Whisper_lang_id(lang); id < 0 {
53
- return ErrUnsupportedLanguage
54
- } else if err := context.params.SetLanguage(id); err != nil {
55
- return err
56
- }
57
- // Return success
58
- return nil
59
- }
60
-
61
- func (context *context) IsMultilingual() bool {
62
- return context.model.IsMultilingual()
63
- }
64
-
65
- // Get language
66
- func (context *context) Language() string {
67
- id := context.params.Language()
68
- if id == -1 {
69
- return "auto"
70
- }
71
- return whisper.Whisper_lang_str(context.params.Language())
72
- }
73
-
74
- // Set translate flag
75
- func (context *context) SetTranslate(v bool) {
76
- context.params.SetTranslate(v)
77
- }
78
-
79
- func (context *context) SetSplitOnWord(v bool) {
80
- context.params.SetSplitOnWord(v)
81
- }
82
-
83
- // Set number of threads to use
84
- func (context *context) SetThreads(v uint) {
85
- context.params.SetThreads(int(v))
86
- }
87
-
88
- // Set time offset
89
- func (context *context) SetOffset(v time.Duration) {
90
- context.params.SetOffset(int(v.Milliseconds()))
91
- }
92
-
93
- // Set duration of audio to process
94
- func (context *context) SetDuration(v time.Duration) {
95
- context.params.SetDuration(int(v.Milliseconds()))
96
- }
97
-
98
- // Set timestamp token probability threshold (~0.01)
99
- func (context *context) SetTokenThreshold(t float32) {
100
- context.params.SetTokenThreshold(t)
101
- }
102
-
103
- // Set timestamp token sum probability threshold (~0.01)
104
- func (context *context) SetTokenSumThreshold(t float32) {
105
- context.params.SetTokenSumThreshold(t)
106
- }
107
-
108
- // Set max segment length in characters
109
- func (context *context) SetMaxSegmentLength(n uint) {
110
- context.params.SetMaxSegmentLength(int(n))
111
- }
112
-
113
- // Set token timestamps flag
114
- func (context *context) SetTokenTimestamps(b bool) {
115
- context.params.SetTokenTimestamps(b)
116
- }
117
-
118
- // Set max tokens per segment (0 = no limit)
119
- func (context *context) SetMaxTokensPerSegment(n uint) {
120
- context.params.SetMaxTokensPerSegment(int(n))
121
- }
122
-
123
- // Set audio encoder context
124
- func (context *context) SetAudioCtx(n uint) {
125
- context.params.SetAudioCtx(int(n))
126
- }
127
-
128
- // Set maximum number of text context tokens to store
129
- func (context *context) SetMaxContext(n int) {
130
- context.params.SetMaxContext(n)
131
- }
132
-
133
- // Set Beam Size
134
- func (context *context) SetBeamSize(n int) {
135
- context.params.SetBeamSize(n)
136
- }
137
-
138
- // Set Entropy threshold
139
- func (context *context) SetEntropyThold(t float32) {
140
- context.params.SetEntropyThold(t)
141
- }
142
-
143
- // Set initial prompt
144
- func (context *context) SetInitialPrompt(prompt string) {
145
- context.params.SetInitialPrompt(prompt)
146
- }
147
-
148
- // ResetTimings resets the model timings. Should be called before processing.
149
- func (context *context) ResetTimings() {
150
- context.model.ctx.Whisper_reset_timings()
151
- }
152
-
153
- // PrintTimings prints the model timings to stdout.
154
- func (context *context) PrintTimings() {
155
- context.model.ctx.Whisper_print_timings()
156
- }
157
-
158
- // SystemInfo returns the system information
159
- func (context *context) SystemInfo() string {
160
- return fmt.Sprintf("system_info: n_threads = %d / %d | %s\n",
161
- context.params.Threads(),
162
- runtime.NumCPU(),
163
- whisper.Whisper_print_system_info(),
164
- )
165
- }
166
-
167
- // Use mel data at offset_ms to try and auto-detect the spoken language
168
- // Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first.
169
- // Returns the probabilities of all languages.
170
- func (context *context) WhisperLangAutoDetect(offset_ms int, n_threads int) ([]float32, error) {
171
- langProbs, err := context.model.ctx.Whisper_lang_auto_detect(offset_ms, n_threads)
172
- if err != nil {
173
- return nil, err
174
- }
175
- return langProbs, nil
176
- }
177
-
178
- // Process new sample data and return any errors
179
- func (context *context) Process(
180
- data []float32,
181
- callNewSegment SegmentCallback,
182
- callProgress ProgressCallback,
183
- ) error {
184
- if context.model.ctx == nil {
185
- return ErrInternalAppError
186
- }
187
- // If the callback is defined then we force on single_segment mode
188
- if callNewSegment != nil {
189
- context.params.SetSingleSegment(true)
190
- }
191
-
192
- // We don't do parallel processing at the moment
193
- processors := 0
194
- if processors > 1 {
195
- if err := context.model.ctx.Whisper_full_parallel(context.params, data, processors, nil, func(new int) {
196
- if callNewSegment != nil {
197
- num_segments := context.model.ctx.Whisper_full_n_segments()
198
- s0 := num_segments - new
199
- for i := s0; i < num_segments; i++ {
200
- callNewSegment(toSegment(context.model.ctx, i))
201
- }
202
- }
203
- }); err != nil {
204
- return err
205
- }
206
- } else if err := context.model.ctx.Whisper_full(context.params, data, nil, func(new int) {
207
- if callNewSegment != nil {
208
- num_segments := context.model.ctx.Whisper_full_n_segments()
209
- s0 := num_segments - new
210
- for i := s0; i < num_segments; i++ {
211
- callNewSegment(toSegment(context.model.ctx, i))
212
- }
213
- }
214
- }, func(progress int) {
215
- if callProgress != nil {
216
- callProgress(progress)
217
- }
218
- }); err != nil {
219
- return err
220
- }
221
-
222
- // Return success
223
- return nil
224
- }
225
-
226
- // Return the next segment of tokens
227
- func (context *context) NextSegment() (Segment, error) {
228
- if context.model.ctx == nil {
229
- return Segment{}, ErrInternalAppError
230
- }
231
- if context.n >= context.model.ctx.Whisper_full_n_segments() {
232
- return Segment{}, io.EOF
233
- }
234
-
235
- // Populate result
236
- result := toSegment(context.model.ctx, context.n)
237
-
238
- // Increment the cursor
239
- context.n++
240
-
241
- // Return success
242
- return result, nil
243
- }
244
-
245
- // Test for text tokens
246
- func (context *context) IsText(t Token) bool {
247
- switch {
248
- case context.IsBEG(t):
249
- return false
250
- case context.IsSOT(t):
251
- return false
252
- case whisper.Token(t.Id) >= context.model.ctx.Whisper_token_eot():
253
- return false
254
- case context.IsPREV(t):
255
- return false
256
- case context.IsSOLM(t):
257
- return false
258
- case context.IsNOT(t):
259
- return false
260
- default:
261
- return true
262
- }
263
- }
264
-
265
- // Test for "begin" token
266
- func (context *context) IsBEG(t Token) bool {
267
- return whisper.Token(t.Id) == context.model.ctx.Whisper_token_beg()
268
- }
269
-
270
- // Test for "start of transcription" token
271
- func (context *context) IsSOT(t Token) bool {
272
- return whisper.Token(t.Id) == context.model.ctx.Whisper_token_sot()
273
- }
274
-
275
- // Test for "end of transcription" token
276
- func (context *context) IsEOT(t Token) bool {
277
- return whisper.Token(t.Id) == context.model.ctx.Whisper_token_eot()
278
- }
279
-
280
- // Test for "start of prev" token
281
- func (context *context) IsPREV(t Token) bool {
282
- return whisper.Token(t.Id) == context.model.ctx.Whisper_token_prev()
283
- }
284
-
285
- // Test for "start of lm" token
286
- func (context *context) IsSOLM(t Token) bool {
287
- return whisper.Token(t.Id) == context.model.ctx.Whisper_token_solm()
288
- }
289
-
290
- // Test for "No timestamps" token
291
- func (context *context) IsNOT(t Token) bool {
292
- return whisper.Token(t.Id) == context.model.ctx.Whisper_token_not()
293
- }
294
-
295
- // Test for token associated with a specific language
296
- func (context *context) IsLANG(t Token, lang string) bool {
297
- if id := context.model.ctx.Whisper_lang_id(lang); id >= 0 {
298
- return whisper.Token(t.Id) == context.model.ctx.Whisper_token_lang(id)
299
- } else {
300
- return false
301
- }
302
- }
303
-
304
- ///////////////////////////////////////////////////////////////////////////////
305
- // PRIVATE METHODS
306
-
307
- func toSegment(ctx *whisper.Context, n int) Segment {
308
- return Segment{
309
- Num: n,
310
- Text: strings.TrimSpace(ctx.Whisper_full_get_segment_text(n)),
311
- Start: time.Duration(ctx.Whisper_full_get_segment_t0(n)) * time.Millisecond * 10,
312
- End: time.Duration(ctx.Whisper_full_get_segment_t1(n)) * time.Millisecond * 10,
313
- Tokens: toTokens(ctx, n),
314
- }
315
- }
316
-
317
- func toTokens(ctx *whisper.Context, n int) []Token {
318
- result := make([]Token, ctx.Whisper_full_n_tokens(n))
319
- for i := 0; i < len(result); i++ {
320
- data := ctx.Whisper_full_get_token_data(n, i)
321
-
322
- result[i] = Token{
323
- Id: int(ctx.Whisper_full_get_token_id(n, i)),
324
- Text: ctx.Whisper_full_get_token_text(n, i),
325
- P: ctx.Whisper_full_get_token_p(n, i),
326
- Start: time.Duration(data.T0()) * time.Millisecond * 10,
327
- End: time.Duration(data.T1()) * time.Millisecond * 10,
328
- }
329
- }
330
- return result
331
- }
scripts/bindings/go/pkg/whisper/context_test.go DELETED
@@ -1,55 +0,0 @@
1
- package whisper_test
2
-
3
- import (
4
- "os"
5
- "testing"
6
-
7
- // Packages
8
- whisper "github.com/ggerganov/whisper.cpp/bindings/go/pkg/whisper"
9
- assert "github.com/stretchr/testify/assert"
10
- )
11
-
12
- const (
13
- ModelPath = "../../models/ggml-tiny.bin"
14
- SamplePath = "../../samples/jfk.wav"
15
- )
16
-
17
- func Test_Whisper_000(t *testing.T) {
18
- assert := assert.New(t)
19
- if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
20
- t.Skip("Skipping test, model not found:", ModelPath)
21
- }
22
- if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
23
- t.Skip("Skipping test, sample not found:", SamplePath)
24
- }
25
-
26
- // Load model
27
- model, err := whisper.New(ModelPath)
28
- assert.NoError(err)
29
- assert.NotNil(model)
30
- assert.NoError(model.Close())
31
-
32
- t.Log("languages=", model.Languages())
33
- }
34
-
35
- func Test_Whisper_001(t *testing.T) {
36
- assert := assert.New(t)
37
- if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
38
- t.Skip("Skipping test, model not found:", ModelPath)
39
- }
40
- if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
41
- t.Skip("Skipping test, sample not found:", SamplePath)
42
- }
43
-
44
- // Load model
45
- model, err := whisper.New(ModelPath)
46
- assert.NoError(err)
47
- assert.NotNil(model)
48
- defer model.Close()
49
-
50
- // Get context for decoding
51
- ctx, err := model.NewContext()
52
- assert.NoError(err)
53
- assert.NotNil(ctx)
54
-
55
- }
scripts/bindings/go/pkg/whisper/doc.go DELETED
@@ -1,4 +0,0 @@
1
- /*
2
- This is the higher-level speech-to-text whisper.cpp API for Go.
3
- */
4
- package whisper
scripts/bindings/go/pkg/whisper/interface.go DELETED
@@ -1,102 +0,0 @@
1
- package whisper
2
-
3
- import (
4
- "io"
5
- "time"
6
- )
7
-
8
- ///////////////////////////////////////////////////////////////////////////////
9
- // TYPES
10
-
11
- // SegmentCallback is the callback function for processing segments in real
12
- // time. It is called during the Process function
13
- type SegmentCallback func(Segment)
14
-
15
- // ProgressCallback is the callback function for reporting progress during
16
- // processing. It is called during the Process function
17
- type ProgressCallback func(int)
18
-
19
- // Model is the interface to a whisper model. Create a new model with the
20
- // function whisper.New(string)
21
- type Model interface {
22
- io.Closer
23
-
24
- // Return a new speech-to-text context.
25
- NewContext() (Context, error)
26
-
27
- // Return true if the model is multilingual.
28
- IsMultilingual() bool
29
-
30
- // Return all languages supported.
31
- Languages() []string
32
- }
33
-
34
- // Context is the speech recognition context.
35
- type Context interface {
36
- SetLanguage(string) error // Set the language to use for speech recognition, use "auto" for auto detect language.
37
- SetTranslate(bool) // Set translate flag
38
- IsMultilingual() bool // Return true if the model is multilingual.
39
- Language() string // Get language
40
-
41
- SetOffset(time.Duration) // Set offset
42
- SetDuration(time.Duration) // Set duration
43
- SetThreads(uint) // Set number of threads to use
44
- SetSplitOnWord(bool) // Set split on word flag
45
- SetTokenThreshold(float32) // Set timestamp token probability threshold
46
- SetTokenSumThreshold(float32) // Set timestamp token sum probability threshold
47
- SetMaxSegmentLength(uint) // Set max segment length in characters
48
- SetTokenTimestamps(bool) // Set token timestamps flag
49
- SetMaxTokensPerSegment(uint) // Set max tokens per segment (0 = no limit)
50
- SetAudioCtx(uint) // Set audio encoder context
51
- SetMaxContext(n int) // Set maximum number of text context tokens to store
52
- SetBeamSize(n int) // Set Beam Size
53
- SetEntropyThold(t float32) // Set Entropy threshold
54
- SetInitialPrompt(prompt string) // Set initial prompt
55
-
56
- // Process mono audio data and return any errors.
57
- // If defined, newly generated segments are passed to the
58
- // callback function during processing.
59
- Process([]float32, SegmentCallback, ProgressCallback) error
60
-
61
- // After process is called, return segments until the end of the stream
62
- // is reached, when io.EOF is returned.
63
- NextSegment() (Segment, error)
64
-
65
- IsBEG(Token) bool // Test for "begin" token
66
- IsSOT(Token) bool // Test for "start of transcription" token
67
- IsEOT(Token) bool // Test for "end of transcription" token
68
- IsPREV(Token) bool // Test for "start of prev" token
69
- IsSOLM(Token) bool // Test for "start of lm" token
70
- IsNOT(Token) bool // Test for "No timestamps" token
71
- IsLANG(Token, string) bool // Test for token associated with a specific language
72
- IsText(Token) bool // Test for text token
73
-
74
- // Timings
75
- PrintTimings()
76
- ResetTimings()
77
-
78
- SystemInfo() string
79
- }
80
-
81
- // Segment is the text result of a speech recognition.
82
- type Segment struct {
83
- // Segment Number
84
- Num int
85
-
86
- // The start and end timestamps of the segment.
87
- Start, End time.Duration
88
-
89
- // The text of the segment.
90
- Text string
91
-
92
- // The tokens of the segment.
93
- Tokens []Token
94
- }
95
-
96
- // Token is a text or special token
97
- type Token struct {
98
- Id int
99
- Text string
100
- P float32
101
- Start, End time.Duration
102
- }
scripts/bindings/go/pkg/whisper/model.go DELETED
@@ -1,101 +0,0 @@
1
- package whisper
2
-
3
- import (
4
- "fmt"
5
- "os"
6
- "runtime"
7
-
8
- // Bindings
9
- whisper "github.com/ggerganov/whisper.cpp/bindings/go"
10
- )
11
-
12
- ///////////////////////////////////////////////////////////////////////////////
13
- // TYPES
14
-
15
- type model struct {
16
- path string
17
- ctx *whisper.Context
18
- }
19
-
20
- // Make sure model adheres to the interface
21
- var _ Model = (*model)(nil)
22
-
23
- ///////////////////////////////////////////////////////////////////////////////
24
- // LIFECYCLE
25
-
26
- func New(path string) (Model, error) {
27
- model := new(model)
28
- if _, err := os.Stat(path); err != nil {
29
- return nil, err
30
- } else if ctx := whisper.Whisper_init(path); ctx == nil {
31
- return nil, ErrUnableToLoadModel
32
- } else {
33
- model.ctx = ctx
34
- model.path = path
35
- }
36
-
37
- // Return success
38
- return model, nil
39
- }
40
-
41
- func (model *model) Close() error {
42
- if model.ctx != nil {
43
- model.ctx.Whisper_free()
44
- }
45
-
46
- // Release resources
47
- model.ctx = nil
48
-
49
- // Return success
50
- return nil
51
- }
52
-
53
- ///////////////////////////////////////////////////////////////////////////////
54
- // STRINGIFY
55
-
56
- func (model *model) String() string {
57
- str := "<whisper.model"
58
- if model.ctx != nil {
59
- str += fmt.Sprintf(" model=%q", model.path)
60
- }
61
- return str + ">"
62
- }
63
-
64
- ///////////////////////////////////////////////////////////////////////////////
65
- // PUBLIC METHODS
66
-
67
- // Return true if model is multilingual (language and translation options are supported)
68
- func (model *model) IsMultilingual() bool {
69
- return model.ctx.Whisper_is_multilingual() != 0
70
- }
71
-
72
- // Return all languages supported by the model.
73
- func (model *model) Languages() []string {
74
- result := make([]string, 0, whisper.Whisper_lang_max_id())
75
- for i := 0; i < whisper.Whisper_lang_max_id(); i++ {
76
- str := whisper.Whisper_lang_str(i)
77
- if model.ctx.Whisper_lang_id(str) >= 0 {
78
- result = append(result, str)
79
- }
80
- }
81
- return result
82
- }
83
-
84
- func (model *model) NewContext() (Context, error) {
85
- if model.ctx == nil {
86
- return nil, ErrInternalAppError
87
- }
88
-
89
- // Create new context
90
- params := model.ctx.Whisper_full_default_params(whisper.SAMPLING_GREEDY)
91
- params.SetTranslate(false)
92
- params.SetPrintSpecial(false)
93
- params.SetPrintProgress(false)
94
- params.SetPrintRealtime(false)
95
- params.SetPrintTimestamps(false)
96
- params.SetThreads(runtime.NumCPU())
97
- params.SetNoContext(true)
98
-
99
- // Return new context
100
- return newContext(model, params)
101
- }
scripts/bindings/go/samples/jfk.wav DELETED
Binary file (352 kB)
 
scripts/bindings/go/whisper.go DELETED
@@ -1,468 +0,0 @@
1
- package whisper
2
-
3
- import (
4
- "errors"
5
- "unsafe"
6
- )
7
-
8
- ///////////////////////////////////////////////////////////////////////////////
9
- // CGO
10
-
11
- /*
12
- #cgo LDFLAGS: -lwhisper -lm -lstdc++ -fopenmp
13
- #cgo darwin LDFLAGS: -framework Accelerate -framework Metal -framework Foundation -framework CoreGraphics
14
- #include <whisper.h>
15
- #include <stdlib.h>
16
-
17
- extern void callNewSegment(void* user_data, int new);
18
- extern void callProgress(void* user_data, int progress);
19
- extern bool callEncoderBegin(void* user_data);
20
-
21
- // Text segment callback
22
- // Called on every newly generated text segment
23
- // Use the whisper_full_...() functions to obtain the text segments
24
- static void whisper_new_segment_cb(struct whisper_context* ctx, struct whisper_state* state, int n_new, void* user_data) {
25
- if(user_data != NULL && ctx != NULL) {
26
- callNewSegment(user_data, n_new);
27
- }
28
- }
29
-
30
- // Progress callback
31
- // Called periodically during processing to report progress
32
- // The progress value is forwarded to the user callback
33
- static void whisper_progress_cb(struct whisper_context* ctx, struct whisper_state* state, int progress, void* user_data) {
34
- if(user_data != NULL && ctx != NULL) {
35
- callProgress(user_data, progress);
36
- }
37
- }
38
-
39
- // Encoder begin callback
40
- // If not NULL, called before the encoder starts
41
- // If it returns false, the computation is aborted
42
- static bool whisper_encoder_begin_cb(struct whisper_context* ctx, struct whisper_state* state, void* user_data) {
43
- if(user_data != NULL && ctx != NULL) {
44
- return callEncoderBegin(user_data);
45
- }
46
- return false;
47
- }
48
-
49
- // Get default parameters and set callbacks
50
- static struct whisper_full_params whisper_full_default_params_cb(struct whisper_context* ctx, enum whisper_sampling_strategy strategy) {
51
- struct whisper_full_params params = whisper_full_default_params(strategy);
52
- params.new_segment_callback = whisper_new_segment_cb;
53
- params.new_segment_callback_user_data = (void*)(ctx);
54
- params.encoder_begin_callback = whisper_encoder_begin_cb;
55
- params.encoder_begin_callback_user_data = (void*)(ctx);
56
- params.progress_callback = whisper_progress_cb;
57
- params.progress_callback_user_data = (void*)(ctx);
58
- return params;
59
- }
60
- */
61
- import "C"
62
-
63
- ///////////////////////////////////////////////////////////////////////////////
64
- // TYPES
65
-
66
- type (
67
- Context C.struct_whisper_context
68
- Token C.whisper_token
69
- TokenData C.struct_whisper_token_data
70
- SamplingStrategy C.enum_whisper_sampling_strategy
71
- Params C.struct_whisper_full_params
72
- )
73
-
74
- ///////////////////////////////////////////////////////////////////////////////
75
- // GLOBALS
76
-
77
- const (
78
- SAMPLING_GREEDY SamplingStrategy = C.WHISPER_SAMPLING_GREEDY
79
- SAMPLING_BEAM_SEARCH SamplingStrategy = C.WHISPER_SAMPLING_BEAM_SEARCH
80
- )
81
-
82
- const (
83
- SampleRate = C.WHISPER_SAMPLE_RATE // Expected sample rate, samples per second
84
- SampleBits = uint16(unsafe.Sizeof(C.float(0))) * 8 // Sample size in bits
85
- NumFFT = C.WHISPER_N_FFT
86
- HopLength = C.WHISPER_HOP_LENGTH
87
- ChunkSize = C.WHISPER_CHUNK_SIZE
88
- )
89
-
90
- var (
91
- ErrTokenizerFailed = errors.New("whisper_tokenize failed")
92
- ErrAutoDetectFailed = errors.New("whisper_lang_auto_detect failed")
93
- ErrConversionFailed = errors.New("whisper_convert failed")
94
- ErrInvalidLanguage = errors.New("invalid language")
95
- )
96
-
97
- ///////////////////////////////////////////////////////////////////////////////
98
- // PUBLIC METHODS
99
-
100
- // Allocates all memory needed for the model and loads the model from the given file.
101
- // Returns nil on failure.
102
- func Whisper_init(path string) *Context {
103
- cPath := C.CString(path)
104
- defer C.free(unsafe.Pointer(cPath))
105
- if ctx := C.whisper_init_from_file_with_params(cPath, C.whisper_context_default_params()); ctx != nil {
106
- return (*Context)(ctx)
107
- } else {
108
- return nil
109
- }
110
- }
111
-
112
- // Frees all memory allocated by the model.
113
- func (ctx *Context) Whisper_free() {
114
- C.whisper_free((*C.struct_whisper_context)(ctx))
115
- }
116
-
117
- // Convert RAW PCM audio to log mel spectrogram.
118
- // The resulting spectrogram is stored inside the provided whisper context.
119
- func (ctx *Context) Whisper_pcm_to_mel(data []float32, threads int) error {
120
- if C.whisper_pcm_to_mel((*C.struct_whisper_context)(ctx), (*C.float)(&data[0]), C.int(len(data)), C.int(threads)) == 0 {
121
- return nil
122
- } else {
123
- return ErrConversionFailed
124
- }
125
- }
126
-
127
- // This can be used to set a custom log mel spectrogram inside the provided whisper context.
128
- // Use this instead of whisper_pcm_to_mel() if you want to provide your own log mel spectrogram.
129
- // n_mel must be 80
130
- func (ctx *Context) Whisper_set_mel(data []float32, n_mel int) error {
131
- if C.whisper_set_mel((*C.struct_whisper_context)(ctx), (*C.float)(&data[0]), C.int(len(data)), C.int(n_mel)) == 0 {
132
- return nil
133
- } else {
134
- return ErrConversionFailed
135
- }
136
- }
137
-
138
- // Run the Whisper encoder on the log mel spectrogram stored inside the provided whisper context.
139
- // Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first.
140
- // offset can be used to specify the offset of the first frame in the spectrogram.
141
- func (ctx *Context) Whisper_encode(offset, threads int) error {
142
- if C.whisper_encode((*C.struct_whisper_context)(ctx), C.int(offset), C.int(threads)) == 0 {
143
- return nil
144
- } else {
145
- return ErrConversionFailed
146
- }
147
- }
148
-
149
- // Run the Whisper decoder to obtain the logits and probabilities for the next token.
150
- // Make sure to call whisper_encode() first.
151
- // tokens + n_tokens is the provided context for the decoder.
152
- // n_past is the number of tokens to use from previous decoder calls.
153
- func (ctx *Context) Whisper_decode(tokens []Token, past, threads int) error {
154
- if C.whisper_decode((*C.struct_whisper_context)(ctx), (*C.whisper_token)(&tokens[0]), C.int(len(tokens)), C.int(past), C.int(threads)) == 0 {
155
- return nil
156
- } else {
157
- return ErrConversionFailed
158
- }
159
- }
160
-
161
- // Convert the provided text into tokens. The tokens pointer must be large enough to hold the resulting tokens.
162
- // Returns the number of tokens on success
163
- func (ctx *Context) Whisper_tokenize(text string, tokens []Token) (int, error) {
164
- cText := C.CString(text)
165
- defer C.free(unsafe.Pointer(cText))
166
- if n := C.whisper_tokenize((*C.struct_whisper_context)(ctx), cText, (*C.whisper_token)(&tokens[0]), C.int(len(tokens))); n >= 0 {
167
- return int(n), nil
168
- } else {
169
- return 0, ErrTokenizerFailed
170
- }
171
- }
172
-
173
- // Return the id of the specified language, returns -1 if not found
174
- // Examples:
175
- //
176
- // "de" -> 2
177
- // "german" -> 2
178
- func (ctx *Context) Whisper_lang_id(lang string) int {
179
- return int(C.whisper_lang_id(C.CString(lang)))
180
- }
181
-
182
- // Largest language id (i.e. number of available languages - 1)
183
- func Whisper_lang_max_id() int {
184
- return int(C.whisper_lang_max_id())
185
- }
186
-
187
- // Return the short string of the specified language id (e.g. 2 -> "de"),
188
- // returns empty string if not found
189
- func Whisper_lang_str(id int) string {
190
- return C.GoString(C.whisper_lang_str(C.int(id)))
191
- }
192
-
193
- // Use mel data at offset_ms to try and auto-detect the spoken language
194
- // Make sure to call whisper_pcm_to_mel() or whisper_set_mel() first.
195
- // Returns the probabilities of all languages.
196
- // ref: https://github.com/openai/whisper/blob/main/whisper/decoding.py#L18-L69
197
- func (ctx *Context) Whisper_lang_auto_detect(offset_ms, n_threads int) ([]float32, error) {
198
- probs := make([]float32, Whisper_lang_max_id()+1)
199
- if n := int(C.whisper_lang_auto_detect((*C.struct_whisper_context)(ctx), C.int(offset_ms), C.int(n_threads), (*C.float)(&probs[0]))); n < 0 {
200
- return nil, ErrAutoDetectFailed
201
- } else {
202
- return probs, nil
203
- }
204
- }
205
-
206
- func (ctx *Context) Whisper_n_len() int {
207
- return int(C.whisper_n_len((*C.struct_whisper_context)(ctx)))
208
- }
209
-
210
- func (ctx *Context) Whisper_n_vocab() int {
211
- return int(C.whisper_n_vocab((*C.struct_whisper_context)(ctx)))
212
- }
213
-
214
- func (ctx *Context) Whisper_n_text_ctx() int {
215
- return int(C.whisper_n_text_ctx((*C.struct_whisper_context)(ctx)))
216
- }
217
-
218
- func (ctx *Context) Whisper_n_audio_ctx() int {
219
- return int(C.whisper_n_audio_ctx((*C.struct_whisper_context)(ctx)))
220
- }
221
-
222
- func (ctx *Context) Whisper_is_multilingual() int {
223
- return int(C.whisper_is_multilingual((*C.struct_whisper_context)(ctx)))
224
- }
225
-
226
- // The probabilities for the next token
227
- //func (ctx *Whisper_context) Whisper_get_probs() []float32 {
228
- // return (*[1 << 30]float32)(unsafe.Pointer(C.whisper_get_probs((*C.struct_whisper_context)(ctx))))[:ctx.Whisper_n_vocab()]
229
- //}
230
-
231
- // Token Id -> String. Uses the vocabulary in the provided context
232
- func (ctx *Context) Whisper_token_to_str(token Token) string {
233
- return C.GoString(C.whisper_token_to_str((*C.struct_whisper_context)(ctx), C.whisper_token(token)))
234
- }
235
-
236
- // Special tokens
237
- func (ctx *Context) Whisper_token_eot() Token {
238
- return Token(C.whisper_token_eot((*C.struct_whisper_context)(ctx)))
239
- }
240
-
241
- // Special tokens
242
- func (ctx *Context) Whisper_token_sot() Token {
243
- return Token(C.whisper_token_sot((*C.struct_whisper_context)(ctx)))
244
- }
245
-
246
- // Special tokens
247
- func (ctx *Context) Whisper_token_prev() Token {
248
- return Token(C.whisper_token_prev((*C.struct_whisper_context)(ctx)))
249
- }
250
-
251
- // Special tokens
252
- func (ctx *Context) Whisper_token_solm() Token {
253
- return Token(C.whisper_token_solm((*C.struct_whisper_context)(ctx)))
254
- }
255
-
256
- // Special tokens
257
- func (ctx *Context) Whisper_token_not() Token {
258
- return Token(C.whisper_token_not((*C.struct_whisper_context)(ctx)))
259
- }
260
-
261
- // Special tokens
262
- func (ctx *Context) Whisper_token_beg() Token {
263
- return Token(C.whisper_token_beg((*C.struct_whisper_context)(ctx)))
264
- }
265
-
266
- // Special tokens
267
- func (ctx *Context) Whisper_token_lang(lang_id int) Token {
268
- return Token(C.whisper_token_lang((*C.struct_whisper_context)(ctx), C.int(lang_id)))
269
- }
270
-
271
- // Task tokens
272
- func (ctx *Context) Whisper_token_translate() Token {
273
- return Token(C.whisper_token_translate((*C.struct_whisper_context)(ctx)))
274
- }
275
-
276
- // Task tokens
277
- func (ctx *Context) Whisper_token_transcribe() Token {
278
- return Token(C.whisper_token_transcribe((*C.struct_whisper_context)(ctx)))
279
- }
280
-
281
- // Performance information
282
- func (ctx *Context) Whisper_print_timings() {
283
- C.whisper_print_timings((*C.struct_whisper_context)(ctx))
284
- }
285
-
286
- // Performance information
287
- func (ctx *Context) Whisper_reset_timings() {
288
- C.whisper_reset_timings((*C.struct_whisper_context)(ctx))
289
- }
290
-
291
- // Print system information
292
- func Whisper_print_system_info() string {
293
- return C.GoString(C.whisper_print_system_info())
294
- }
295
-
296
- // Return default parameters for a strategy
297
- func (ctx *Context) Whisper_full_default_params(strategy SamplingStrategy) Params {
298
- // Get default parameters
299
- return Params(C.whisper_full_default_params_cb((*C.struct_whisper_context)(ctx), C.enum_whisper_sampling_strategy(strategy)))
300
- }
301
-
302
- // Run the entire model: PCM -> log mel spectrogram -> encoder -> decoder -> text
303
- // Uses the specified decoding strategy to obtain the text.
304
- func (ctx *Context) Whisper_full(
305
- params Params,
306
- samples []float32,
307
- encoderBeginCallback func() bool,
308
- newSegmentCallback func(int),
309
- progressCallback func(int),
310
- ) error {
311
- registerEncoderBeginCallback(ctx, encoderBeginCallback)
312
- registerNewSegmentCallback(ctx, newSegmentCallback)
313
- registerProgressCallback(ctx, progressCallback)
314
- defer registerEncoderBeginCallback(ctx, nil)
315
- defer registerNewSegmentCallback(ctx, nil)
316
- defer registerProgressCallback(ctx, nil)
317
- if C.whisper_full((*C.struct_whisper_context)(ctx), (C.struct_whisper_full_params)(params), (*C.float)(&samples[0]), C.int(len(samples))) == 0 {
318
- return nil
319
- } else {
320
- return ErrConversionFailed
321
- }
322
- }
323
-
324
- // Split the input audio in chunks and process each chunk separately using whisper_full()
325
- // It seems this approach can offer some speedup in some cases.
326
- // However, the transcription accuracy can be worse at the beginning and end of each chunk.
327
- func (ctx *Context) Whisper_full_parallel(params Params, samples []float32, processors int, encoderBeginCallback func() bool, newSegmentCallback func(int)) error {
328
- registerEncoderBeginCallback(ctx, encoderBeginCallback)
329
- registerNewSegmentCallback(ctx, newSegmentCallback)
330
- defer registerEncoderBeginCallback(ctx, nil)
331
- defer registerNewSegmentCallback(ctx, nil)
332
-
333
- if C.whisper_full_parallel((*C.struct_whisper_context)(ctx), (C.struct_whisper_full_params)(params), (*C.float)(&samples[0]), C.int(len(samples)), C.int(processors)) == 0 {
334
- return nil
335
- } else {
336
- return ErrConversionFailed
337
- }
338
- }
339
-
340
- // Return the id of the autodetected language, returns -1 if not found
341
- // Added to whisper.cpp in
342
- // https://github.com/ggerganov/whisper.cpp/commit/a1c1583cc7cd8b75222857afc936f0638c5683d6
343
- //
344
- // Examples:
345
- //
346
- // "de" -> 2
347
- // "german" -> 2
348
- func (ctx *Context) Whisper_full_lang_id() int {
349
- return int(C.whisper_full_lang_id((*C.struct_whisper_context)(ctx)))
350
- }
351
-
352
- // Number of generated text segments.
353
- // A segment can be a few words, a sentence, or even a paragraph.
354
- func (ctx *Context) Whisper_full_n_segments() int {
355
- return int(C.whisper_full_n_segments((*C.struct_whisper_context)(ctx)))
356
- }
357
-
358
- // Get the start and end time of the specified segment.
359
- func (ctx *Context) Whisper_full_get_segment_t0(segment int) int64 {
360
- return int64(C.whisper_full_get_segment_t0((*C.struct_whisper_context)(ctx), C.int(segment)))
361
- }
362
-
363
- // Get the start and end time of the specified segment.
364
- func (ctx *Context) Whisper_full_get_segment_t1(segment int) int64 {
365
- return int64(C.whisper_full_get_segment_t1((*C.struct_whisper_context)(ctx), C.int(segment)))
366
- }
367
-
368
- // Get the text of the specified segment.
369
- func (ctx *Context) Whisper_full_get_segment_text(segment int) string {
370
- return C.GoString(C.whisper_full_get_segment_text((*C.struct_whisper_context)(ctx), C.int(segment)))
371
- }
372
-
373
- // Get number of tokens in the specified segment.
374
- func (ctx *Context) Whisper_full_n_tokens(segment int) int {
375
- return int(C.whisper_full_n_tokens((*C.struct_whisper_context)(ctx), C.int(segment)))
376
- }
377
-
378
- // Get the token text of the specified token index in the specified segment.
379
- func (ctx *Context) Whisper_full_get_token_text(segment int, token int) string {
380
- return C.GoString(C.whisper_full_get_token_text((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
381
- }
382
-
383
- // Get the token of the specified token index in the specified segment.
384
- func (ctx *Context) Whisper_full_get_token_id(segment int, token int) Token {
385
- return Token(C.whisper_full_get_token_id((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
386
- }
387
-
388
- // Get token data for the specified token in the specified segment.
389
- // This contains probabilities, timestamps, etc.
390
- func (ctx *Context) Whisper_full_get_token_data(segment int, token int) TokenData {
391
- return TokenData(C.whisper_full_get_token_data((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
392
- }
393
-
394
- // Get the probability of the specified token in the specified segment.
395
- func (ctx *Context) Whisper_full_get_token_p(segment int, token int) float32 {
396
- return float32(C.whisper_full_get_token_p((*C.struct_whisper_context)(ctx), C.int(segment), C.int(token)))
397
- }
398
-
399
- ///////////////////////////////////////////////////////////////////////////////
400
- // CALLBACKS
401
-
402
- var (
403
- cbNewSegment = make(map[unsafe.Pointer]func(int))
404
- cbProgress = make(map[unsafe.Pointer]func(int))
405
- cbEncoderBegin = make(map[unsafe.Pointer]func() bool)
406
- )
407
-
408
- func registerNewSegmentCallback(ctx *Context, fn func(int)) {
409
- if fn == nil {
410
- delete(cbNewSegment, unsafe.Pointer(ctx))
411
- } else {
412
- cbNewSegment[unsafe.Pointer(ctx)] = fn
413
- }
414
- }
415
-
416
- func registerProgressCallback(ctx *Context, fn func(int)) {
417
- if fn == nil {
418
- delete(cbProgress, unsafe.Pointer(ctx))
419
- } else {
420
- cbProgress[unsafe.Pointer(ctx)] = fn
421
- }
422
- }
423
-
424
- func registerEncoderBeginCallback(ctx *Context, fn func() bool) {
425
- if fn == nil {
426
- delete(cbEncoderBegin, unsafe.Pointer(ctx))
427
- } else {
428
- cbEncoderBegin[unsafe.Pointer(ctx)] = fn
429
- }
430
- }
431
-
432
- //export callNewSegment
433
- func callNewSegment(user_data unsafe.Pointer, new C.int) {
434
- if fn, ok := cbNewSegment[user_data]; ok {
435
- fn(int(new))
436
- }
437
- }
438
-
439
- //export callProgress
440
- func callProgress(user_data unsafe.Pointer, progress C.int) {
441
- if fn, ok := cbProgress[user_data]; ok {
442
- fn(int(progress))
443
- }
444
- }
445
-
446
- //export callEncoderBegin
447
- func callEncoderBegin(user_data unsafe.Pointer) C.bool {
448
- if fn, ok := cbEncoderBegin[user_data]; ok {
449
- if fn() {
450
- return C.bool(true)
451
- } else {
452
- return C.bool(false)
453
- }
454
- }
455
- return true
456
- }
457
-
458
- func (t TokenData) T0() int64 {
459
- return int64(t.t0)
460
- }
461
-
462
- func (t TokenData) T1() int64 {
463
- return int64(t.t1)
464
- }
465
-
466
- func (t TokenData) Id() Token {
467
- return Token(t.id)
468
- }
scripts/bindings/go/whisper_test.go DELETED
@@ -1,113 +0,0 @@
1
- package whisper_test
2
-
3
- import (
4
- "os"
5
- "runtime"
6
- "testing"
7
- "time"
8
-
9
- // Packages
10
- whisper "github.com/ggerganov/whisper.cpp/bindings/go"
11
- wav "github.com/go-audio/wav"
12
- assert "github.com/stretchr/testify/assert"
13
- )
14
-
15
- const (
16
- ModelPath = "models/ggml-small.en.bin"
17
- SamplePath = "samples/jfk.wav"
18
- )
19
-
20
- func Test_Whisper_000(t *testing.T) {
21
- assert := assert.New(t)
22
- if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
23
- t.Skip("Skipping test, model not found:", ModelPath)
24
- }
25
- ctx := whisper.Whisper_init(ModelPath)
26
- assert.NotNil(ctx)
27
- ctx.Whisper_free()
28
- }
29
-
30
- func Test_Whisper_001(t *testing.T) {
31
- assert := assert.New(t)
32
- if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
33
- t.Skip("Skipping test, model not found:", ModelPath)
34
- }
35
- if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
36
- t.Skip("Skipping test, sample not found:", SamplePath)
37
- }
38
-
39
- // Open samples
40
- fh, err := os.Open(SamplePath)
41
- assert.NoError(err)
42
- defer fh.Close()
43
-
44
- // Read samples
45
- d := wav.NewDecoder(fh)
46
- buf, err := d.FullPCMBuffer()
47
- assert.NoError(err)
48
-
49
- // Run whisper
50
- ctx := whisper.Whisper_init(ModelPath)
51
- assert.NotNil(ctx)
52
- defer ctx.Whisper_free()
53
- params := ctx.Whisper_full_default_params(whisper.SAMPLING_GREEDY)
54
- data := buf.AsFloat32Buffer().Data
55
- err = ctx.Whisper_full(params, data, nil, nil, nil)
56
- assert.NoError(err)
57
-
58
- // Print out tokens
59
- num_segments := ctx.Whisper_full_n_segments()
60
- assert.GreaterOrEqual(num_segments, 1)
61
- for i := 0; i < num_segments; i++ {
62
- str := ctx.Whisper_full_get_segment_text(i)
63
- assert.NotEmpty(str)
64
- t0 := time.Duration(ctx.Whisper_full_get_segment_t0(i)) * time.Millisecond
65
- t1 := time.Duration(ctx.Whisper_full_get_segment_t1(i)) * time.Millisecond
66
- t.Logf("[%6s->%-6s] %q", t0, t1, str)
67
- }
68
- }
69
-
70
- func Test_Whisper_002(t *testing.T) {
71
- assert := assert.New(t)
72
- for i := 0; i < whisper.Whisper_lang_max_id(); i++ {
73
- str := whisper.Whisper_lang_str(i)
74
- assert.NotEmpty(str)
75
- t.Log(str)
76
- }
77
- }
78
-
79
- func Test_Whisper_003(t *testing.T) {
80
- threads := runtime.NumCPU()
81
- assert := assert.New(t)
82
- if _, err := os.Stat(ModelPath); os.IsNotExist(err) {
83
- t.Skip("Skipping test, model not found:", ModelPath)
84
- }
85
- if _, err := os.Stat(SamplePath); os.IsNotExist(err) {
86
- t.Skip("Skipping test, sample not found:", SamplePath)
87
- }
88
-
89
- // Open samples
90
- fh, err := os.Open(SamplePath)
91
- assert.NoError(err)
92
- defer fh.Close()
93
-
94
- // Read samples
95
- d := wav.NewDecoder(fh)
96
- buf, err := d.FullPCMBuffer()
97
- assert.NoError(err)
98
-
99
- // Make the model
100
- ctx := whisper.Whisper_init(ModelPath)
101
- assert.NotNil(ctx)
102
- defer ctx.Whisper_free()
103
-
104
- // Get MEL
105
- assert.NoError(ctx.Whisper_pcm_to_mel(buf.AsFloat32Buffer().Data, threads))
106
-
107
- // Get Languages
108
- languages, err := ctx.Whisper_lang_auto_detect(0, threads)
109
- assert.NoError(err)
110
- for i, p := range languages {
111
- t.Logf("%s: %f", whisper.Whisper_lang_str(i), p)
112
- }
113
- }
scripts/bindings/java/.idea/uiDesigner.xml DELETED
@@ -1,124 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <project version="4">
3
- <component name="Palette2">
4
- <group name="Swing">
5
- <item class="com.intellij.uiDesigner.HSpacer" tooltip-text="Horizontal Spacer" icon="/com/intellij/uiDesigner/icons/hspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
6
- <default-constraints vsize-policy="1" hsize-policy="6" anchor="0" fill="1" />
7
- </item>
8
- <item class="com.intellij.uiDesigner.VSpacer" tooltip-text="Vertical Spacer" icon="/com/intellij/uiDesigner/icons/vspacer.svg" removable="false" auto-create-binding="false" can-attach-label="false">
9
- <default-constraints vsize-policy="6" hsize-policy="1" anchor="0" fill="2" />
10
- </item>
11
- <item class="javax.swing.JPanel" icon="/com/intellij/uiDesigner/icons/panel.svg" removable="false" auto-create-binding="false" can-attach-label="false">
12
- <default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3" />
13
- </item>
14
- <item class="javax.swing.JScrollPane" icon="/com/intellij/uiDesigner/icons/scrollPane.svg" removable="false" auto-create-binding="false" can-attach-label="true">
15
- <default-constraints vsize-policy="7" hsize-policy="7" anchor="0" fill="3" />
16
- </item>
17
- <item class="javax.swing.JButton" icon="/com/intellij/uiDesigner/icons/button.svg" removable="false" auto-create-binding="true" can-attach-label="false">
18
- <default-constraints vsize-policy="0" hsize-policy="3" anchor="0" fill="1" />
19
- <initial-values>
20
- <property name="text" value="Button" />
21
- </initial-values>
22
- </item>
23
- <item class="javax.swing.JRadioButton" icon="/com/intellij/uiDesigner/icons/radioButton.svg" removable="false" auto-create-binding="true" can-attach-label="false">
24
- <default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
25
- <initial-values>
26
- <property name="text" value="RadioButton" />
27
- </initial-values>
28
- </item>
29
- <item class="javax.swing.JCheckBox" icon="/com/intellij/uiDesigner/icons/checkBox.svg" removable="false" auto-create-binding="true" can-attach-label="false">
30
- <default-constraints vsize-policy="0" hsize-policy="3" anchor="8" fill="0" />
31
- <initial-values>
32
- <property name="text" value="CheckBox" />
33
- </initial-values>
34
- </item>
35
- <item class="javax.swing.JLabel" icon="/com/intellij/uiDesigner/icons/label.svg" removable="false" auto-create-binding="false" can-attach-label="false">
36
- <default-constraints vsize-policy="0" hsize-policy="0" anchor="8" fill="0" />
37
- <initial-values>
38
- <property name="text" value="Label" />
39
- </initial-values>
40
- </item>
41
- <item class="javax.swing.JTextField" icon="/com/intellij/uiDesigner/icons/textField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
42
- <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
43
- <preferred-size width="150" height="-1" />
44
- </default-constraints>
45
- </item>
46
- <item class="javax.swing.JPasswordField" icon="/com/intellij/uiDesigner/icons/passwordField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
47
- <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
48
- <preferred-size width="150" height="-1" />
49
- </default-constraints>
50
- </item>
51
- <item class="javax.swing.JFormattedTextField" icon="/com/intellij/uiDesigner/icons/formattedTextField.svg" removable="false" auto-create-binding="true" can-attach-label="true">
52
- <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1">
53
- <preferred-size width="150" height="-1" />
54
- </default-constraints>
55
- </item>
56
- <item class="javax.swing.JTextArea" icon="/com/intellij/uiDesigner/icons/textArea.svg" removable="false" auto-create-binding="true" can-attach-label="true">
57
- <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
58
- <preferred-size width="150" height="50" />
59
- </default-constraints>
60
- </item>
61
- <item class="javax.swing.JTextPane" icon="/com/intellij/uiDesigner/icons/textPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
62
- <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
63
- <preferred-size width="150" height="50" />
64
- </default-constraints>
65
- </item>
66
- <item class="javax.swing.JEditorPane" icon="/com/intellij/uiDesigner/icons/editorPane.svg" removable="false" auto-create-binding="true" can-attach-label="true">
67
- <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
68
- <preferred-size width="150" height="50" />
69
- </default-constraints>
70
- </item>
71
- <item class="javax.swing.JComboBox" icon="/com/intellij/uiDesigner/icons/comboBox.svg" removable="false" auto-create-binding="true" can-attach-label="true">
72
- <default-constraints vsize-policy="0" hsize-policy="2" anchor="8" fill="1" />
73
- </item>
74
- <item class="javax.swing.JTable" icon="/com/intellij/uiDesigner/icons/table.svg" removable="false" auto-create-binding="true" can-attach-label="false">
75
- <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
76
- <preferred-size width="150" height="50" />
77
- </default-constraints>
78
- </item>
79
- <item class="javax.swing.JList" icon="/com/intellij/uiDesigner/icons/list.svg" removable="false" auto-create-binding="true" can-attach-label="false">
80
- <default-constraints vsize-policy="6" hsize-policy="2" anchor="0" fill="3">
81
- <preferred-size width="150" height="50" />
82
- </default-constraints>
83
- </item>
84
- <item class="javax.swing.JTree" icon="/com/intellij/uiDesigner/icons/tree.svg" removable="false" auto-create-binding="true" can-attach-label="false">
85
- <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3">
86
- <preferred-size width="150" height="50" />
87
- </default-constraints>
88
- </item>
89
- <item class="javax.swing.JTabbedPane" icon="/com/intellij/uiDesigner/icons/tabbedPane.svg" removable="false" auto-create-binding="true" can-attach-label="false">
90
- <default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
91
- <preferred-size width="200" height="200" />
92
- </default-constraints>
93
- </item>
94
- <item class="javax.swing.JSplitPane" icon="/com/intellij/uiDesigner/icons/splitPane.svg" removable="false" auto-create-binding="false" can-attach-label="false">
95
- <default-constraints vsize-policy="3" hsize-policy="3" anchor="0" fill="3">
96
- <preferred-size width="200" height="200" />
97
- </default-constraints>
98
- </item>
99
- <item class="javax.swing.JSpinner" icon="/com/intellij/uiDesigner/icons/spinner.svg" removable="false" auto-create-binding="true" can-attach-label="true">
100
- <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
101
- </item>
102
- <item class="javax.swing.JSlider" icon="/com/intellij/uiDesigner/icons/slider.svg" removable="false" auto-create-binding="true" can-attach-label="false">
103
- <default-constraints vsize-policy="0" hsize-policy="6" anchor="8" fill="1" />
104
- </item>
105
- <item class="javax.swing.JSeparator" icon="/com/intellij/uiDesigner/icons/separator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
106
- <default-constraints vsize-policy="6" hsize-policy="6" anchor="0" fill="3" />
107
- </item>
108
- <item class="javax.swing.JProgressBar" icon="/com/intellij/uiDesigner/icons/progressbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
109
- <default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1" />
110
- </item>
111
- <item class="javax.swing.JToolBar" icon="/com/intellij/uiDesigner/icons/toolbar.svg" removable="false" auto-create-binding="false" can-attach-label="false">
112
- <default-constraints vsize-policy="0" hsize-policy="6" anchor="0" fill="1">
113
- <preferred-size width="-1" height="20" />
114
- </default-constraints>
115
- </item>
116
- <item class="javax.swing.JToolBar$Separator" icon="/com/intellij/uiDesigner/icons/toolbarSeparator.svg" removable="false" auto-create-binding="false" can-attach-label="false">
117
- <default-constraints vsize-policy="0" hsize-policy="0" anchor="0" fill="1" />
118
- </item>
119
- <item class="javax.swing.JScrollBar" icon="/com/intellij/uiDesigner/icons/scrollbar.svg" removable="false" auto-create-binding="true" can-attach-label="false">
120
- <default-constraints vsize-policy="6" hsize-policy="0" anchor="0" fill="2" />
121
- </item>
122
- </group>
123
- </component>
124
- </project>
scripts/bindings/java/README.md DELETED
@@ -1,71 +0,0 @@
1
- # Java JNI bindings for Whisper
2
-
3
- This package provides Java JNI bindings for whisper.cpp. They have been tested on:
4
-
5
- * <strike>Darwin (OS X) 12.6 on x86_64</strike>
6
- * Ubuntu on x86_64
7
- * Windows on x86_64
8
-
9
- The "low level" bindings are in `WhisperCppJnaLibrary`. The simplest usage is as follows:
10
-
11
- JNA will attempt to load the `whispercpp` shared library from the following locations (if it lives somewhere else, see the sketch after this list):
12
-
13
- - jna.library.path
14
- jna.platform.library.path
15
- - ~/Library/Frameworks
16
- - /Library/Frameworks
17
- - /System/Library/Frameworks
18
- - classpath
19
-
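If the library is not found in any of these locations, you can point JNA at it explicitly before the first native call. A minimal sketch (the path and class name below are placeholders, not part of the bindings):

```java
public class WhisperLibraryPath {
    public static void main(String[] args) {
        // Hypothetical setup: tell JNA where the whisper shared library was built.
        // Replace the path with your actual build directory.
        System.setProperty("jna.library.path", "/path/to/whisper.cpp/build");
        // ... then use WhisperCpp as in the example below
    }
}
```

The same can be achieved on the command line with `-Djna.library.path=/path/to/whisper.cpp/build`.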
20
- ```java
21
- import io.github.ggerganov.whispercpp.WhisperCpp;
22
-
23
- public class Example {
24
-
25
- public static void main(String[] args) {
26
- WhisperCpp whisper = new WhisperCpp();
27
- // By default, models are loaded from ~/.cache/whisper/ and are usually named "ggml-${name}.bin"
28
- // or you can provide the absolute path to the model file.
29
- long context = whisper.initContext("base.en");
30
- try {
31
- var whisperParams = whisper.getFullDefaultParams(WhisperSamplingStrategy.WHISPER_SAMPLING_GREEDY);
32
- // custom configuration if required
33
- whisperParams.temperature_inc = 0f;
34
-
35
- var samples = readAudio(); // divide each value by 32767.0f
36
- whisper.fullTranscribe(whisperParams, samples);
37
-
38
- int segmentCount = whisper.getTextSegmentCount(context);
39
- for (int i = 0; i < segmentCount; i++) {
40
- String text = whisper.getTextSegment(context, i);
41
- System.out.println(text);
42
- }
43
- } finally {
44
- whisper.freeContext(context);
45
- }
46
- }
47
- }
48
- ```
49
-
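The `readAudio()` call above is a placeholder; the model expects 16 kHz mono audio as floats in the [-1, 1] range. A minimal sketch of such a helper, assuming the input WAV is already 16-bit mono PCM (the class name and error handling are illustrative, not part of the bindings):

```java
import java.io.ByteArrayOutputStream;
import java.io.File;
import javax.sound.sampled.AudioInputStream;
import javax.sound.sampled.AudioSystem;

public class AudioLoader {
    // Reads a 16-bit, mono, little-endian PCM WAV and scales each sample to [-1, 1].
    // A real implementation should inspect the AudioFormat and resample to 16 kHz if needed.
    public static float[] readAudio(File wavFile) throws Exception {
        try (AudioInputStream in = AudioSystem.getAudioInputStream(wavFile)) {
            ByteArrayOutputStream out = new ByteArrayOutputStream();
            byte[] chunk = new byte[4096];
            for (int n; (n = in.read(chunk)) > 0; ) {
                out.write(chunk, 0, n);
            }
            byte[] bytes = out.toByteArray();
            float[] samples = new float[bytes.length / 2];
            for (int i = 0; i < samples.length; i++) {
                short s = (short) ((bytes[2 * i] & 0xff) | (bytes[2 * i + 1] << 8));
                samples[i] = s / 32767.0f; // divide each value by 32767.0f, as noted above
            }
            return samples;
        }
    }
}
```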
50
- ## Building & Testing
51
-
52
- In order to build, you need JDK 8 or higher installed. Clone the repository and run the build (which also runs the tests) with:
53
-
54
- ```bash
55
- git clone https://github.com/ggerganov/whisper.cpp.git
56
- cd whisper.cpp/bindings/java
57
-
58
- ./gradlew build
59
- ```
60
-
61
- You need to have the `whisper` library in your [JNA library path](https://java-native-access.github.io/jna/4.2.1/com/sun/jna/NativeLibrary.html). On Windows the DLL is bundled in the jar, and you can update it with:
62
-
63
- ```bash
64
- copy /y ..\..\build\bin\Release\whisper.dll build\generated\resources\main\win32-x86-64\whisper.dll
65
- ```
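Alternatively, on any platform you can build the native library at the repository root first and then run `./gradlew copyLibs test`: the `copyLibs` task in `build.gradle` copies the built shared library into `build/generated/resources/main`, which is where the test task points `jna.library.path`.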
66
-
67
-
68
- ## License
69
-
70
- The license for the Java bindings is the same as the license for the rest of the whisper.cpp project, which is the MIT License. See the `LICENSE` file for more details.
71
-
scripts/bindings/java/build.gradle DELETED
@@ -1,133 +0,0 @@
1
- plugins {
2
- id 'java'
3
- id 'java-library'
4
- id 'maven-publish'
5
- id 'signing'
6
- }
7
-
8
- archivesBaseName = 'whispercpp'
9
- group = 'io.github.ggerganov'
10
- version = '1.4.0'
11
-
12
-
13
- sourceCompatibility = 1.8
14
- targetCompatibility = 1.8
15
-
16
- sourceSets {
17
- main {
18
- resources {
19
- srcDirs = ['src/main/resources', 'build/generated/resources/main']
20
- }
21
- }
22
- test {
23
- runtimeClasspath += files('build/generated/resources/main')
24
- }
25
- }
26
-
27
- tasks.register('copyLibwhisperDynlib', Copy) {
28
- from '../../build'
29
- include 'libwhisper.dylib'
30
- into 'build/generated/resources/main/darwin'
31
- }
32
-
33
- tasks.register('copyLibwhisperSo', Copy) {
34
- from '../../build'
35
- include 'libwhisper.so'
36
- into 'build/generated/resources/main/linux-x86-64'
37
- }
38
-
39
- tasks.register('copyWhisperDll', Copy) {
40
- from '../../build/Release'
41
- include 'whisper.dll'
42
- into 'build/generated/resources/main/windows-x86-64'
43
- }
44
-
45
- tasks.register('copyLibs') {
46
- dependsOn copyLibwhisperDynlib, copyLibwhisperSo, copyWhisperDll
47
- }
48
-
49
- test {
50
- systemProperty 'jna.library.path', project.file('build/generated/resources/main').absolutePath
51
- }
52
-
53
- java {
54
- withSourcesJar()
55
- withJavadocJar()
56
- }
57
-
58
- jar {
59
- exclude '**/whisper_java.exp', '**/whisper_java.lib'
60
- }
61
-
62
- javadoc {
63
- options.addStringOption('Xdoclint:none', '-quiet')
64
- }
65
-
66
- tasks.withType(Test) {
67
- useJUnitPlatform()
68
- }
69
-
70
- dependencies {
71
- implementation "net.java.dev.jna:jna:5.13.0"
72
- testImplementation "org.junit.jupiter:junit-jupiter:5.9.2"
73
- testImplementation "org.assertj:assertj-core:3.24.2"
74
- }
75
-
76
- repositories {
77
- mavenCentral()
78
- }
79
-
80
- publishing {
81
- publications {
82
- mavenJava(MavenPublication) {
83
- artifactId = 'whispercpp'
84
- from components.java
85
- pom {
86
- name = 'whispercpp'
87
- description = "Java JNA bindings for OpenAI's Whisper model, implemented in C/C++"
88
- url = 'https://github.com/ggerganov/whisper.cpp'
89
- licenses {
90
- license {
91
- name = 'MIT licence'
92
- url = 'https://raw.githubusercontent.com/ggerganov/whisper.cpp/master/LICENSE'
93
- }
94
- }
95
- developers {
96
- developer {
97
- id = 'ggerganov'
98
- name = 'Georgi Gerganov'
99
- email = '[email protected]'
100
- }
101
- developer {
102
- id = 'nalbion'
103
- name = 'Nicholas Albion'
104
- email = '[email protected]'
105
- }
106
- }
107
- scm {
108
- connection = 'scm:git:git://github.com/ggerganov/whisper.cpp.git'
109
- url = 'https://github.com/ggerganov/whisper.cpp'
110
- }
111
- }
112
- }
113
- }
114
-
115
- repositories {
116
- maven {
117
- def releasesRepoUrl = 'https://s01.oss.sonatype.org/service/local/staging/deploy/maven2/'
118
- def snapshotsRepoUrl = 'https://s01.oss.sonatype.org/content/repositories/snapshots/'
119
- url = version.endsWith('-SNAPSHOT') ? snapshotsRepoUrl : releasesRepoUrl
120
- credentials {
121
- username = System.getenv("MAVEN_USERNAME")
122
- password = System.getenv("MAVEN_PASSWORD")
123
- }
124
- }
125
- }
126
- }
127
-
128
- signing {
129
- def signingKey = System.getenv("PGP_SECRET")
130
- def signingPassword = System.getenv("PGP_PASSPHRASE")
131
- useInMemoryPgpKeys(signingKey, signingPassword)
132
- sign publishing.publications.mavenJava
133
- }
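With the `maven-publish` and `signing` configuration above, a release can be pushed with the standard `./gradlew publish` task, assuming the `MAVEN_USERNAME`/`MAVEN_PASSWORD` and `PGP_SECRET`/`PGP_PASSPHRASE` environment variables referenced in the script are set.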
scripts/bindings/java/gradle.properties DELETED
@@ -1,6 +0,0 @@
1
- org.gradle.jvmargs=-Xms256m -Xmx1024m
2
- system.include.dir=/usr/include
3
- #system.local.include.dir=../../include
4
- system.local.include.dir=./build/generated/sources/headers/java/main
5
- jni.include.dir=/usr/lib/jvm/java-8-openjdk-amd64/include/
6
- jni.lib.dir=/usr/lib/jvm/java-8-openjdk-amd64/lib/
scripts/bindings/java/gradle/wrapper/gradle-wrapper.jar DELETED
Binary file (61.6 kB)
 
scripts/bindings/java/gradle/wrapper/gradle-wrapper.properties DELETED
@@ -1,6 +0,0 @@
1
- distributionBase=GRADLE_USER_HOME
2
- distributionPath=wrapper/dists
3
- distributionUrl=https\://services.gradle.org/distributions/gradle-8.1-bin.zip
4
- networkTimeout=10000
5
- zipStoreBase=GRADLE_USER_HOME
6
- zipStorePath=wrapper/dists
scripts/bindings/java/gradlew DELETED
@@ -1,244 +0,0 @@
1
- #!/bin/sh
2
-
3
- #
4
- # Copyright © 2015-2021 the original authors.
5
- #
6
- # Licensed under the Apache License, Version 2.0 (the "License");
7
- # you may not use this file except in compliance with the License.
8
- # You may obtain a copy of the License at
9
- #
10
- # https://www.apache.org/licenses/LICENSE-2.0
11
- #
12
- # Unless required by applicable law or agreed to in writing, software
13
- # distributed under the License is distributed on an "AS IS" BASIS,
14
- # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15
- # See the License for the specific language governing permissions and
16
- # limitations under the License.
17
- #
18
-
19
- ##############################################################################
20
- #
21
- # Gradle start up script for POSIX generated by Gradle.
22
- #
23
- # Important for running:
24
- #
25
- # (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is
26
- # noncompliant, but you have some other compliant shell such as ksh or
27
- # bash, then to run this script, type that shell name before the whole
28
- # command line, like:
29
- #
30
- # ksh Gradle
31
- #
32
- # Busybox and similar reduced shells will NOT work, because this script
33
- # requires all of these POSIX shell features:
34
- # * functions;
35
- # * expansions «$var», «${var}», «${var:-default}», «${var+SET}»,
36
- # «${var#prefix}», «${var%suffix}», and «$( cmd )»;
37
- # * compound commands having a testable exit status, especially «case»;
38
- # * various built-in commands including «command», «set», and «ulimit».
39
- #
40
- # Important for patching:
41
- #
42
- # (2) This script targets any POSIX shell, so it avoids extensions provided
43
- # by Bash, Ksh, etc; in particular arrays are avoided.
44
- #
45
- # The "traditional" practice of packing multiple parameters into a
46
- # space-separated string is a well documented source of bugs and security
47
- # problems, so this is (mostly) avoided, by progressively accumulating
48
- # options in "$@", and eventually passing that to Java.
49
- #
50
- # Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS,
51
- # and GRADLE_OPTS) rely on word-splitting, this is performed explicitly;
52
- # see the in-line comments for details.
53
- #
54
- # There are tweaks for specific operating systems such as AIX, CygWin,
55
- # Darwin, MinGW, and NonStop.
56
- #
57
- # (3) This script is generated from the Groovy template
58
- # https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt
59
- # within the Gradle project.
60
- #
61
- # You can find Gradle at https://github.com/gradle/gradle/.
62
- #
63
- ##############################################################################
64
-
65
- # Attempt to set APP_HOME
66
-
67
- # Resolve links: $0 may be a link
68
- app_path=$0
69
-
70
- # Need this for daisy-chained symlinks.
71
- while
72
- APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path
73
- [ -h "$app_path" ]
74
- do
75
- ls=$( ls -ld "$app_path" )
76
- link=${ls#*' -> '}
77
- case $link in #(
78
- /*) app_path=$link ;; #(
79
- *) app_path=$APP_HOME$link ;;
80
- esac
81
- done
82
-
83
- # This is normally unused
84
- # shellcheck disable=SC2034
85
- APP_BASE_NAME=${0##*/}
86
- APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit
87
-
88
- # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
89
- DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"'
90
-
91
- # Use the maximum available, or set MAX_FD != -1 to use that value.
92
- MAX_FD=maximum
93
-
94
- warn () {
95
- echo "$*"
96
- } >&2
97
-
98
- die () {
99
- echo
100
- echo "$*"
101
- echo
102
- exit 1
103
- } >&2
104
-
105
- # OS specific support (must be 'true' or 'false').
106
- cygwin=false
107
- msys=false
108
- darwin=false
109
- nonstop=false
110
- case "$( uname )" in #(
111
- CYGWIN* ) cygwin=true ;; #(
112
- Darwin* ) darwin=true ;; #(
113
- MSYS* | MINGW* ) msys=true ;; #(
114
- NONSTOP* ) nonstop=true ;;
115
- esac
116
-
117
- CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar
118
-
119
-
120
- # Determine the Java command to use to start the JVM.
121
- if [ -n "$JAVA_HOME" ] ; then
122
- if [ -x "$JAVA_HOME/jre/sh/java" ] ; then
123
- # IBM's JDK on AIX uses strange locations for the executables
124
- JAVACMD=$JAVA_HOME/jre/sh/java
125
- else
126
- JAVACMD=$JAVA_HOME/bin/java
127
- fi
128
- if [ ! -x "$JAVACMD" ] ; then
129
- die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME
130
-
131
- Please set the JAVA_HOME variable in your environment to match the
132
- location of your Java installation."
133
- fi
134
- else
135
- JAVACMD=java
136
- which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
137
-
138
- Please set the JAVA_HOME variable in your environment to match the
139
- location of your Java installation."
140
- fi
141
-
142
- # Increase the maximum file descriptors if we can.
143
- if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then
144
- case $MAX_FD in #(
145
- max*)
146
- # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked.
147
- # shellcheck disable=SC3045
148
- MAX_FD=$( ulimit -H -n ) ||
149
- warn "Could not query maximum file descriptor limit"
150
- esac
151
- case $MAX_FD in #(
152
- '' | soft) :;; #(
153
- *)
154
- # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked.
155
- # shellcheck disable=SC3045
156
- ulimit -n "$MAX_FD" ||
157
- warn "Could not set maximum file descriptor limit to $MAX_FD"
158
- esac
159
- fi
160
-
161
- # Collect all arguments for the java command, stacking in reverse order:
162
- # * args from the command line
163
- # * the main class name
164
- # * -classpath
165
- # * -D...appname settings
166
- # * --module-path (only if needed)
167
- # * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables.
168
-
169
- # For Cygwin or MSYS, switch paths to Windows format before running java
170
- if "$cygwin" || "$msys" ; then
171
- APP_HOME=$( cygpath --path --mixed "$APP_HOME" )
172
- CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" )
173
-
174
- JAVACMD=$( cygpath --unix "$JAVACMD" )
175
-
176
- # Now convert the arguments - kludge to limit ourselves to /bin/sh
177
- for arg do
178
- if
179
- case $arg in #(
180
- -*) false ;; # don't mess with options #(
181
- /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath
182
- [ -e "$t" ] ;; #(
183
- *) false ;;
184
- esac
185
- then
186
- arg=$( cygpath --path --ignore --mixed "$arg" )
187
- fi
188
- # Roll the args list around exactly as many times as the number of
189
- # args, so each arg winds up back in the position where it started, but
190
- # possibly modified.
191
- #
192
- # NB: a `for` loop captures its iteration list before it begins, so
193
- # changing the positional parameters here affects neither the number of
194
- # iterations, nor the values presented in `arg`.
195
- shift # remove old arg
196
- set -- "$@" "$arg" # push replacement arg
197
- done
198
- fi
199
-
200
- # Collect all arguments for the java command;
201
- # * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
202
- # shell script including quotes and variable substitutions, so put them in
203
- # double quotes to make sure that they get re-expanded; and
204
- # * put everything else in single quotes, so that it's not re-expanded.
205
-
206
- set -- \
207
- "-Dorg.gradle.appname=$APP_BASE_NAME" \
208
- -classpath "$CLASSPATH" \
209
- org.gradle.wrapper.GradleWrapperMain \
210
- "$@"
211
-
212
- # Stop when "xargs" is not available.
213
- if ! command -v xargs >/dev/null 2>&1
214
- then
215
- die "xargs is not available"
216
- fi
217
-
218
- # Use "xargs" to parse quoted args.
219
- #
220
- # With -n1 it outputs one arg per line, with the quotes and backslashes removed.
221
- #
222
- # In Bash we could simply go:
223
- #
224
- # readarray ARGS < <( xargs -n1 <<<"$var" ) &&
225
- # set -- "${ARGS[@]}" "$@"
226
- #
227
- # but POSIX shell has neither arrays nor command substitution, so instead we
228
- # post-process each arg (as a line of input to sed) to backslash-escape any
229
- # character that might be a shell metacharacter, then use eval to reverse
230
- # that process (while maintaining the separation between arguments), and wrap
231
- # the whole thing up as a single "set" statement.
232
- #
233
- # This will of course break if any of these variables contains a newline or
234
- # an unmatched quote.
235
- #
236
-
237
- eval "set -- $(
238
- printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
239
- xargs -n1 |
240
- sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
241
- tr '\n' ' '
242
- )" '"$@"'
243
-
244
- exec "$JAVACMD" "$@"
scripts/bindings/java/gradlew.bat DELETED
@@ -1,92 +0,0 @@
1
- @rem
2
- @rem Copyright 2015 the original author or authors.
3
- @rem
4
- @rem Licensed under the Apache License, Version 2.0 (the "License");
5
- @rem you may not use this file except in compliance with the License.
6
- @rem You may obtain a copy of the License at
7
- @rem
8
- @rem https://www.apache.org/licenses/LICENSE-2.0
9
- @rem
10
- @rem Unless required by applicable law or agreed to in writing, software
11
- @rem distributed under the License is distributed on an "AS IS" BASIS,
12
- @rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13
- @rem See the License for the specific language governing permissions and
14
- @rem limitations under the License.
15
- @rem
16
-
17
- @if "%DEBUG%"=="" @echo off
18
- @rem ##########################################################################
19
- @rem
20
- @rem Gradle startup script for Windows
21
- @rem
22
- @rem ##########################################################################
23
-
24
- @rem Set local scope for the variables with windows NT shell
25
- if "%OS%"=="Windows_NT" setlocal
26
-
27
- set DIRNAME=%~dp0
28
- if "%DIRNAME%"=="" set DIRNAME=.
29
- @rem This is normally unused
30
- set APP_BASE_NAME=%~n0
31
- set APP_HOME=%DIRNAME%
32
-
33
- @rem Resolve any "." and ".." in APP_HOME to make it shorter.
34
- for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi
35
-
36
- @rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script.
37
- set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m"
38
-
39
- @rem Find java.exe
40
- if defined JAVA_HOME goto findJavaFromJavaHome
41
-
42
- set JAVA_EXE=java.exe
43
- %JAVA_EXE% -version >NUL 2>&1
44
- if %ERRORLEVEL% equ 0 goto execute
45
-
46
- echo.
47
- echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
48
- echo.
49
- echo Please set the JAVA_HOME variable in your environment to match the
50
- echo location of your Java installation.
51
-
52
- goto fail
53
-
54
- :findJavaFromJavaHome
55
- set JAVA_HOME=%JAVA_HOME:"=%
56
- set JAVA_EXE=%JAVA_HOME%/bin/java.exe
57
-
58
- if exist "%JAVA_EXE%" goto execute
59
-
60
- echo.
61
- echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME%
62
- echo.
63
- echo Please set the JAVA_HOME variable in your environment to match the
64
- echo location of your Java installation.
65
-
66
- goto fail
67
-
68
- :execute
69
- @rem Setup the command line
70
-
71
- set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar
72
-
73
-
74
- @rem Execute Gradle
75
- "%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %*
76
-
77
- :end
78
- @rem End local scope for the variables with windows NT shell
79
- if %ERRORLEVEL% equ 0 goto mainEnd
80
-
81
- :fail
82
- rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of
83
- rem the _cmd.exe /c_ return code!
84
- set EXIT_CODE=%ERRORLEVEL%
85
- if %EXIT_CODE% equ 0 set EXIT_CODE=1
86
- if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE%
87
- exit /b %EXIT_CODE%
88
-
89
- :mainEnd
90
- if "%OS%"=="Windows_NT" endlocal
91
-
92
- :omega
scripts/bindings/java/settings.gradle DELETED
@@ -1 +0,0 @@
1
- rootProject.name = "whispercpp"