Spaces:

fedirz
/

faster-whisper-server

Configuration error

App Files Files Community

Fedir Zadniprovskyi committed on May 26, 2024

Commit

d0feed8

1 Parent(s): aa5390b

feat: add more pre-commit hooks

Browse files

Files changed (10) hide show

.pre-commit-config.yaml +38 -18
Dockerfile.cpu +5 -2
Dockerfile.cuda +5 -2
pre-commit-scripts/pyright.sh +4 -0
pre-commit-scripts/ruff-format.sh +2 -0
pre-commit-scripts/ruff-lint.sh +2 -0
pyproject.toml +5 -0
speaches/main.py +14 -6
speaches/server_models.py +1 -1
tests/app_test.py +7 -5

.pre-commit-config.yaml CHANGED Viewed

@@ -8,21 +8,41 @@ repos:
       - id: end-of-file-fixer
       - id: check-yaml
       - id: check-added-large-files
-  # TODO: enable
-  # - repo: https://github.com/pre-commit/mirrors-mypy
-  #   rev: v1.10.0
-  #   hooks:
-  #     - id: mypy
-  #       args: [--strict]
-  # TODO: enable
-  # - repo: https://github.com/RobertCraigie/pyright-python
-  #   rev: v1.1.363
-  #   hooks:
-  #   - id: pyright
-  # Disabled because it doesn't work on NixOS
-  # - repo: https://github.com/astral-sh/ruff-pre-commit
-  #   rev: v0.4.4
-  #   hooks:
-  #     - id: ruff # linter
-  #       args: [--fix]
-  #     - id: ruff-format

       - id: end-of-file-fixer
       - id: check-yaml
       - id: check-added-large-files
+  - repo: https://github.com/python-jsonschema/check-jsonschema
+    rev: 0.28.4
+    hooks:
+      - id: check-taskfile
+  - repo: https://github.com/rhysd/actionlint
+    rev: v1.7.0
+    hooks:
+      - id: actionlint
+  - repo: https://github.com/IamTheFij/docker-pre-commit
+    rev: v3.0.1
+    hooks:
+      - id: docker-compose-check
+  - repo: https://github.com/hadolint/hadolint
+    rev: v2.12.0
+    hooks:
+      - id: hadolint
+  - repo: https://github.com/shellcheck-py/shellcheck-py
+    rev: v0.10.0.1
+    hooks:
+      - id: shellcheck
+  # NOTE: not using https://github.com/RobertCraigie/pyright-python because it doesn't work with poetry virtual environments
+  # NOTE: not using github.com/astral-sh/ruff-pre-commit because it doesn't work on NixOS
+  - repo: local
+    hooks:
+      - id: pyright
+        name: pyright
+        entry: ./pre-commit-scripts/pyright.sh
+        language: script
+        pass_filenames: false
+      - id: ruff-lint
+        name: ruff-lint
+        entry: ./pre-commit-scripts/ruff-lint.sh
+        pass_filenames: false
+        language: script
+      - id: ruff-format
+        name: ruff-format
+        entry: ./pre-commit-scripts/ruff-format.sh
+        language: script

Dockerfile.cpu CHANGED Viewed

@@ -1,9 +1,12 @@
 FROM ubuntu:22.04
 RUN apt-get update && \
-    apt-get install -y curl software-properties-common && \
     add-apt-repository ppa:deadsnakes/ppa && \
     apt-get update && \
-    DEBIAN_FRONTEND=noninteractive apt-get -y install python3.11 python3.11-distutils && \
     curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
 RUN pip install --no-cache-dir poetry==1.8.2
 WORKDIR /root/speaches

 FROM ubuntu:22.04
+# hadolint ignore=DL3008,DL4006
 RUN apt-get update && \
+    apt-get install -y --no-install-recommends curl software-properties-common && \
     add-apt-repository ppa:deadsnakes/ppa && \
     apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends python3.11 python3.11-distutils && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* && \
     curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
 RUN pip install --no-cache-dir poetry==1.8.2
 WORKDIR /root/speaches

Dockerfile.cuda CHANGED Viewed

@@ -1,9 +1,12 @@
 FROM nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04
 RUN apt-get update && \
-    apt-get install -y curl software-properties-common && \
     add-apt-repository ppa:deadsnakes/ppa && \
     apt-get update && \
-    DEBIAN_FRONTEND=noninteractive apt-get -y install python3.11 python3.11-distutils && \
     curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
 RUN pip install --no-cache-dir poetry==1.8.2
 WORKDIR /root/speaches

 FROM nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04
+# hadolint ignore=DL3008,DL4006
 RUN apt-get update && \
+    apt-get install -y --no-install-recommends curl software-properties-common && \
     add-apt-repository ppa:deadsnakes/ppa && \
     apt-get update && \
+    DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends python3.11 python3.11-distutils && \
+    apt-get clean && \
+    rm -rf /var/lib/apt/lists/* && \
     curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
 RUN pip install --no-cache-dir poetry==1.8.2
 WORKDIR /root/speaches

pre-commit-scripts/pyright.sh ADDED Viewed

	@@ -0,0 +1,4 @@

+#!/usr/bin/env bash
+# shellcheck disable=SC1091
+source "$(poetry env info --path)"/bin/activate
+pyright

pre-commit-scripts/ruff-format.sh ADDED Viewed

	@@ -0,0 +1,2 @@


+#!/usr/bin/env bash
+ruff format

pre-commit-scripts/ruff-lint.sh ADDED Viewed

	@@ -0,0 +1,2 @@


+#!/usr/bin/env bash
+ruff check --fix

pyproject.toml CHANGED Viewed

@@ -22,6 +22,11 @@ youtube-dl = {git = "https://github.com/ytdl-org/youtube-dl.git"}
 [tool.ruff]
 target-version = "py311"
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"

 [tool.ruff]
 target-version = "py311"
+[tool.pyright]
+# typeCheckingMode = "strict"
+pythonVersion = "3.11"
+pythonPlatform = "Linux"
 [build-system]
 requires = ["poetry-core"]
 build-backend = "poetry.core.masonry.api"

speaches/main.py CHANGED Viewed

@@ -6,8 +6,15 @@ from contextlib import asynccontextmanager
 from io import BytesIO
 from typing import Annotated, Literal, OrderedDict
-from fastapi import (FastAPI, Form, Query, Response, UploadFile, WebSocket,
-                     WebSocketDisconnect)
 from fastapi.responses import StreamingResponse
 from fastapi.websockets import WebSocketState
 from faster_whisper import WhisperModel
@@ -16,11 +23,12 @@ from faster_whisper.vad import VadOptions, get_speech_timestamps
 from speaches import utils
 from speaches.asr import FasterWhisperASR
 from speaches.audio import AudioStream, audio_samples_from_file
-from speaches.config import (SAMPLES_PER_SECOND, Language, Model,
-                             ResponseFormat, config)
 from speaches.logger import logger
-from speaches.server_models import (TranscriptionJsonResponse,
-                                    TranscriptionVerboseJsonResponse)
 from speaches.transcriber import audio_transcriber
 models: OrderedDict[Model, WhisperModel] = OrderedDict()

 from io import BytesIO
 from typing import Annotated, Literal, OrderedDict
+from fastapi import (
+    FastAPI,
+    Form,
+    Query,
+    Response,
+    UploadFile,
+    WebSocket,
+    WebSocketDisconnect,
+)
 from fastapi.responses import StreamingResponse
 from fastapi.websockets import WebSocketState
 from faster_whisper import WhisperModel
 from speaches import utils
 from speaches.asr import FasterWhisperASR
 from speaches.audio import AudioStream, audio_samples_from_file
+from speaches.config import SAMPLES_PER_SECOND, Language, Model, ResponseFormat, config
 from speaches.logger import logger
+from speaches.server_models import (
+    TranscriptionJsonResponse,
+    TranscriptionVerboseJsonResponse,
+)
 from speaches.transcriber import audio_transcriber
 models: OrderedDict[Model, WhisperModel] = OrderedDict()

speaches/server_models.py CHANGED Viewed

@@ -85,7 +85,7 @@ class TranscriptionVerboseJsonResponse(BaseModel):
             text=segment.text,
             words=(
                 [WordObject.from_word(word) for word in segment.words]
-                if type(segment.words) == list
                 else []
             ),
             segments=[SegmentObject.from_segment(segment)],

             text=segment.text,
             words=(
                 [WordObject.from_word(word) for word in segment.words]
+                if isinstance(segment.words, list)
                 else []
             ),
             segments=[SegmentObject.from_segment(segment)],

tests/app_test.py CHANGED Viewed

@@ -12,7 +12,7 @@ from starlette.testclient import WebSocketTestSession
 from speaches.config import BYTES_PER_SECOND
 from speaches.main import app
-from speaches.server_models import TranscriptionVerboseResponse
 SIMILARITY_THRESHOLD = 0.97
 AUDIO_FILES_LIMIT = 5
@@ -54,13 +54,13 @@ def stream_audio_data(
 def transcribe_audio_data(
     client: TestClient, data: bytes
-) -> TranscriptionVerboseResponse:
     response = client.post(
         TRANSCRIBE_ENDPOINT,
         files={"file": ("audio.raw", data, "audio/raw")},
     )
     data = json.loads(response.json())  # TODO: figure this out
-    return TranscriptionVerboseResponse(**data)  # type: ignore
 @pytest.mark.parametrize("file_path", file_paths)
@@ -70,14 +70,16 @@ def test_ws_audio_transcriptions(
     with open(file_path, "rb") as file:
         data = file.read()
-    streaming_transcription: TranscriptionVerboseResponse = None  # type: ignore
     thread = threading.Thread(
         target=stream_audio_data, args=(ws, data), kwargs={"speed": 4.0}
     )
     thread.start()
     while True:
         try:
-            streaming_transcription = TranscriptionVerboseResponse(**ws.receive_json())
         except WebSocketDisconnect:
             break
     file_transcription = transcribe_audio_data(client, data)

 from speaches.config import BYTES_PER_SECOND
 from speaches.main import app
+from speaches.server_models import TranscriptionVerboseJsonResponse
 SIMILARITY_THRESHOLD = 0.97
 AUDIO_FILES_LIMIT = 5
 def transcribe_audio_data(
     client: TestClient, data: bytes
+) -> TranscriptionVerboseJsonResponse:
     response = client.post(
         TRANSCRIBE_ENDPOINT,
         files={"file": ("audio.raw", data, "audio/raw")},
     )
     data = json.loads(response.json())  # TODO: figure this out
+    return TranscriptionVerboseJsonResponse(**data)  # type: ignore
 @pytest.mark.parametrize("file_path", file_paths)
     with open(file_path, "rb") as file:
         data = file.read()
+    streaming_transcription: TranscriptionVerboseJsonResponse = None  # type: ignore
     thread = threading.Thread(
         target=stream_audio_data, args=(ws, data), kwargs={"speed": 4.0}
     )
     thread.start()
     while True:
         try:
+            streaming_transcription = TranscriptionVerboseJsonResponse(
+                **ws.receive_json()
+            )
         except WebSocketDisconnect:
             break
     file_transcription = transcribe_audio_data(client, data)