Fedir Zadniprovskyi committed on
Commit
d0feed8
·
1 Parent(s): aa5390b

feat: add more pre-commit hooks

Browse files
.pre-commit-config.yaml CHANGED
@@ -8,21 +8,41 @@ repos:
8
  - id: end-of-file-fixer
9
  - id: check-yaml
10
  - id: check-added-large-files
11
- # TODO: enable
12
- # - repo: https://github.com/pre-commit/mirrors-mypy
13
- # rev: v1.10.0
14
- # hooks:
15
- # - id: mypy
16
- # args: [--strict]
17
- # TODO: enable
18
- # - repo: https://github.com/RobertCraigie/pyright-python
19
- # rev: v1.1.363
20
- # hooks:
21
- # - id: pyright
22
- # Disabled because it doesn't work on NixOS
23
- # - repo: https://github.com/astral-sh/ruff-pre-commit
24
- # rev: v0.4.4
25
- # hooks:
26
- # - id: ruff # linter
27
- # args: [--fix]
28
- # - id: ruff-format
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  - id: end-of-file-fixer
9
  - id: check-yaml
10
  - id: check-added-large-files
11
+ - repo: https://github.com/python-jsonschema/check-jsonschema
12
+ rev: 0.28.4
13
+ hooks:
14
+ - id: check-taskfile
15
+ - repo: https://github.com/rhysd/actionlint
16
+ rev: v1.7.0
17
+ hooks:
18
+ - id: actionlint
19
+ - repo: https://github.com/IamTheFij/docker-pre-commit
20
+ rev: v3.0.1
21
+ hooks:
22
+ - id: docker-compose-check
23
+ - repo: https://github.com/hadolint/hadolint
24
+ rev: v2.12.0
25
+ hooks:
26
+ - id: hadolint
27
+ - repo: https://github.com/shellcheck-py/shellcheck-py
28
+ rev: v0.10.0.1
29
+ hooks:
30
+ - id: shellcheck
31
+ # NOTE: not using https://github.com/RobertCraigie/pyright-python because it doesn't work with poetry virtual environments
32
+ # NOTE: not using github.com/astral-sh/ruff-pre-commit because it doesn't work on NixOS
33
+ - repo: local
34
+ hooks:
35
+ - id: pyright
36
+ name: pyright
37
+ entry: ./pre-commit-scripts/pyright.sh
38
+ language: script
39
+ pass_filenames: false
40
+ - id: ruff-lint
41
+ name: ruff-lint
42
+ entry: ./pre-commit-scripts/ruff-lint.sh
43
+ pass_filenames: false
44
+ language: script
45
+ - id: ruff-format
46
+ name: ruff-format
47
+ entry: ./pre-commit-scripts/ruff-format.sh
48
+ language: script
Dockerfile.cpu CHANGED
@@ -1,9 +1,12 @@
1
  FROM ubuntu:22.04
 
2
  RUN apt-get update && \
3
- apt-get install -y curl software-properties-common && \
4
  add-apt-repository ppa:deadsnakes/ppa && \
5
  apt-get update && \
6
- DEBIAN_FRONTEND=noninteractive apt-get -y install python3.11 python3.11-distutils && \
 
 
7
  curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
8
  RUN pip install --no-cache-dir poetry==1.8.2
9
  WORKDIR /root/speaches
 
1
  FROM ubuntu:22.04
2
+ # hadolint ignore=DL3008,DL4006
3
  RUN apt-get update && \
4
+ apt-get install -y --no-install-recommends curl software-properties-common && \
5
  add-apt-repository ppa:deadsnakes/ppa && \
6
  apt-get update && \
7
+ DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends python3.11 python3.11-distutils && \
8
+ apt-get clean && \
9
+ rm -rf /var/lib/apt/lists/* && \
10
  curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
11
  RUN pip install --no-cache-dir poetry==1.8.2
12
  WORKDIR /root/speaches
Dockerfile.cuda CHANGED
@@ -1,9 +1,12 @@
1
  FROM nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04
 
2
  RUN apt-get update && \
3
- apt-get install -y curl software-properties-common && \
4
  add-apt-repository ppa:deadsnakes/ppa && \
5
  apt-get update && \
6
- DEBIAN_FRONTEND=noninteractive apt-get -y install python3.11 python3.11-distutils && \
 
 
7
  curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
8
  RUN pip install --no-cache-dir poetry==1.8.2
9
  WORKDIR /root/speaches
 
1
  FROM nvidia/cuda:12.2.2-cudnn8-runtime-ubuntu22.04
2
+ # hadolint ignore=DL3008,DL4006
3
  RUN apt-get update && \
4
+ apt-get install -y --no-install-recommends curl software-properties-common && \
5
  add-apt-repository ppa:deadsnakes/ppa && \
6
  apt-get update && \
7
+ DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends python3.11 python3.11-distutils && \
8
+ apt-get clean && \
9
+ rm -rf /var/lib/apt/lists/* && \
10
  curl -sS https://bootstrap.pypa.io/get-pip.py | python3.11
11
  RUN pip install --no-cache-dir poetry==1.8.2
12
  WORKDIR /root/speaches
pre-commit-scripts/pyright.sh ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ # shellcheck disable=SC1091
3
+ source "$(poetry env info --path)"/bin/activate
4
+ pyright
pre-commit-scripts/ruff-format.sh ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ #!/usr/bin/env bash
2
+ ruff format
pre-commit-scripts/ruff-lint.sh ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ #!/usr/bin/env bash
2
+ ruff check --fix
pyproject.toml CHANGED
@@ -22,6 +22,11 @@ youtube-dl = {git = "https://github.com/ytdl-org/youtube-dl.git"}
22
  [tool.ruff]
23
  target-version = "py311"
24
 
 
 
 
 
 
25
  [build-system]
26
  requires = ["poetry-core"]
27
  build-backend = "poetry.core.masonry.api"
 
22
  [tool.ruff]
23
  target-version = "py311"
24
 
25
+ [tool.pyright]
26
+ # typeCheckingMode = "strict"
27
+ pythonVersion = "3.11"
28
+ pythonPlatform = "Linux"
29
+
30
  [build-system]
31
  requires = ["poetry-core"]
32
  build-backend = "poetry.core.masonry.api"
speaches/main.py CHANGED
@@ -6,8 +6,15 @@ from contextlib import asynccontextmanager
6
  from io import BytesIO
7
  from typing import Annotated, Literal, OrderedDict
8
 
9
- from fastapi import (FastAPI, Form, Query, Response, UploadFile, WebSocket,
10
- WebSocketDisconnect)
 
 
 
 
 
 
 
11
  from fastapi.responses import StreamingResponse
12
  from fastapi.websockets import WebSocketState
13
  from faster_whisper import WhisperModel
@@ -16,11 +23,12 @@ from faster_whisper.vad import VadOptions, get_speech_timestamps
16
  from speaches import utils
17
  from speaches.asr import FasterWhisperASR
18
  from speaches.audio import AudioStream, audio_samples_from_file
19
- from speaches.config import (SAMPLES_PER_SECOND, Language, Model,
20
- ResponseFormat, config)
21
  from speaches.logger import logger
22
- from speaches.server_models import (TranscriptionJsonResponse,
23
- TranscriptionVerboseJsonResponse)
 
 
24
  from speaches.transcriber import audio_transcriber
25
 
26
  models: OrderedDict[Model, WhisperModel] = OrderedDict()
 
6
  from io import BytesIO
7
  from typing import Annotated, Literal, OrderedDict
8
 
9
+ from fastapi import (
10
+ FastAPI,
11
+ Form,
12
+ Query,
13
+ Response,
14
+ UploadFile,
15
+ WebSocket,
16
+ WebSocketDisconnect,
17
+ )
18
  from fastapi.responses import StreamingResponse
19
  from fastapi.websockets import WebSocketState
20
  from faster_whisper import WhisperModel
 
23
  from speaches import utils
24
  from speaches.asr import FasterWhisperASR
25
  from speaches.audio import AudioStream, audio_samples_from_file
26
+ from speaches.config import SAMPLES_PER_SECOND, Language, Model, ResponseFormat, config
 
27
  from speaches.logger import logger
28
+ from speaches.server_models import (
29
+ TranscriptionJsonResponse,
30
+ TranscriptionVerboseJsonResponse,
31
+ )
32
  from speaches.transcriber import audio_transcriber
33
 
34
  models: OrderedDict[Model, WhisperModel] = OrderedDict()
speaches/server_models.py CHANGED
@@ -85,7 +85,7 @@ class TranscriptionVerboseJsonResponse(BaseModel):
85
  text=segment.text,
86
  words=(
87
  [WordObject.from_word(word) for word in segment.words]
88
- if type(segment.words) == list
89
  else []
90
  ),
91
  segments=[SegmentObject.from_segment(segment)],
 
85
  text=segment.text,
86
  words=(
87
  [WordObject.from_word(word) for word in segment.words]
88
+ if isinstance(segment.words, list)
89
  else []
90
  ),
91
  segments=[SegmentObject.from_segment(segment)],
tests/app_test.py CHANGED
@@ -12,7 +12,7 @@ from starlette.testclient import WebSocketTestSession
12
 
13
  from speaches.config import BYTES_PER_SECOND
14
  from speaches.main import app
15
- from speaches.server_models import TranscriptionVerboseResponse
16
 
17
  SIMILARITY_THRESHOLD = 0.97
18
  AUDIO_FILES_LIMIT = 5
@@ -54,13 +54,13 @@ def stream_audio_data(
54
 
55
  def transcribe_audio_data(
56
  client: TestClient, data: bytes
57
- ) -> TranscriptionVerboseResponse:
58
  response = client.post(
59
  TRANSCRIBE_ENDPOINT,
60
  files={"file": ("audio.raw", data, "audio/raw")},
61
  )
62
  data = json.loads(response.json()) # TODO: figure this out
63
- return TranscriptionVerboseResponse(**data) # type: ignore
64
 
65
 
66
  @pytest.mark.parametrize("file_path", file_paths)
@@ -70,14 +70,16 @@ def test_ws_audio_transcriptions(
70
  with open(file_path, "rb") as file:
71
  data = file.read()
72
 
73
- streaming_transcription: TranscriptionVerboseResponse = None # type: ignore
74
  thread = threading.Thread(
75
  target=stream_audio_data, args=(ws, data), kwargs={"speed": 4.0}
76
  )
77
  thread.start()
78
  while True:
79
  try:
80
- streaming_transcription = TranscriptionVerboseResponse(**ws.receive_json())
 
 
81
  except WebSocketDisconnect:
82
  break
83
  file_transcription = transcribe_audio_data(client, data)
 
12
 
13
  from speaches.config import BYTES_PER_SECOND
14
  from speaches.main import app
15
+ from speaches.server_models import TranscriptionVerboseJsonResponse
16
 
17
  SIMILARITY_THRESHOLD = 0.97
18
  AUDIO_FILES_LIMIT = 5
 
54
 
55
  def transcribe_audio_data(
56
  client: TestClient, data: bytes
57
+ ) -> TranscriptionVerboseJsonResponse:
58
  response = client.post(
59
  TRANSCRIBE_ENDPOINT,
60
  files={"file": ("audio.raw", data, "audio/raw")},
61
  )
62
  data = json.loads(response.json()) # TODO: figure this out
63
+ return TranscriptionVerboseJsonResponse(**data) # type: ignore
64
 
65
 
66
  @pytest.mark.parametrize("file_path", file_paths)
 
70
  with open(file_path, "rb") as file:
71
  data = file.read()
72
 
73
+ streaming_transcription: TranscriptionVerboseJsonResponse = None # type: ignore
74
  thread = threading.Thread(
75
  target=stream_audio_data, args=(ws, data), kwargs={"speed": 4.0}
76
  )
77
  thread.start()
78
  while True:
79
  try:
80
+ streaming_transcription = TranscriptionVerboseJsonResponse(
81
+ **ws.receive_json()
82
+ )
83
  except WebSocketDisconnect:
84
  break
85
  file_transcription = transcribe_audio_data(client, data)