aleger committed
Commit 1227edc · 1 Parent(s): 8224323

initial commit
Dockerfile ADDED
@@ -0,0 +1,63 @@
+ # ===========================================
+ #
+ # THIS IS A GENERATED DOCKERFILE. DO NOT EDIT
+ #
+ # ===========================================
+
+ # Block SETUP_BENTO_BASE_IMAGE
+ FROM python:3.10-slim as base-container
+
+ ENV LANG=C.UTF-8
+
+ ENV LC_ALL=C.UTF-8
+
+ ENV PYTHONIOENCODING=UTF-8
+
+ ENV PYTHONUNBUFFERED=1
+
+
+ USER root
+
+ ENV DEBIAN_FRONTEND=noninteractive
+ RUN rm -f /etc/apt/apt.conf.d/docker-clean; echo 'Binary::apt::APT::Keep-Downloaded-Packages "true";' > /etc/apt/apt.conf.d/keep-cache
+ RUN set -eux && \
+     apt-get update -y && \
+     apt-get install -q -y --no-install-recommends --allow-remove-essential \
+         ca-certificates gnupg2 bash build-essential ffmpeg
+ # Block SETUP_BENTO_USER
+ ARG BENTO_USER=bentoml
+ ARG BENTO_USER_UID=1000
+ ARG BENTO_USER_GID=1000
+ RUN groupadd -g $BENTO_USER_GID -o $BENTO_USER && useradd -m -u $BENTO_USER_UID -g $BENTO_USER_GID -o -r $BENTO_USER
+ ENV BENTOML_CONFIG=src/configuration.yaml
+ ARG BENTO_PATH=/home/bentoml/bento
+ ENV BENTO_PATH=$BENTO_PATH
+ ENV BENTOML_HOME=/home/bentoml/
+
+ RUN mkdir $BENTO_PATH && chown bentoml:bentoml $BENTO_PATH -R
+ WORKDIR $BENTO_PATH
+
+
+ # Block SETUP_BENTO_COMPONENTS
+ COPY --chown=bentoml:bentoml ./env/python ./env/python/
+ # install python packages with install.sh
+ RUN bash -euxo pipefail /home/bentoml/bento/env/python/install.sh
+ COPY --chown=bentoml:bentoml . ./
+
+ # Block SETUP_BENTO_ENTRYPOINT
+ RUN rm -rf /var/lib/{apt,cache,log}
+ # Default port for BentoServer
+ EXPOSE 7860
+
+ # Expose Prometheus port
+ EXPOSE 3001
+
+ RUN chmod +x /home/bentoml/bento/env/docker/entrypoint.sh
+
+ USER bentoml
+
+ ENTRYPOINT [ "/home/bentoml/bento/env/docker/entrypoint.sh" ]
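
A quick way to exercise the generated image locally (a sketch; the image tag and host port mappings are illustrative choices, not part of this commit):

    # build from the bento directory, which contains env/, src/, bento.yaml, ...
    $ docker build -t speech_to_text_pipeline .

    # 7860 is the BentoServer HTTP port exposed above; 3001 serves Prometheus metrics
    $ docker run --rm -p 7860:7860 -p 3001:3001 speech_to_text_pipeline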
apis/openapi.yaml ADDED
@@ -0,0 +1,219 @@
+ components:
+   schemas:
+     InternalServerError:
+       description: Internal Server Error
+       properties:
+         msg:
+           title: Message
+           type: string
+         type:
+           title: Error Type
+           type: string
+       required:
+       - msg
+       - type
+       title: InternalServerError
+       type: object
+     InvalidArgument:
+       description: Bad Request
+       properties:
+         msg:
+           title: Message
+           type: string
+         type:
+           title: Error Type
+           type: string
+       required:
+       - msg
+       - type
+       title: InvalidArgument
+       type: object
+     NotFound:
+       description: Not Found
+       properties:
+         msg:
+           title: Message
+           type: string
+         type:
+           title: Error Type
+           type: string
+       required:
+       - msg
+       - type
+       title: NotFound
+       type: object
+ info:
+   contact:
+     name: BentoML Team
+   description: "# speech_to_text_pipeline:None\n\n[![pypi_status](https://img.shields.io/badge/BentoML-1.0.20-informational)](https://pypi.org/project/BentoML)\n\
+     [![documentation_status](https://readthedocs.org/projects/bentoml/badge/?version=latest)](https://docs.bentoml.org/)\n\
+     [![join_slack](https://badgen.net/badge/Join/BentoML%20Slack/cyan?icon=slack)](https://l.bentoml.com/join-slack-swagger)\n\
+     [![BentoML GitHub Repo](https://img.shields.io/github/stars/bentoml/bentoml?style=social)](https://github.com/bentoml/BentoML)\n\
+     [![Twitter Follow](https://img.shields.io/twitter/follow/bentomlai?label=Follow%20BentoML&style=social)](https://twitter.com/bentomlai)\n\
+     \nThis is a Machine Learning Service created with BentoML.\n| InferenceAPI | Input\
+     \ | Output |\n| ------------ | ----- | ------ |\n| POST [`/process_uploaded_file`](#operations-Service_APIs-speech_to_text_pipeline__process_uploaded_file)\
+     \ | BytesIOFile | JSON |\n| POST [`/zip_transcription`](#operations-Service_APIs-speech_to_text_pipeline__zip_transcription)\
+     \ | JSON | BytesIOFile |\n\n\n\n\n## Help\n\n* [\U0001F4D6 Documentation](https://docs.bentoml.org/en/latest/):\
+     \ Learn how to use BentoML.\n* [\U0001F4AC Community](https://l.bentoml.com/join-slack-swagger):\
+     \ Join the BentoML Slack community.\n* [\U0001F41B GitHub Issues](https://github.com/bentoml/BentoML/issues):\
+     \ Report bugs and feature requests.\n* Tip: you can also [customize this README](https://docs.bentoml.org/en/latest/concepts/bento.html#description).\n"
+   title: speech_to_text_pipeline
+   version: None
+ openapi: 3.0.2
+ paths:
+   /healthz:
+     get:
+       description: Health check endpoint. Expecting an empty response with status
+         code <code>200</code> when the service is in health state. The <code>/healthz</code>
+         endpoint is <b>deprecated</b>. (since Kubernetes v1.16)
+       responses:
+         '200':
+           description: Successful Response
+       tags:
+       - Infrastructure
+   /livez:
+     get:
+       description: Health check endpoint for Kubernetes. Healthy endpoint responses
+         with a <code>200</code> OK status.
+       responses:
+         '200':
+           description: Successful Response
+       tags:
+       - Infrastructure
+   /metrics:
+     get:
+       description: Prometheus metrics endpoint. The <code>/metrics</code> responses
+         with a <code>200</code>. The output can then be used by a Prometheus sidecar
+         to scrape the metrics of the service.
+       responses:
+         '200':
+           description: Successful Response
+       tags:
+       - Infrastructure
+   /process_uploaded_file:
+     post:
+       consumes:
+       - null
+       description: ''
+       operationId: speech_to_text_pipeline__process_uploaded_file
+       produces:
+       - application/json
+       requestBody:
+         content:
+           '*/*':
+             schema:
+               format: binary
+               type: string
+         required: true
+         x-bentoml-io-descriptor:
+           args:
+             kind: binaryio
+             mime_type: null
+           id: bentoml.io.File
+       responses:
+         200:
+           content:
+             application/json:
+               schema:
+                 type: object
+           description: Successful Response
+           x-bentoml-io-descriptor:
+             args:
+               has_json_encoder: true
+               has_pydantic_model: false
+             id: bentoml.io.JSON
+         400:
+           content:
+             application/json:
+               schema:
+                 $ref: '#/components/schemas/InvalidArgument'
+           description: Bad Request
+         404:
+           content:
+             application/json:
+               schema:
+                 $ref: '#/components/schemas/NotFound'
+           description: Not Found
+         500:
+           content:
+             application/json:
+               schema:
+                 $ref: '#/components/schemas/InternalServerError'
+           description: Internal Server Error
+       summary: "InferenceAPI(BytesIOFile \u2192 JSON)"
+       tags:
+       - Service APIs
+       x-bentoml-name: process_uploaded_file
+   /readyz:
+     get:
+       description: A <code>200</code> OK status from <code>/readyz</code> endpoint
+         indicated the service is ready to accept traffic. From that point and onward,
+         Kubernetes will use <code>/livez</code> endpoint to perform periodic health
+         checks.
+       responses:
+         '200':
+           description: Successful Response
+       tags:
+       - Infrastructure
+   /zip_transcription:
+     post:
+       consumes:
+       - application/json
+       description: ''
+       operationId: speech_to_text_pipeline__zip_transcription
+       produces:
+       - null
+       requestBody:
+         content:
+           application/json:
+             schema:
+               type: object
+         required: true
+         x-bentoml-io-descriptor:
+           args:
+             has_json_encoder: true
+             has_pydantic_model: false
+           id: bentoml.io.JSON
+       responses:
+         200:
+           content:
+             '*/*':
+               schema:
+                 format: binary
+                 type: string
+           description: Successful Response
+           x-bentoml-io-descriptor:
+             args:
+               kind: binaryio
+               mime_type: null
+             id: bentoml.io.File
+         400:
+           content:
+             application/json:
+               schema:
+                 $ref: '#/components/schemas/InvalidArgument'
+           description: Bad Request
+         404:
+           content:
+             application/json:
+               schema:
+                 $ref: '#/components/schemas/NotFound'
+           description: Not Found
+         500:
+           content:
+             application/json:
+               schema:
+                 $ref: '#/components/schemas/InternalServerError'
+           description: Internal Server Error
+       summary: "InferenceAPI(JSON \u2192 BytesIOFile)"
+       tags:
+       - Service APIs
+       x-bentoml-name: zip_transcription
+ servers:
+ - url: .
+ tags:
+ - description: BentoML Service API endpoints for inference.
+   name: Service APIs
+ - description: Common infrastructure endpoints for observability.
+   name: Infrastructure
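
Per the spec above, /process_uploaded_file takes a raw binary body and returns JSON, while /zip_transcription takes JSON and returns a binary file. A sketch of calling both with curl (file names and the JSON payload shape are assumptions inferred from src/service.py and src/runners/transcription_zipper.py; the port assumes the containerized service listening on 7860):

    $ curl -X POST http://localhost:7860/process_uploaded_file \
        -H 'Content-Type: audio/mpeg' --data-binary @sample.mp3

    $ curl -X POST http://localhost:7860/zip_transcription \
        -H 'Content-Type: application/json' \
        -d '[["sample.mp3", "original transcript", "edited transcript"]]' \
        --output transcripts.zip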
bento.yaml ADDED
@@ -0,0 +1,64 @@
+ service: service:svc
+ name: speech_to_text_pipeline
+ version: 4246aqrsa265utka
+ bentoml_version: 1.0.20
+ creation_time: '2023-08-03T14:06:11.828884+00:00'
+ labels:
+   owner: modern-ai-team
+   stage: dev
+ models: []
+ runners:
+ - name: audio_transcriber
+   runnable_type: AudioTranscriber
+   embedded: false
+   models: []
+   resource_config: null
+ - name: audio_amplitude
+   runnable_type: AudioAmplitude
+   embedded: false
+   models: []
+   resource_config: null
+ - name: keyword_extractor
+   runnable_type: KeywordExtractor
+   embedded: false
+   models: []
+   resource_config: null
+ - name: transcription_zipper
+   runnable_type: TranscriptionZipper
+   embedded: false
+   models: []
+   resource_config: null
+ apis:
+ - name: process_uploaded_file
+   input_type: BytesIOFile
+   output_type: JSON
+ - name: zip_transcription
+   input_type: JSON
+   output_type: BytesIOFile
+ docker:
+   distro: debian
+   python_version: '3.10'
+   cuda_version: null
+   env:
+     BENTOML_CONFIG: src/configuration.yaml
+   system_packages:
+   - ffmpeg
+   setup_script: null
+   base_image: null
+   dockerfile_template: null
+ python:
+   requirements_txt: ../requirements.txt
+   packages: null
+   lock_packages: null
+   index_url: null
+   no_index: null
+   trusted_host: null
+   find_links: null
+   extra_index_url: null
+   pip_args: null
+   wheels: null
+ conda:
+   environment_yml: null
+   channels: null
+   dependencies: null
+   pip: null
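
The name and version above identify the built bento, so it can be containerized by tag (a sketch of the standard BentoML workflow):

    $ bentoml list
    $ bentoml containerize speech_to_text_pipeline:4246aqrsa265utka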
env/docker/entrypoint.sh ADDED
@@ -0,0 +1,56 @@
+ #!/usr/bin/env bash
+ set -Eeuo pipefail
+
+ # check to see if this file is being run or sourced from another script
+ _is_sourced() {
+   # https://unix.stackexchange.com/a/215279
+   [ "${#FUNCNAME[@]}" -ge 2 ] &&
+     [ "${FUNCNAME[0]}" = '_is_sourced' ] &&
+     [ "${FUNCNAME[1]}" = 'source' ]
+ }
+
+ _main() {
+   # For backwards compatibility with yatai<1.0.0, adapt the old "yatai" command to the new "start" command.
+   if [ "${#}" -gt 0 ] && [ "${1}" = 'python' ] && [ "${2}" = '-m' ] && { [ "${3}" = 'bentoml._internal.server.cli.runner' ] || [ "${3}" = "bentoml._internal.server.cli.api_server" ]; }; then # SC2235, use { } to avoid subshell overhead
+     if [ "${3}" = 'bentoml._internal.server.cli.runner' ]; then
+       set -- bentoml start-runner-server "${@:4}"
+     elif [ "${3}" = 'bentoml._internal.server.cli.api_server' ]; then
+       set -- bentoml start-http-server "${@:4}"
+     fi
+   # If no arg or first arg looks like a flag.
+   elif [[ "$#" -eq 0 ]] || [[ "${1:0:1}" =~ '-' ]]; then
+     # This is provided for backwards compatibility with places where users may have
+     # discovered this easter egg and used it in their scripts to run the container.
+     if [[ -v BENTOML_SERVE_COMPONENT ]]; then
+       echo "\$BENTOML_SERVE_COMPONENT is set! Calling 'bentoml start-*' instead"
+       if [ "${BENTOML_SERVE_COMPONENT}" = 'http_server' ]; then
+         set -- bentoml start-http-server "$@" "$BENTO_PATH"
+       elif [ "${BENTOML_SERVE_COMPONENT}" = 'grpc_server' ]; then
+         set -- bentoml start-grpc-server "$@" "$BENTO_PATH"
+       elif [ "${BENTOML_SERVE_COMPONENT}" = 'runner' ]; then
+         set -- bentoml start-runner-server "$@" "$BENTO_PATH"
+       fi
+     else
+       set -- bentoml serve "$@" "$BENTO_PATH"
+     fi
+   fi
+   # Override BENTOML_PORT if the PORT env var is present. Used for Heroku and Yatai.
+   if [[ -v PORT ]]; then
+     echo "\$PORT is set! Overriding \$BENTOML_PORT with \$PORT ($PORT)"
+     export BENTOML_PORT=$PORT
+   fi
+   # Handle serve and start commands that are passed to the container.
+   # Assumes that serve and start commands are the first arguments.
+   # Note that this is the recommended way going forward to run all bentoml containers.
+   if [ "${#}" -gt 0 ] && { [ "${1}" = 'serve' ] || [ "${1}" = 'serve-http' ] || [ "${1}" = 'serve-grpc' ] || [ "${1}" = 'start-http-server' ] || [ "${1}" = 'start-grpc-server' ] || [ "${1}" = 'start-runner-server' ]; }; then
+     exec bentoml "$@" "$BENTO_PATH"
+   else
+     # Otherwise default to running whatever the command is.
+     # This allows running bash, sh, python, etc.
+     exec "$@"
+   fi
+ }
+
+ if ! _is_sourced; then
+   _main "$@"
+ fi
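
The dispatch above means the container accepts several invocation styles (a sketch; the image tag is illustrative):

    # no args: falls through to `bentoml serve $BENTO_PATH`
    $ docker run speech_to_text_pipeline

    # recognized serve/start commands: exec'd with $BENTO_PATH appended
    $ docker run speech_to_text_pipeline serve-http

    # anything else is exec'd as-is, e.g. an interactive shell
    $ docker run -it speech_to_text_pipeline bash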
env/python/install.sh ADDED
@@ -0,0 +1,41 @@
+ #!/usr/bin/env bash
+ set -exuo pipefail
+
+ # Parent directory https://stackoverflow.com/a/246128/8643197
+ BASEDIR="$( cd -- "$( dirname -- "${BASH_SOURCE[0]:-$0}"; )" &> /dev/null && pwd 2> /dev/null; )"
+
+ PIP_ARGS=(--no-warn-script-location)
+
+ # BentoML by default generates two requirement files:
+ #  - ./env/python/requirements.lock.txt: all dependencies locked to the versions present during `build`
+ #  - ./env/python/requirements.txt: all dependencies as the user specified in code or a requirements.txt file
+ REQUIREMENTS_TXT="$BASEDIR/requirements.txt"
+ REQUIREMENTS_LOCK="$BASEDIR/requirements.lock.txt"
+ WHEELS_DIR="$BASEDIR/wheels"
+ BENTOML_VERSION=${BENTOML_VERSION:-1.0.20}
+ # Install python packages, preferring the requirements.lock.txt file if it exists
+ if [ -f "$REQUIREMENTS_LOCK" ]; then
+   echo "Installing pip packages from 'requirements.lock.txt'.."
+   pip3 install -r "$REQUIREMENTS_LOCK" "${PIP_ARGS[@]}"
+ else
+   if [ -f "$REQUIREMENTS_TXT" ]; then
+     echo "Installing pip packages from 'requirements.txt'.."
+     pip3 install -r "$REQUIREMENTS_TXT" "${PIP_ARGS[@]}"
+   fi
+ fi
+
+ # Install user-provided wheels
+ if [ -d "$WHEELS_DIR" ]; then
+   echo "Installing wheels packaged in Bento.."
+   pip3 install "$WHEELS_DIR"/*.whl "${PIP_ARGS[@]}"
+ fi
+
+ # Install BentoML from PyPI if it's not already installed
+ if python3 -c "import bentoml" &> /dev/null; then
+   existing_bentoml_version=$(python3 -c "import bentoml; print(bentoml.__version__)")
+   if [ "$existing_bentoml_version" != "$BENTOML_VERSION" ]; then
+     echo "WARNING: using BentoML version ${existing_bentoml_version}"
+   fi
+ else
+   pip3 install bentoml=="$BENTOML_VERSION"
+ fi
env/python/requirements.txt ADDED
@@ -0,0 +1,11 @@
+ bentoml==1.0.20
+ openai-whisper
+ faster-whisper
+ pydub==0.25.1
+ torch==2.0.1
+ torchvision==0.15.2
+ torchaudio==2.0.2
+ transformers==4.29.2
+ yake==0.4.8
+ fastapi
+ python-docx
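
Note that src/runners/audio_transcriber.py also imports ffmpeg (the ffmpeg-python bindings), which is not pinned here; a local install (a sketch) would need it as well:

    $ pip install -r env/python/requirements.txt ffmpeg-python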
env/python/version.txt ADDED
@@ -0,0 +1 @@
+ 3.10.12
src/configuration.yaml ADDED
@@ -0,0 +1,14 @@
+ runners:
+   timeout: 900
+
+ api_server:
+   http:
+     cors:
+       enabled: True
+       access_control_allow_origins: [ "*" ]
+       access_control_allow_methods: ["GET", "OPTIONS", "POST", "HEAD", "PUT"]
+       access_control_allow_credentials: True
+       access_control_allow_headers: [ "*" ]
+       access_control_max_age: Null
+       # access_control_expose_headers: ["Content-Length"]
+       # default_max_request_size: 104857600
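
The CORS settings above can be verified with a preflight request once the server is up (a sketch; the origin header is illustrative, and the port assumes the containerized service on 7860):

    $ curl -i -X OPTIONS http://localhost:7860/process_uploaded_file \
        -H 'Origin: http://example.com' \
        -H 'Access-Control-Request-Method: POST'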
src/runners/__init__.py ADDED
File without changes
src/runners/audio_amplitude.py ADDED
@@ -0,0 +1,41 @@
+ import bentoml
+ import numpy as np
+ from pydub import AudioSegment
+ from pydub.utils import mediainfo
+
+
+ class AudioAmplitude(bentoml.Runnable):
+     SUPPORTED_RESOURCES = ("nvidia.com/gpu", "cpu")
+     SUPPORTS_CPU_MULTI_THREADING = True
+
+     SAMPLE_RATE = 16000
+
+     def __init__(self):
+         pass
+
+     @bentoml.Runnable.method(batchable=False)
+     def get_audio_amplitude(self, temp_file_path):
+         # bit_rate = int(int(mediainfo(temp_file_path)["bit_rate"])/1000)
+         audio = AudioSegment.from_file(temp_file_path)
+
+         # get raw audio data as a bytestring
+         raw_data = audio.raw_data
+         # get the frame rate
+         sample_rate = audio.frame_rate
+         # get amount of bytes contained in one sample
+         sample_size = audio.sample_width
+         # get channels
+         channels = audio.channels
+
+         print("INFO: ", sample_rate, sample_size, channels)
+
+         audio_array = np.array(audio.get_array_of_samples())
+
+         # Normalize the audio array to values between -1 and 1
+         normalized_audio = audio_array / (2 ** 15)  # assuming 16-bit signed samples (2**15 is the max magnitude)
+
+         # Convert interleaved stereo to mono (average the two channels)
+         if audio.channels == 2:
+             normalized_audio = (normalized_audio[::2] + normalized_audio[1::2]) / 2
+
+         return normalized_audio
src/runners/audio_transcriber.py ADDED
@@ -0,0 +1,73 @@
+ import tempfile
+
+ import bentoml
+ import ffmpeg
+ import numpy as np
+ import torch
+ from faster_whisper import WhisperModel
+ from transformers import pipeline
+
+
+ class AudioTranscriber(bentoml.Runnable):
+     SUPPORTED_RESOURCES = ("nvidia.com/gpu", "cpu")
+     SUPPORTS_CPU_MULTI_THREADING = True
+
+     SAMPLE_RATE = 16000
+
+     # Assumed checkpoint for the transformers fallback path; the original code
+     # referenced self.model/self.tokenizer/self.extractor, which were never defined.
+     HF_MODEL_ID = "openai/whisper-base"
+
+     def __init__(self):
+         self.faster_model = WhisperModel("base")
+         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+     @bentoml.Runnable.method(batchable=False)
+     def transcribe_audio_faster(self, temp_file_path):
+         segments, info = self.faster_model.transcribe(temp_file_path)
+         transcription = []
+         segment_info = []
+         for segment in segments:
+             print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
+             transcription.append(segment.text)
+             segment_info.append((segment.start, segment.end))
+
+         return transcription, info, segment_info
+
+     @bentoml.Runnable.method(batchable=False)
+     def transcribe_audio(self, file):
+         with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+             temp_file.write(file.read())
+             temp_file_path = temp_file.name
+
+         # Use the temporary file path as input for ffmpeg.input()
+         try:
+             # This launches a subprocess to decode audio while down-mixing and resampling as necessary.
+             # Requires the ffmpeg CLI and `ffmpeg-python` package to be installed.
+             out, _ = (
+                 ffmpeg.input(temp_file_path, threads=0)
+                 .output("-", format="s16le", acodec="pcm_s16le", ac=1, ar=self.SAMPLE_RATE)
+                 .run(cmd="ffmpeg", capture_stdout=True, capture_stderr=True)
+             )
+         except ffmpeg.Error as e:
+             raise RuntimeError(f"Failed to load audio: {e.stderr.decode()}")
+
+         # Scale 16-bit PCM samples to floats in [-1, 1]
+         input_features = np.frombuffer(out, np.int16).flatten().astype(np.float32) / 32768.0
+
+         print("Input_features", type(input_features))
+
+         pipe = pipeline("automatic-speech-recognition",
+                         model=self.HF_MODEL_ID,  # assumption: the original passed undefined attributes here
+                         device=self.device)
+
+         result = self.get_long_transcription_whisper(input_features, pipe)
+
+         return result
+
+     @staticmethod
+     def get_long_transcription_whisper(input_features, pipe, return_timestamps=True,
+                                        chunk_length_s=10, stride_length_s=2):
+         """Get the transcription of a long audio file using the Whisper model.
+
+         input_features: numpy.ndarray of mono float32 samples at 16 kHz
+         """
+         return pipe(input_features, return_timestamps=return_timestamps,
+                     chunk_length_s=chunk_length_s, stride_length_s=stride_length_s)
src/runners/keyword_extractor.py ADDED
@@ -0,0 +1,18 @@
+ import bentoml
+ import yake
+
+
+ class KeywordExtractor(bentoml.Runnable):
+     SUPPORTED_RESOURCES = ("cpu",)
+     SUPPORTS_CPU_MULTI_THREADING = False
+
+     def __init__(self, **kwargs):
+         self.keyword_extractor = yake.KeywordExtractor(**kwargs)
+
+     @bentoml.Runnable.method(batchable=False)
+     def extract_keywords(self, transcript, lang, key_count):
+         self.keyword_extractor.lan = lang
+         self.keyword_extractor.top = key_count
+         keywords = self.keyword_extractor.extract_keywords(transcript)
+         print("keywords successfully extracted")
+         return keywords
src/runners/transcription_zipper.py ADDED
@@ -0,0 +1,103 @@
+ import io
+ import os.path
+ import zipfile
+ import bentoml
+ import tempfile
+ from pathlib import Path
+
+ from docx.enum.text import WD_COLOR_INDEX
+
+
+ class TranscriptionZipper(bentoml.Runnable):
+     SUPPORTED_RESOURCES = ("nvidia.com/gpu", "cpu")
+     SUPPORTS_CPU_MULTI_THREADING = True
+
+     @bentoml.Runnable.method(batchable=False)
+     def zip_transcription(self, transcription_list):
+         zip_buffer = io.BytesIO()
+
+         for t_list in transcription_list:
+             orig_filename = t_list[0]
+             new_content = create_word_content(t_list[1], t_list[2])
+             new_content.save(orig_filename + '.docx')
+             # new_content = create_content(t_list[1], t_list[2])  # html string
+             if ".mp3" in orig_filename:
+                 file_name = orig_filename.removesuffix(".mp3")
+             else:
+                 file_name = orig_filename.removesuffix(".wav")
+             with zipfile.ZipFile(zip_buffer, "a") as zip_file:
+                 # zip_file.writestr(file_name + ".html", new_content)
+                 zip_file.write(orig_filename + '.docx')
+
+         # Return the zip file as bytes
+         return zip_buffer.getvalue()
+
+
+ # Module-level helper (called without `self` above): builds a .docx diff of the
+ # original vs. edited transcript, striking deleted words and highlighting new ones.
+ def create_word_content(old_content, new_content):
+     from docx import Document
+
+     document = Document()
+
+     # document.add_heading('Document Title', 0)
+
+     p = document.add_paragraph()
+
+     old_content = old_content.split(" ")
+     changed_content = new_content.split(" ")
+     both = [word for word in changed_content if word in old_content]
+     new_content = ""
+     i = 0
+
+     # i stays 0 throughout; the loop advances by popping consumed words off
+     # the front of the lists until changed_content is exhausted.
+     while i < len(changed_content):
+         try:
+             if changed_content[i] == old_content[i]:
+                 p.add_run(" " + changed_content[i])
+                 # new_content += " " + changed_content[i]
+                 both.pop(0)
+                 old_content.pop(0)
+                 changed_content.pop(0)
+             else:
+                 old_pos = old_content.index(both[0])
+                 new_pos = changed_content.index(both[0])
+                 p.add_run(" " + " ".join(old_content[0:old_pos])).font.strike = True
+                 p.add_run(" " + " ".join(changed_content[0:new_pos])).font.highlight_color = WD_COLOR_INDEX.YELLOW
+                 # new_content += " <s>" + " ".join(old_content[0:old_pos]) + "</s> "
+                 # new_content += " ".join(changed_content[0:new_pos])
+                 del old_content[0:old_pos]
+                 del changed_content[0:new_pos]
+         except (IndexError, ValueError):
+             # No common word remains: strike the rest of the old text and
+             # highlight the rest of the new text.
+             p.add_run(" ".join(old_content[i:])).font.strike = True
+             p.add_run(" ".join(changed_content[i:])).font.highlight_color = WD_COLOR_INDEX.YELLOW
+             # new_content += " <s>" + " ".join(old_content[i:]) + "</s> "
+             # new_content += " " + " ".join(changed_content[i:])
+             break
+
+     return document
+
+
+ # HTML variant of the same diff, kept for the commented-out path above.
+ def create_content(old_content, new_content):
+     old_content = old_content.split(" ")
+     changed_content = new_content.split(" ")
+     both = [word for word in changed_content if word in old_content]
+     new_content = ""
+     i = 0
+
+     while i < len(changed_content):
+         try:
+             if changed_content[i] == old_content[i]:
+                 new_content += " " + changed_content[i]
+                 both.pop(0)
+                 old_content.pop(0)
+                 changed_content.pop(0)
+             else:
+                 old_pos = old_content.index(both[0])
+                 new_pos = changed_content.index(both[0])
+                 new_content += " <s>" + " ".join(old_content[0:old_pos]) + "</s> "
+                 new_content += " ".join(changed_content[0:new_pos])
+                 del old_content[0:old_pos]
+                 del changed_content[0:new_pos]
+         except (IndexError, ValueError):
+             new_content += " <s>" + " ".join(old_content[i:]) + "</s> "
+             new_content += " " + " ".join(changed_content[i:])
+             break
+
+     return new_content
src/service.py ADDED
@@ -0,0 +1,56 @@
+ import os
+ import tempfile
+
+ import bentoml
+ from bentoml.io import JSON, File
+
+ from runners.audio_transcriber import AudioTranscriber
+ from runners.keyword_extractor import KeywordExtractor
+ from runners.transcription_zipper import TranscriptionZipper
+ from runners.audio_amplitude import AudioAmplitude
+
+ runner_audio_transcriber = bentoml.Runner(
+     AudioTranscriber,
+     name="audio_transcriber",
+ )
+ runner_audio_amplitude = bentoml.Runner(
+     AudioAmplitude,
+     name="audio_amplitude",
+ )
+ runner_keyword_extractor = bentoml.Runner(
+     KeywordExtractor,
+     name="keyword_extractor",
+ )
+
+ runner_transcription_zipper = bentoml.Runner(
+     TranscriptionZipper,
+     name="transcription_zipper"
+ )
+
+ svc = bentoml.Service(
+     "speech_to_text_pipeline",
+     runners=[
+         runner_audio_transcriber,
+         runner_audio_amplitude,
+         runner_keyword_extractor,
+         runner_transcription_zipper,
+     ],
+ )
+
+
+ @svc.api(input=File(), output=JSON())
+ async def process_uploaded_file(file):
+     with tempfile.NamedTemporaryFile(delete=False) as temp_file:
+         temp_file.write(file.read())
+         temp_file_path = temp_file.name
+     transcript, info, segment_info = await runner_audio_transcriber.transcribe_audio_faster.async_run(temp_file_path)
+     # amplitudes = await runner_audio_amplitude.get_audio_amplitude.async_run(temp_file_path)
+     output = {"file_name": file.name, "transcript": transcript, "language": info.language,
+               "file_size": os.stat(temp_file_path).st_size, "segments": segment_info}
+     return output
+
+
+ @svc.api(input=JSON(), output=File())
+ async def zip_transcription(transcription):
+     zip_file = await runner_transcription_zipper.zip_transcription.async_run(transcription)
+     return zip_file
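
For local development, the service can also be started straight from the source tree (a sketch; assumes the dependencies from env/python/requirements.txt are installed and the working directory contains service.py):

    $ cd src
    $ BENTOML_CONFIG=configuration.yaml bentoml serve service:svc --reload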