Spaces:
Sleeping
Sleeping
sometimes the client works
Browse files
but not always. server seems to work well
- infer.py +7 -3
- model.py +7 -0
- poetry.lock +34 -140
- pyproject.toml +17 -3
- streaming_client.py +46 -2
infer.py
CHANGED
@@ -18,13 +18,16 @@ from typing import Optional
|
|
18 |
import sys
|
19 |
import asyncio
|
20 |
|
21 |
-
from model import segment_to_dict
|
22 |
|
23 |
# Configure logging
|
24 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s',
|
25 |
handlers=[logging.StreamHandler(sys.stdout)], force=True)
|
26 |
logger = logging.getLogger(__name__)
|
27 |
#logging.getLogger("asyncio").setLevel(logging.DEBUG)
|
|
|
|
|
|
|
28 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
29 |
logging.info(f'Device selected: {device}')
|
30 |
|
@@ -70,7 +73,8 @@ async def websocket_transcribe(websocket: WebSocket):
|
|
70 |
audio_file_path = temp_audio_file.name
|
71 |
|
72 |
# Call the transcribe function
|
73 |
-
segments, info = await asyncio.to_thread(model.transcribe,
|
|
|
74 |
audio_file_path,
|
75 |
language='he',
|
76 |
initial_prompt=input_data.init_prompt,
|
@@ -82,7 +86,7 @@ async def websocket_transcribe(websocket: WebSocket):
|
|
82 |
# Convert segments to list and serialize
|
83 |
segments_list = list(segments)
|
84 |
segments_serializable = [segment_to_dict(s) for s in segments_list]
|
85 |
-
|
86 |
# Send the serialized segments back to the client
|
87 |
await websocket.send_json(segments_serializable)
|
88 |
|
|
|
18 |
import sys
|
19 |
import asyncio
|
20 |
|
21 |
+
from model import segment_to_dict, get_raw_words_from_segments
|
22 |
|
23 |
# Configure logging
|
24 |
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s',
|
25 |
handlers=[logging.StreamHandler(sys.stdout)], force=True)
|
26 |
logger = logging.getLogger(__name__)
|
27 |
#logging.getLogger("asyncio").setLevel(logging.DEBUG)
|
28 |
+
|
29 |
+
logging.info(torch.__version__)
|
30 |
+
logging.info(torch.version.cuda) # Should show the installed CUDA version
|
31 |
device = 'cuda' if torch.cuda.is_available() else 'cpu'
|
32 |
logging.info(f'Device selected: {device}')
|
33 |
|
|
|
73 |
audio_file_path = temp_audio_file.name
|
74 |
|
75 |
# Call the transcribe function
|
76 |
+
# segments, info = await asyncio.to_thread(model.transcribe,
|
77 |
+
segments, info = model.transcribe(
|
78 |
audio_file_path,
|
79 |
language='he',
|
80 |
initial_prompt=input_data.init_prompt,
|
|
|
86 |
# Convert segments to list and serialize
|
87 |
segments_list = list(segments)
|
88 |
segments_serializable = [segment_to_dict(s) for s in segments_list]
|
89 |
+
logger.info(get_raw_words_from_segments(segments_list))
|
90 |
# Send the serialized segments back to the client
|
91 |
await websocket.send_json(segments_serializable)
|
92 |
|
model.py
CHANGED
@@ -51,4 +51,11 @@ def dict_to_segment(data: dict) -> Segment:
|
|
51 |
compression_ratio=data["compression_ratio"],
|
52 |
no_speech_prob=data["no_speech_prob"],
|
53 |
words=[dict_to_word(word) for word in data["words"]] if data["words"] else None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
54 |
)
|
|
|
51 |
compression_ratio=data["compression_ratio"],
|
52 |
no_speech_prob=data["no_speech_prob"],
|
53 |
words=[dict_to_word(word) for word in data["words"]] if data["words"] else None
|
54 |
+
)
|
55 |
+
|
56 |
+
def get_raw_words_from_segments(segments: list[Segment]) -> str:
|
57 |
+
return " ".join(
|
58 |
+
word.word
|
59 |
+
for segment in segments if segment.words
|
60 |
+
for word in segment.words
|
61 |
)
|
poetry.lock
CHANGED
@@ -1,4 +1,4 @@
|
|
1 |
-
# This file is automatically @generated by Poetry 1.
|
2 |
|
3 |
[[package]]
|
4 |
name = "annotated-types"
|
@@ -2028,31 +2028,21 @@ testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests", "ruff"]
|
|
2028 |
|
2029 |
[[package]]
|
2030 |
name = "torch"
|
2031 |
-
version = "2.4.1"
|
2032 |
description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
|
2033 |
optional = false
|
2034 |
python-versions = ">=3.8.0"
|
2035 |
files = [
|
2036 |
-
{file = "torch-2.4.1-cp310-cp310-
|
2037 |
-
{file = "torch-2.4.1-cp310-cp310-
|
2038 |
-
{file = "torch-2.4.1-
|
2039 |
-
{file = "torch-2.4.1-
|
2040 |
-
{file = "torch-2.4.1-
|
2041 |
-
{file = "torch-2.4.1-
|
2042 |
-
{file = "torch-2.4.1-
|
2043 |
-
{file = "torch-2.4.1-
|
2044 |
-
{file = "torch-2.4.1-
|
2045 |
-
{file = "torch-2.4.1-
|
2046 |
-
{file = "torch-2.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:ebea70ff30544fc021d441ce6b219a88b67524f01170b1c538d7d3ebb5e7f56c"},
|
2047 |
-
{file = "torch-2.4.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:72b484d5b6cec1a735bf3fa5a1c4883d01748698c5e9cfdbeb4ffab7c7987e0d"},
|
2048 |
-
{file = "torch-2.4.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:c99e1db4bf0c5347107845d715b4aa1097e601bdc36343d758963055e9599d93"},
|
2049 |
-
{file = "torch-2.4.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:b57f07e92858db78c5b72857b4f0b33a65b00dc5d68e7948a8494b0314efb880"},
|
2050 |
-
{file = "torch-2.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:f18197f3f7c15cde2115892b64f17c80dbf01ed72b008020e7da339902742cf6"},
|
2051 |
-
{file = "torch-2.4.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:5fc1d4d7ed265ef853579caf272686d1ed87cebdcd04f2a498f800ffc53dab71"},
|
2052 |
-
{file = "torch-2.4.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:40f6d3fe3bae74efcf08cb7f8295eaddd8a838ce89e9d26929d4edd6d5e4329d"},
|
2053 |
-
{file = "torch-2.4.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:c9299c16c9743001ecef515536ac45900247f4338ecdf70746f2461f9e4831db"},
|
2054 |
-
{file = "torch-2.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:6bce130f2cd2d52ba4e2c6ada461808de7e5eccbac692525337cfb4c19421846"},
|
2055 |
-
{file = "torch-2.4.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:a38de2803ee6050309aac032676536c3d3b6a9804248537e38e098d0e14817ec"},
|
2056 |
]
|
2057 |
|
2058 |
[package.dependencies]
|
@@ -2071,7 +2061,6 @@ nvidia-cusolver-cu12 = {version = "11.4.5.107", markers = "platform_system == \"
|
|
2071 |
nvidia-cusparse-cu12 = {version = "12.1.0.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
|
2072 |
nvidia-nccl-cu12 = {version = "2.20.5", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
|
2073 |
nvidia-nvtx-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
|
2074 |
-
setuptools = "*"
|
2075 |
sympy = "*"
|
2076 |
triton = {version = "3.0.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.13\""}
|
2077 |
typing-extensions = ">=4.8.0"
|
@@ -2080,38 +2069,38 @@ typing-extensions = ">=4.8.0"
|
|
2080 |
opt-einsum = ["opt-einsum (>=3.3)"]
|
2081 |
optree = ["optree (>=0.11.0)"]
|
2082 |
|
|
|
|
|
|
|
|
|
|
|
2083 |
[[package]]
|
2084 |
name = "torchaudio"
|
2085 |
-
version = "2.4.1"
|
2086 |
description = "An audio package for PyTorch"
|
2087 |
optional = false
|
2088 |
python-versions = "*"
|
2089 |
files = [
|
2090 |
-
{file = "torchaudio-2.4.1-cp310-cp310-
|
2091 |
-
{file = "torchaudio-2.4.1-cp310-cp310-
|
2092 |
-
{file = "torchaudio-2.4.1-
|
2093 |
-
{file = "torchaudio-2.4.1-
|
2094 |
-
{file = "torchaudio-2.4.1-
|
2095 |
-
{file = "torchaudio-2.4.1-
|
2096 |
-
{file = "torchaudio-2.4.1-
|
2097 |
-
{file = "torchaudio-2.4.1-
|
2098 |
-
{file = "torchaudio-2.4.1-
|
2099 |
-
{file = "torchaudio-2.4.1-
|
2100 |
-
{file = "torchaudio-2.4.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:5b62fc7b16ed708b0c07d4393137797e92f63fc3bd5705607d97ba6a9a7cf3f0"},
|
2101 |
-
{file = "torchaudio-2.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:d721b186aae7bd8752c9ad95213f5d650926597bb9060728dfe476986a1ff570"},
|
2102 |
-
{file = "torchaudio-2.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4ea0fd00142fe795c75bcc20a303981b56f2327c7f7d321b42a8fef1d78aafa9"},
|
2103 |
-
{file = "torchaudio-2.4.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:375d8740c8035a50faca7a5afe2fbdb712aa8733715b971b2af61b4003fa1c41"},
|
2104 |
-
{file = "torchaudio-2.4.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:74d19cf9ca3dad394afcabb7e6f7ed9ab9f59f2540d502826c7ec3e33985251d"},
|
2105 |
-
{file = "torchaudio-2.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:40e9fa8fdc8d328ea4aa90be65fd34c5ef975610dbd707545e3664393a8a2497"},
|
2106 |
-
{file = "torchaudio-2.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3adce550850902b9aa6cd2378ccd720ac9ec8cf31e2eba9743ccc84ffcbe76d6"},
|
2107 |
-
{file = "torchaudio-2.4.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:98d8e03703f96b13a8d172d1ccdc7badb338227fd762985fdcea6b30f6697bdb"},
|
2108 |
-
{file = "torchaudio-2.4.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:36c7e7bc6b358cbf42b769c80206780fa1497d141a985c6b3e7768de44524e9a"},
|
2109 |
-
{file = "torchaudio-2.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:f46e34ab3866ad8d8ace0673cd11e697c5cde6a3b7a4d8d789207d4d8badbb6e"},
|
2110 |
]
|
2111 |
|
2112 |
[package.dependencies]
|
2113 |
torch = "2.4.1"
|
2114 |
|
|
|
|
|
|
|
|
|
|
|
2115 |
[[package]]
|
2116 |
name = "tqdm"
|
2117 |
version = "4.66.5"
|
@@ -2284,101 +2273,6 @@ docs = ["Sphinx (>=6.0)", "myst-parser (>=2.0.0)", "sphinx-rtd-theme (>=1.1.0)"]
|
|
2284 |
optional = ["python-socks", "wsaccel"]
|
2285 |
test = ["websockets"]
|
2286 |
|
2287 |
-
[[package]]
|
2288 |
-
name = "websockets"
|
2289 |
-
version = "13.0.1"
|
2290 |
-
description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)"
|
2291 |
-
optional = false
|
2292 |
-
python-versions = ">=3.8"
|
2293 |
-
files = [
|
2294 |
-
{file = "websockets-13.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1841c9082a3ba4a05ea824cf6d99570a6a2d8849ef0db16e9c826acb28089e8f"},
|
2295 |
-
{file = "websockets-13.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c5870b4a11b77e4caa3937142b650fbbc0914a3e07a0cf3131f35c0587489c1c"},
|
2296 |
-
{file = "websockets-13.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f1d3d1f2eb79fe7b0fb02e599b2bf76a7619c79300fc55f0b5e2d382881d4f7f"},
|
2297 |
-
{file = "websockets-13.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:15c7d62ee071fa94a2fc52c2b472fed4af258d43f9030479d9c4a2de885fd543"},
|
2298 |
-
{file = "websockets-13.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6724b554b70d6195ba19650fef5759ef11346f946c07dbbe390e039bcaa7cc3d"},
|
2299 |
-
{file = "websockets-13.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56a952fa2ae57a42ba7951e6b2605e08a24801a4931b5644dfc68939e041bc7f"},
|
2300 |
-
{file = "websockets-13.0.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:17118647c0ea14796364299e942c330d72acc4b248e07e639d34b75067b3cdd8"},
|
2301 |
-
{file = "websockets-13.0.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:64a11aae1de4c178fa653b07d90f2fb1a2ed31919a5ea2361a38760192e1858b"},
|
2302 |
-
{file = "websockets-13.0.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:0617fd0b1d14309c7eab6ba5deae8a7179959861846cbc5cb528a7531c249448"},
|
2303 |
-
{file = "websockets-13.0.1-cp310-cp310-win32.whl", hash = "sha256:11f9976ecbc530248cf162e359a92f37b7b282de88d1d194f2167b5e7ad80ce3"},
|
2304 |
-
{file = "websockets-13.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:c3c493d0e5141ec055a7d6809a28ac2b88d5b878bb22df8c621ebe79a61123d0"},
|
2305 |
-
{file = "websockets-13.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:699ba9dd6a926f82a277063603fc8d586b89f4cb128efc353b749b641fcddda7"},
|
2306 |
-
{file = "websockets-13.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cf2fae6d85e5dc384bf846f8243ddaa9197f3a1a70044f59399af001fd1f51d4"},
|
2307 |
-
{file = "websockets-13.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:52aed6ef21a0f1a2a5e310fb5c42d7555e9c5855476bbd7173c3aa3d8a0302f2"},
|
2308 |
-
{file = "websockets-13.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8eb2b9a318542153674c6e377eb8cb9ca0fc011c04475110d3477862f15d29f0"},
|
2309 |
-
{file = "websockets-13.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5df891c86fe68b2c38da55b7aea7095beca105933c697d719f3f45f4220a5e0e"},
|
2310 |
-
{file = "websockets-13.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fac2d146ff30d9dd2fcf917e5d147db037a5c573f0446c564f16f1f94cf87462"},
|
2311 |
-
{file = "websockets-13.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b8ac5b46fd798bbbf2ac6620e0437c36a202b08e1f827832c4bf050da081b501"},
|
2312 |
-
{file = "websockets-13.0.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:46af561eba6f9b0848b2c9d2427086cabadf14e0abdd9fde9d72d447df268418"},
|
2313 |
-
{file = "websockets-13.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b5a06d7f60bc2fc378a333978470dfc4e1415ee52f5f0fce4f7853eb10c1e9df"},
|
2314 |
-
{file = "websockets-13.0.1-cp311-cp311-win32.whl", hash = "sha256:556e70e4f69be1082e6ef26dcb70efcd08d1850f5d6c5f4f2bcb4e397e68f01f"},
|
2315 |
-
{file = "websockets-13.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:67494e95d6565bf395476e9d040037ff69c8b3fa356a886b21d8422ad86ae075"},
|
2316 |
-
{file = "websockets-13.0.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f9c9e258e3d5efe199ec23903f5da0eeaad58cf6fccb3547b74fd4750e5ac47a"},
|
2317 |
-
{file = "websockets-13.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6b41a1b3b561f1cba8321fb32987552a024a8f67f0d05f06fcf29f0090a1b956"},
|
2318 |
-
{file = "websockets-13.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f73e676a46b0fe9426612ce8caeca54c9073191a77c3e9d5c94697aef99296af"},
|
2319 |
-
{file = "websockets-13.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f613289f4a94142f914aafad6c6c87903de78eae1e140fa769a7385fb232fdf"},
|
2320 |
-
{file = "websockets-13.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0f52504023b1480d458adf496dc1c9e9811df4ba4752f0bc1f89ae92f4f07d0c"},
|
2321 |
-
{file = "websockets-13.0.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:139add0f98206cb74109faf3611b7783ceafc928529c62b389917a037d4cfdf4"},
|
2322 |
-
{file = "websockets-13.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:47236c13be337ef36546004ce8c5580f4b1150d9538b27bf8a5ad8edf23ccfab"},
|
2323 |
-
{file = "websockets-13.0.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c44ca9ade59b2e376612df34e837013e2b273e6c92d7ed6636d0556b6f4db93d"},
|
2324 |
-
{file = "websockets-13.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9bbc525f4be3e51b89b2a700f5746c2a6907d2e2ef4513a8daafc98198b92237"},
|
2325 |
-
{file = "websockets-13.0.1-cp312-cp312-win32.whl", hash = "sha256:3624fd8664f2577cf8de996db3250662e259bfbc870dd8ebdcf5d7c6ac0b5185"},
|
2326 |
-
{file = "websockets-13.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0513c727fb8adffa6d9bf4a4463b2bade0186cbd8c3604ae5540fae18a90cb99"},
|
2327 |
-
{file = "websockets-13.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:1ee4cc030a4bdab482a37462dbf3ffb7e09334d01dd37d1063be1136a0d825fa"},
|
2328 |
-
{file = "websockets-13.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:dbb0b697cc0655719522406c059eae233abaa3243821cfdfab1215d02ac10231"},
|
2329 |
-
{file = "websockets-13.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:acbebec8cb3d4df6e2488fbf34702cbc37fc39ac7abf9449392cefb3305562e9"},
|
2330 |
-
{file = "websockets-13.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63848cdb6fcc0bf09d4a155464c46c64ffdb5807ede4fb251da2c2692559ce75"},
|
2331 |
-
{file = "websockets-13.0.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:872afa52a9f4c414d6955c365b6588bc4401272c629ff8321a55f44e3f62b553"},
|
2332 |
-
{file = "websockets-13.0.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05e70fec7c54aad4d71eae8e8cab50525e899791fc389ec6f77b95312e4e9920"},
|
2333 |
-
{file = "websockets-13.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e82db3756ccb66266504f5a3de05ac6b32f287faacff72462612120074103329"},
|
2334 |
-
{file = "websockets-13.0.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4e85f46ce287f5c52438bb3703d86162263afccf034a5ef13dbe4318e98d86e7"},
|
2335 |
-
{file = "websockets-13.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f3fea72e4e6edb983908f0db373ae0732b275628901d909c382aae3b592589f2"},
|
2336 |
-
{file = "websockets-13.0.1-cp313-cp313-win32.whl", hash = "sha256:254ecf35572fca01a9f789a1d0f543898e222f7b69ecd7d5381d8d8047627bdb"},
|
2337 |
-
{file = "websockets-13.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:ca48914cdd9f2ccd94deab5bcb5ac98025a5ddce98881e5cce762854a5de330b"},
|
2338 |
-
{file = "websockets-13.0.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:b74593e9acf18ea5469c3edaa6b27fa7ecf97b30e9dabd5a94c4c940637ab96e"},
|
2339 |
-
{file = "websockets-13.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:132511bfd42e77d152c919147078460c88a795af16b50e42a0bd14f0ad71ddd2"},
|
2340 |
-
{file = "websockets-13.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:165bedf13556f985a2aa064309baa01462aa79bf6112fbd068ae38993a0e1f1b"},
|
2341 |
-
{file = "websockets-13.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e801ca2f448850685417d723ec70298feff3ce4ff687c6f20922c7474b4746ae"},
|
2342 |
-
{file = "websockets-13.0.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:30d3a1f041360f029765d8704eae606781e673e8918e6b2c792e0775de51352f"},
|
2343 |
-
{file = "websockets-13.0.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67648f5e50231b5a7f6d83b32f9c525e319f0ddc841be0de64f24928cd75a603"},
|
2344 |
-
{file = "websockets-13.0.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:4f0426d51c8f0926a4879390f53c7f5a855e42d68df95fff6032c82c888b5f36"},
|
2345 |
-
{file = "websockets-13.0.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:ef48e4137e8799998a343706531e656fdec6797b80efd029117edacb74b0a10a"},
|
2346 |
-
{file = "websockets-13.0.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:249aab278810bee585cd0d4de2f08cfd67eed4fc75bde623be163798ed4db2eb"},
|
2347 |
-
{file = "websockets-13.0.1-cp38-cp38-win32.whl", hash = "sha256:06c0a667e466fcb56a0886d924b5f29a7f0886199102f0a0e1c60a02a3751cb4"},
|
2348 |
-
{file = "websockets-13.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1f3cf6d6ec1142412d4535adabc6bd72a63f5f148c43fe559f06298bc21953c9"},
|
2349 |
-
{file = "websockets-13.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:1fa082ea38d5de51dd409434edc27c0dcbd5fed2b09b9be982deb6f0508d25bc"},
|
2350 |
-
{file = "websockets-13.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4a365bcb7be554e6e1f9f3ed64016e67e2fa03d7b027a33e436aecf194febb63"},
|
2351 |
-
{file = "websockets-13.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:10a0dc7242215d794fb1918f69c6bb235f1f627aaf19e77f05336d147fce7c37"},
|
2352 |
-
{file = "websockets-13.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59197afd478545b1f73367620407b0083303569c5f2d043afe5363676f2697c9"},
|
2353 |
-
{file = "websockets-13.0.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d20516990d8ad557b5abeb48127b8b779b0b7e6771a265fa3e91767596d7d97"},
|
2354 |
-
{file = "websockets-13.0.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a1a2e272d067030048e1fe41aa1ec8cfbbaabce733b3d634304fa2b19e5c897f"},
|
2355 |
-
{file = "websockets-13.0.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ad327ac80ba7ee61da85383ca8822ff808ab5ada0e4a030d66703cc025b021c4"},
|
2356 |
-
{file = "websockets-13.0.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:518f90e6dd089d34eaade01101fd8a990921c3ba18ebbe9b0165b46ebff947f0"},
|
2357 |
-
{file = "websockets-13.0.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:68264802399aed6fe9652e89761031acc734fc4c653137a5911c2bfa995d6d6d"},
|
2358 |
-
{file = "websockets-13.0.1-cp39-cp39-win32.whl", hash = "sha256:a5dc0c42ded1557cc7c3f0240b24129aefbad88af4f09346164349391dea8e58"},
|
2359 |
-
{file = "websockets-13.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:b448a0690ef43db5ef31b3a0d9aea79043882b4632cfc3eaab20105edecf6097"},
|
2360 |
-
{file = "websockets-13.0.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:faef9ec6354fe4f9a2c0bbb52fb1ff852effc897e2a4501e25eb3a47cb0a4f89"},
|
2361 |
-
{file = "websockets-13.0.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:03d3f9ba172e0a53e37fa4e636b86cc60c3ab2cfee4935e66ed1d7acaa4625ad"},
|
2362 |
-
{file = "websockets-13.0.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d450f5a7a35662a9b91a64aefa852f0c0308ee256122f5218a42f1d13577d71e"},
|
2363 |
-
{file = "websockets-13.0.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3f55b36d17ac50aa8a171b771e15fbe1561217510c8768af3d546f56c7576cdc"},
|
2364 |
-
{file = "websockets-13.0.1-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14b9c006cac63772b31abbcd3e3abb6228233eec966bf062e89e7fa7ae0b7333"},
|
2365 |
-
{file = "websockets-13.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:b79915a1179a91f6c5f04ece1e592e2e8a6bd245a0e45d12fd56b2b59e559a32"},
|
2366 |
-
{file = "websockets-13.0.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:f40de079779acbcdbb6ed4c65af9f018f8b77c5ec4e17a4b737c05c2db554491"},
|
2367 |
-
{file = "websockets-13.0.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:80e4ba642fc87fa532bac07e5ed7e19d56940b6af6a8c61d4429be48718a380f"},
|
2368 |
-
{file = "websockets-13.0.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2a02b0161c43cc9e0232711eff846569fad6ec836a7acab16b3cf97b2344c060"},
|
2369 |
-
{file = "websockets-13.0.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6aa74a45d4cdc028561a7d6ab3272c8b3018e23723100b12e58be9dfa5a24491"},
|
2370 |
-
{file = "websockets-13.0.1-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00fd961943b6c10ee6f0b1130753e50ac5dcd906130dcd77b0003c3ab797d026"},
|
2371 |
-
{file = "websockets-13.0.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:d93572720d781331fb10d3da9ca1067817d84ad1e7c31466e9f5e59965618096"},
|
2372 |
-
{file = "websockets-13.0.1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:71e6e5a3a3728886caee9ab8752e8113670936a193284be9d6ad2176a137f376"},
|
2373 |
-
{file = "websockets-13.0.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:c4a6343e3b0714e80da0b0893543bf9a5b5fa71b846ae640e56e9abc6fbc4c83"},
|
2374 |
-
{file = "websockets-13.0.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a678532018e435396e37422a95e3ab87f75028ac79570ad11f5bf23cd2a7d8c"},
|
2375 |
-
{file = "websockets-13.0.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6716c087e4aa0b9260c4e579bb82e068f84faddb9bfba9906cb87726fa2e870"},
|
2376 |
-
{file = "websockets-13.0.1-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e33505534f3f673270dd67f81e73550b11de5b538c56fe04435d63c02c3f26b5"},
|
2377 |
-
{file = "websockets-13.0.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:acab3539a027a85d568c2573291e864333ec9d912675107d6efceb7e2be5d980"},
|
2378 |
-
{file = "websockets-13.0.1-py3-none-any.whl", hash = "sha256:b80f0c51681c517604152eb6a572f5a9378f877763231fddb883ba2f968e8817"},
|
2379 |
-
{file = "websockets-13.0.1.tar.gz", hash = "sha256:4d6ece65099411cfd9a48d13701d7438d9c34f479046b34c50ff60bb8834e43e"},
|
2380 |
-
]
|
2381 |
-
|
2382 |
[[package]]
|
2383 |
name = "whisper"
|
2384 |
version = "1.1.10"
|
@@ -2395,4 +2289,4 @@ six = "*"
|
|
2395 |
[metadata]
|
2396 |
lock-version = "2.0"
|
2397 |
python-versions = "3.11.7"
|
2398 |
-
content-hash = "
|
|
|
1 |
+
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
|
2 |
|
3 |
[[package]]
|
4 |
name = "annotated-types"
|
|
|
2028 |
|
2029 |
[[package]]
|
2030 |
name = "torch"
|
2031 |
+
version = "2.4.1+cu121"
|
2032 |
description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
|
2033 |
optional = false
|
2034 |
python-versions = ">=3.8.0"
|
2035 |
files = [
|
2036 |
+
{file = "torch-2.4.1+cu121-cp310-cp310-linux_x86_64.whl", hash = "sha256:9a5f0b103cfe840b3568416aa5067f6e7b9fec67d9c5659fd43b1207450fe975"},
|
2037 |
+
{file = "torch-2.4.1+cu121-cp310-cp310-win_amd64.whl", hash = "sha256:fe3bf682e86c08d6a8ec0ee30811732487fa688fc556d6e8f92d853d85507c0d"},
|
2038 |
+
{file = "torch-2.4.1+cu121-cp311-cp311-linux_x86_64.whl", hash = "sha256:914d128e5abcbbe79ca1b9eb5311b185444f1b2d7117df555fe418487ecfb894"},
|
2039 |
+
{file = "torch-2.4.1+cu121-cp311-cp311-win_amd64.whl", hash = "sha256:bc1e21d7412a2f06f552a9afb92c56c8b23d174884e9383259c3cf5db4687c98"},
|
2040 |
+
{file = "torch-2.4.1+cu121-cp312-cp312-linux_x86_64.whl", hash = "sha256:ab491610b15551e08da74bab29d0933e6bf10bab44fb7d4b1328f1e845c05a53"},
|
2041 |
+
{file = "torch-2.4.1+cu121-cp312-cp312-win_amd64.whl", hash = "sha256:b30faf3224697eaed131939690e8877b05b4d4cb6da5b12cfdcba3d742e9afd0"},
|
2042 |
+
{file = "torch-2.4.1+cu121-cp38-cp38-linux_x86_64.whl", hash = "sha256:cb4f502f910b47e1e366ccf7b231dac2967d2efb47d4b8cb33fc63b4bc5eeed8"},
|
2043 |
+
{file = "torch-2.4.1+cu121-cp38-cp38-win_amd64.whl", hash = "sha256:a48b991cd861266523cbed4705f89bef09669d5d2bbfa2524486156f74a222a8"},
|
2044 |
+
{file = "torch-2.4.1+cu121-cp39-cp39-linux_x86_64.whl", hash = "sha256:9986ad3555ddfff55e925d8298f8b2b49106a7dc60f811a2076a445fe4458e2b"},
|
2045 |
+
{file = "torch-2.4.1+cu121-cp39-cp39-win_amd64.whl", hash = "sha256:2ca012a78d7a2777c290a4b79cb2130bf65fdda89f533a8172674034c2a1519c"},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2046 |
]
|
2047 |
|
2048 |
[package.dependencies]
|
|
|
2061 |
nvidia-cusparse-cu12 = {version = "12.1.0.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
|
2062 |
nvidia-nccl-cu12 = {version = "2.20.5", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
|
2063 |
nvidia-nvtx-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
|
|
|
2064 |
sympy = "*"
|
2065 |
triton = {version = "3.0.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.13\""}
|
2066 |
typing-extensions = ">=4.8.0"
|
|
|
2069 |
opt-einsum = ["opt-einsum (>=3.3)"]
|
2070 |
optree = ["optree (>=0.11.0)"]
|
2071 |
|
2072 |
+
[package.source]
|
2073 |
+
type = "legacy"
|
2074 |
+
url = "https://download.pytorch.org/whl/cu121"
|
2075 |
+
reference = "pytorch"
|
2076 |
+
|
2077 |
[[package]]
|
2078 |
name = "torchaudio"
|
2079 |
+
version = "2.4.1+cu121"
|
2080 |
description = "An audio package for PyTorch"
|
2081 |
optional = false
|
2082 |
python-versions = "*"
|
2083 |
files = [
|
2084 |
+
{file = "torchaudio-2.4.1+cu121-cp310-cp310-linux_x86_64.whl", hash = "sha256:da8c87c80a1c1376a48dc33eef30b03bbdf1df25a05bd2b1c620b8811c7b19be"},
|
2085 |
+
{file = "torchaudio-2.4.1+cu121-cp310-cp310-win_amd64.whl", hash = "sha256:317f0827ff010bf71ce9b52ce240856742272440fc9542d8f8ea98212f39d654"},
|
2086 |
+
{file = "torchaudio-2.4.1+cu121-cp311-cp311-linux_x86_64.whl", hash = "sha256:01b04edb9135a7d60fa0100fc01ffb40a0858010f599ae6418f40e09e88e681b"},
|
2087 |
+
{file = "torchaudio-2.4.1+cu121-cp311-cp311-win_amd64.whl", hash = "sha256:06095d5941a2843663a230bef739680541e0a491caf41df8fb1552eeefd212b0"},
|
2088 |
+
{file = "torchaudio-2.4.1+cu121-cp312-cp312-linux_x86_64.whl", hash = "sha256:6b74d706aba81db5f838ca414f03d3f6598ea880b72106065dbc9c5d3c063fe1"},
|
2089 |
+
{file = "torchaudio-2.4.1+cu121-cp312-cp312-win_amd64.whl", hash = "sha256:1dbcafb1bd954fc33bf34e6062be7b2005cfe3bde82bc938d08e650aa9c9c08c"},
|
2090 |
+
{file = "torchaudio-2.4.1+cu121-cp38-cp38-linux_x86_64.whl", hash = "sha256:fcfbf11699295f65b04581cd04372fe02a239806eb7d25da2746f35de0f5d2d9"},
|
2091 |
+
{file = "torchaudio-2.4.1+cu121-cp38-cp38-win_amd64.whl", hash = "sha256:bafc2e099349c487b8311e2417431eea24b0f0844baafd452403e4e24200ae58"},
|
2092 |
+
{file = "torchaudio-2.4.1+cu121-cp39-cp39-linux_x86_64.whl", hash = "sha256:71bbb06c1018799db3a0bcc094dd08b80bc28bb7b5f27ab8b0e2f38b014b11c6"},
|
2093 |
+
{file = "torchaudio-2.4.1+cu121-cp39-cp39-win_amd64.whl", hash = "sha256:e316c5aa0cf5b844589c5b5803d75846300e6b62edf5921833a13f3022a11b9d"},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2094 |
]
|
2095 |
|
2096 |
[package.dependencies]
|
2097 |
torch = "2.4.1"
|
2098 |
|
2099 |
+
[package.source]
|
2100 |
+
type = "legacy"
|
2101 |
+
url = "https://download.pytorch.org/whl/cu121"
|
2102 |
+
reference = "pytorch"
|
2103 |
+
|
2104 |
[[package]]
|
2105 |
name = "tqdm"
|
2106 |
version = "4.66.5"
|
|
|
2273 |
optional = ["python-socks", "wsaccel"]
|
2274 |
test = ["websockets"]
|
2275 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2276 |
[[package]]
|
2277 |
name = "whisper"
|
2278 |
version = "1.1.10"
|
|
|
2289 |
[metadata]
|
2290 |
lock-version = "2.0"
|
2291 |
python-versions = "3.11.7"
|
2292 |
+
content-hash = "c1e0fffbd7c3ee70d76ab8e44ccdf25c3008b6da1c494d8eafc7c9e3de5fdc65"
|
pyproject.toml
CHANGED
@@ -24,7 +24,8 @@ python = "3.11.7"
|
|
24 |
#numpy = "^1.22.0"
|
25 |
#torch = "2.1.0"
|
26 |
#sounddevice = "^0.5.0"
|
27 |
-
torch = "^2.4.1"
|
|
|
28 |
whisper = "^1.1.10"
|
29 |
requests = "^2.32.3"
|
30 |
transformers = "^4.44.2"
|
@@ -32,10 +33,13 @@ soundfile = "^0.12.1"
|
|
32 |
faster-whisper = "^1.0.3"
|
33 |
fastapi = "^0.114.2"
|
34 |
websockets = "^13.0.1"
|
35 |
-
websocket-client = "^1.8.0"
|
36 |
librosa = "^0.10.2.post1"
|
37 |
uvicorn = "^0.30.6"
|
38 |
-
torchaudio = "^2.4.1"
|
|
|
|
|
|
|
39 |
silero-vad = "^5.1"
|
40 |
#openai = "^1.42.0"
|
41 |
#numpy = "^1.22.0"
|
@@ -45,6 +49,16 @@ silero-vad = "^5.1"
|
|
45 |
#ffmpeg = "^1.4"
|
46 |
|
47 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
|
49 |
|
50 |
|
|
|
24 |
#numpy = "^1.22.0"
|
25 |
#torch = "2.1.0"
|
26 |
#sounddevice = "^0.5.0"
|
27 |
+
# torch = "^2.4.1"
|
28 |
+
torch = { version = "^2.0", source = "pytorch" }
|
29 |
whisper = "^1.1.10"
|
30 |
requests = "^2.32.3"
|
31 |
transformers = "^4.44.2"
|
|
|
33 |
faster-whisper = "^1.0.3"
|
34 |
fastapi = "^0.114.2"
|
35 |
websockets = "^13.0.1"
|
36 |
+
#websocket-client = "^1.8.0"
|
37 |
librosa = "^0.10.2.post1"
|
38 |
uvicorn = "^0.30.6"
|
39 |
+
# torchaudio = "^2.4.1"
|
40 |
+
torchaudio = { version = "^2.0", source = "pytorch" }
|
41 |
+
|
42 |
+
|
43 |
silero-vad = "^5.1"
|
44 |
#openai = "^1.42.0"
|
45 |
#numpy = "^1.22.0"
|
|
|
49 |
#ffmpeg = "^1.4"
|
50 |
|
51 |
|
52 |
+
[[tool.poetry.source]]
|
53 |
+
name = "pytorch"
|
54 |
+
url = "https://download.pytorch.org/whl/cu121"
|
55 |
+
priority = "explicit"
|
56 |
+
|
57 |
+
|
58 |
+
[[tool.poetry.source]]
|
59 |
+
name = "PyPI"
|
60 |
+
priority = "primary"
|
61 |
+
|
62 |
|
63 |
|
64 |
|
streaming_client.py
CHANGED
@@ -4,6 +4,8 @@ import sys
|
|
4 |
import time
|
5 |
import logging
|
6 |
import os
|
|
|
|
|
7 |
import requests
|
8 |
|
9 |
import json
|
@@ -12,6 +14,8 @@ import numpy as np
|
|
12 |
import soundfile as sf
|
13 |
import io
|
14 |
|
|
|
|
|
15 |
# Import the necessary components from whisper_online.py
|
16 |
from libs.whisper_streaming.whisper_online import (
|
17 |
ASRBase,
|
@@ -24,10 +28,44 @@ from libs.whisper_streaming.whisper_online import (
|
|
24 |
load_audio,
|
25 |
load_audio_chunk, OpenaiApiASR,
|
26 |
)
|
27 |
-
from model import dict_to_segment
|
28 |
|
|
|
|
|
29 |
logger = logging.getLogger(__name__)
|
30 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
31 |
# Define the RemoteFasterWhisperASR class
|
32 |
class RemoteFasterWhisperASR(ASRBase):
|
33 |
"""Uses a remote FasterWhisper model via WebSocket."""
|
@@ -65,6 +103,7 @@ class RemoteFasterWhisperASR(ASRBase):
|
|
65 |
response = self.ws.recv()
|
66 |
segments = json.loads(response)
|
67 |
segments = [dict_to_segment(s) for s in segments]
|
|
|
68 |
return segments
|
69 |
|
70 |
def ts_words(self, segments):
|
@@ -154,19 +193,24 @@ def main():
|
|
154 |
import numpy as np
|
155 |
import io
|
156 |
import soundfile as sf
|
|
|
157 |
|
158 |
# Download the audio file if not already present
|
159 |
AUDIO_FILE_URL = "https://raw.githubusercontent.com/AshDavid12/runpod-serverless-forked/main/test_hebrew.wav"
|
160 |
audio_file_path = "test_hebrew.wav"
|
|
|
161 |
if not os.path.exists(audio_file_path):
|
162 |
response = requests.get(AUDIO_FILE_URL)
|
163 |
with open(audio_file_path, 'wb') as f:
|
164 |
f.write(response.content)
|
165 |
|
|
|
|
|
|
|
166 |
# Set up arguments
|
167 |
class Args:
|
168 |
def __init__(self):
|
169 |
-
self.audio_path =
|
170 |
self.lan = 'he'
|
171 |
self.model = None # Not used in RemoteFasterWhisperASR
|
172 |
self.model_cache_dir = None
|
|
|
4 |
import time
|
5 |
import logging
|
6 |
import os
|
7 |
+
from wave import Wave_read
|
8 |
+
|
9 |
import requests
|
10 |
|
11 |
import json
|
|
|
14 |
import soundfile as sf
|
15 |
import io
|
16 |
|
17 |
+
import librosa
|
18 |
+
|
19 |
# Import the necessary components from whisper_online.py
|
20 |
from libs.whisper_streaming.whisper_online import (
|
21 |
ASRBase,
|
|
|
28 |
load_audio,
|
29 |
load_audio_chunk, OpenaiApiASR,
|
30 |
)
|
31 |
+
from model import dict_to_segment, get_raw_words_from_segments
|
32 |
|
33 |
+
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s',
|
34 |
+
handlers=[logging.StreamHandler(sys.stdout)], force=True)
|
35 |
logger = logging.getLogger(__name__)
|
36 |
|
37 |
+
|
38 |
+
|
39 |
+
def convert_to_mono_16k(input_wav: str, output_wav: str) -> None:
    """
    Rewrite a .wav file as a single-channel, 16 kHz recording.

    The input is read at its own sampling rate with every channel kept,
    mixed down to one channel when it has more than one, resampled to
    16 kHz and written to ``output_wav``.

    Args:
        input_wav (str): Path to the input .wav file.
        output_wav (str): Path where the mono 16 kHz .wav file is written.
    """
    sample_rate_out = 16000

    # sr=None keeps the file's native sampling rate; mono=False keeps all channels.
    samples, sample_rate_in = librosa.load(input_wav, sr=None, mono=False)
    load_message = "Loaded audio with shape: %s, original sampling rate: %d" % (samples.shape, sample_rate_in)
    logger.info(load_message)

    # Only multi-channel input (more than one array dimension) needs a downmix.
    mono_samples = librosa.to_mono(samples) if samples.ndim > 1 else samples

    # Resample to the target rate and write the result in one step.
    sf.write(
        output_wav,
        librosa.resample(mono_samples, orig_sr=sample_rate_in, target_sr=sample_rate_out),
        sample_rate_out,
    )

    logger.info(f"Converted audio saved to {output_wav}")
|
63 |
+
|
64 |
+
|
65 |
+
# Example usage:
|
66 |
+
# convert_to_mono_16k('input_audio.wav', 'output_audio_16k_mono.wav')
|
67 |
+
|
68 |
+
|
69 |
# Define the RemoteFasterWhisperASR class
|
70 |
class RemoteFasterWhisperASR(ASRBase):
|
71 |
"""Uses a remote FasterWhisper model via WebSocket."""
|
|
|
103 |
response = self.ws.recv()
|
104 |
segments = json.loads(response)
|
105 |
segments = [dict_to_segment(s) for s in segments]
|
106 |
+
logger.info(get_raw_words_from_segments(segments))
|
107 |
return segments
|
108 |
|
109 |
def ts_words(self, segments):
|
|
|
193 |
import numpy as np
|
194 |
import io
|
195 |
import soundfile as sf
|
196 |
+
import wave
|
197 |
|
198 |
# Download the audio file if not already present
|
199 |
AUDIO_FILE_URL = "https://raw.githubusercontent.com/AshDavid12/runpod-serverless-forked/main/test_hebrew.wav"
|
200 |
audio_file_path = "test_hebrew.wav"
|
201 |
+
mono16k_audio_file_path = "mono16k." + audio_file_path
|
202 |
if not os.path.exists(audio_file_path):
|
203 |
response = requests.get(AUDIO_FILE_URL)
|
204 |
with open(audio_file_path, 'wb') as f:
|
205 |
f.write(response.content)
|
206 |
|
207 |
+
if not os.path.exists(mono16k_audio_file_path):
|
208 |
+
convert_to_mono_16k(audio_file_path, mono16k_audio_file_path)
|
209 |
+
|
210 |
# Set up arguments
|
211 |
class Args:
|
212 |
def __init__(self):
|
213 |
+
self.audio_path = mono16k_audio_file_path
|
214 |
self.lan = 'he'
|
215 |
self.model = None # Not used in RemoteFasterWhisperASR
|
216 |
self.model_cache_dir = None
|