aviadr1 committed on
Commit
d8dadfc
·
1 Parent(s): dd0871e

sometimes the client works

Browse files

but not always. server seems to work well

Files changed (5) hide show
  1. infer.py +7 -3
  2. model.py +7 -0
  3. poetry.lock +34 -140
  4. pyproject.toml +17 -3
  5. streaming_client.py +46 -2
infer.py CHANGED
@@ -18,13 +18,16 @@ from typing import Optional
18
  import sys
19
  import asyncio
20
 
21
- from model import segment_to_dict
22
 
23
  # Configure logging
24
  logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s',
25
  handlers=[logging.StreamHandler(sys.stdout)], force=True)
26
  logger = logging.getLogger(__name__)
27
  #logging.getLogger("asyncio").setLevel(logging.DEBUG)
 
 
 
28
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
29
  logging.info(f'Device selected: {device}')
30
 
@@ -70,7 +73,8 @@ async def websocket_transcribe(websocket: WebSocket):
70
  audio_file_path = temp_audio_file.name
71
 
72
  # Call the transcribe function
73
- segments, info = await asyncio.to_thread(model.transcribe,
 
74
  audio_file_path,
75
  language='he',
76
  initial_prompt=input_data.init_prompt,
@@ -82,7 +86,7 @@ async def websocket_transcribe(websocket: WebSocket):
82
  # Convert segments to list and serialize
83
  segments_list = list(segments)
84
  segments_serializable = [segment_to_dict(s) for s in segments_list]
85
-
86
  # Send the serialized segments back to the client
87
  await websocket.send_json(segments_serializable)
88
 
 
18
  import sys
19
  import asyncio
20
 
21
+ from model import segment_to_dict, get_raw_words_from_segments
22
 
23
  # Configure logging
24
  logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s',
25
  handlers=[logging.StreamHandler(sys.stdout)], force=True)
26
  logger = logging.getLogger(__name__)
27
  #logging.getLogger("asyncio").setLevel(logging.DEBUG)
28
+
29
+ logging.info(torch.__version__)
30
+ logging.info(torch.version.cuda) # CUDA version this PyTorch build was compiled against (None on CPU-only builds)
31
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
32
  logging.info(f'Device selected: {device}')
33
 
 
73
  audio_file_path = temp_audio_file.name
74
 
75
  # Call the transcribe function
76
+ # segments, info = await asyncio.to_thread(model.transcribe,
77
+ segments, info = model.transcribe(
78
  audio_file_path,
79
  language='he',
80
  initial_prompt=input_data.init_prompt,
 
86
  # Convert segments to list and serialize
87
  segments_list = list(segments)
88
  segments_serializable = [segment_to_dict(s) for s in segments_list]
89
+ logger.info(get_raw_words_from_segments(segments_list))
90
  # Send the serialized segments back to the client
91
  await websocket.send_json(segments_serializable)
92
 
model.py CHANGED
@@ -51,4 +51,11 @@ def dict_to_segment(data: dict) -> Segment:
51
  compression_ratio=data["compression_ratio"],
52
  no_speech_prob=data["no_speech_prob"],
53
  words=[dict_to_word(word) for word in data["words"]] if data["words"] else None
 
 
 
 
 
 
 
54
  )
 
51
  compression_ratio=data["compression_ratio"],
52
  no_speech_prob=data["no_speech_prob"],
53
  words=[dict_to_word(word) for word in data["words"]] if data["words"] else None
54
+ )
55
+
56
+ def get_raw_words_from_segments(segments: list[Segment]) -> str:
57
+ return " ".join(
58
+ word.word
59
+ for segment in segments if segment.words
60
+ for word in segment.words
61
  )
poetry.lock CHANGED
@@ -1,4 +1,4 @@
1
- # This file is automatically @generated by Poetry 1.7.1 and should not be changed by hand.
2
 
3
  [[package]]
4
  name = "annotated-types"
@@ -2028,31 +2028,21 @@ testing = ["black (==22.3)", "datasets", "numpy", "pytest", "requests", "ruff"]
2028
 
2029
  [[package]]
2030
  name = "torch"
2031
- version = "2.4.1"
2032
  description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
2033
  optional = false
2034
  python-versions = ">=3.8.0"
2035
  files = [
2036
- {file = "torch-2.4.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:362f82e23a4cd46341daabb76fba08f04cd646df9bfaf5da50af97cb60ca4971"},
2037
- {file = "torch-2.4.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:e8ac1985c3ff0f60d85b991954cfc2cc25f79c84545aead422763148ed2759e3"},
2038
- {file = "torch-2.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:91e326e2ccfb1496e3bee58f70ef605aeb27bd26be07ba64f37dcaac3d070ada"},
2039
- {file = "torch-2.4.1-cp310-none-macosx_11_0_arm64.whl", hash = "sha256:d36a8ef100f5bff3e9c3cea934b9e0d7ea277cb8210c7152d34a9a6c5830eadd"},
2040
- {file = "torch-2.4.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:0b5f88afdfa05a335d80351e3cea57d38e578c8689f751d35e0ff36bce872113"},
2041
- {file = "torch-2.4.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:ef503165f2341942bfdf2bd520152f19540d0c0e34961232f134dc59ad435be8"},
2042
- {file = "torch-2.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:092e7c2280c860eff762ac08c4bdcd53d701677851670695e0c22d6d345b269c"},
2043
- {file = "torch-2.4.1-cp311-none-macosx_11_0_arm64.whl", hash = "sha256:ddddbd8b066e743934a4200b3d54267a46db02106876d21cf31f7da7a96f98ea"},
2044
- {file = "torch-2.4.1-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:fdc4fe11db3eb93c1115d3e973a27ac7c1a8318af8934ffa36b0370efe28e042"},
2045
- {file = "torch-2.4.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:18835374f599207a9e82c262153c20ddf42ea49bc76b6eadad8e5f49729f6e4d"},
2046
- {file = "torch-2.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:ebea70ff30544fc021d441ce6b219a88b67524f01170b1c538d7d3ebb5e7f56c"},
2047
- {file = "torch-2.4.1-cp312-none-macosx_11_0_arm64.whl", hash = "sha256:72b484d5b6cec1a735bf3fa5a1c4883d01748698c5e9cfdbeb4ffab7c7987e0d"},
2048
- {file = "torch-2.4.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:c99e1db4bf0c5347107845d715b4aa1097e601bdc36343d758963055e9599d93"},
2049
- {file = "torch-2.4.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:b57f07e92858db78c5b72857b4f0b33a65b00dc5d68e7948a8494b0314efb880"},
2050
- {file = "torch-2.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:f18197f3f7c15cde2115892b64f17c80dbf01ed72b008020e7da339902742cf6"},
2051
- {file = "torch-2.4.1-cp38-none-macosx_11_0_arm64.whl", hash = "sha256:5fc1d4d7ed265ef853579caf272686d1ed87cebdcd04f2a498f800ffc53dab71"},
2052
- {file = "torch-2.4.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:40f6d3fe3bae74efcf08cb7f8295eaddd8a838ce89e9d26929d4edd6d5e4329d"},
2053
- {file = "torch-2.4.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:c9299c16c9743001ecef515536ac45900247f4338ecdf70746f2461f9e4831db"},
2054
- {file = "torch-2.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:6bce130f2cd2d52ba4e2c6ada461808de7e5eccbac692525337cfb4c19421846"},
2055
- {file = "torch-2.4.1-cp39-none-macosx_11_0_arm64.whl", hash = "sha256:a38de2803ee6050309aac032676536c3d3b6a9804248537e38e098d0e14817ec"},
2056
  ]
2057
 
2058
  [package.dependencies]
@@ -2071,7 +2061,6 @@ nvidia-cusolver-cu12 = {version = "11.4.5.107", markers = "platform_system == \"
2071
  nvidia-cusparse-cu12 = {version = "12.1.0.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
2072
  nvidia-nccl-cu12 = {version = "2.20.5", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
2073
  nvidia-nvtx-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
2074
- setuptools = "*"
2075
  sympy = "*"
2076
  triton = {version = "3.0.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.13\""}
2077
  typing-extensions = ">=4.8.0"
@@ -2080,38 +2069,38 @@ typing-extensions = ">=4.8.0"
2080
  opt-einsum = ["opt-einsum (>=3.3)"]
2081
  optree = ["optree (>=0.11.0)"]
2082
 
 
 
 
 
 
2083
  [[package]]
2084
  name = "torchaudio"
2085
- version = "2.4.1"
2086
  description = "An audio package for PyTorch"
2087
  optional = false
2088
  python-versions = "*"
2089
  files = [
2090
- {file = "torchaudio-2.4.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:661909751909340b24f637410dfec02a888867816c3db19ed4f4102ae105244a"},
2091
- {file = "torchaudio-2.4.1-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:bfc234cef1d03092ea27440fb79e486722ccb41cff94ebaf9d5a1082436395fe"},
2092
- {file = "torchaudio-2.4.1-cp310-cp310-manylinux2014_aarch64.whl", hash = "sha256:54431179d9a9ccf3feeae98aace07d89fae9fd728e2bc8656efbd70e7edcc6f8"},
2093
- {file = "torchaudio-2.4.1-cp310-cp310-win_amd64.whl", hash = "sha256:dec97872215c3122b7718ec47ac63e143565c3cced06444d0225e98bf4dd4b5f"},
2094
- {file = "torchaudio-2.4.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:60af1531815d22659e5412ea401bed552a16c389938c49664e446e4cfd5ddc06"},
2095
- {file = "torchaudio-2.4.1-cp311-cp311-manylinux1_x86_64.whl", hash = "sha256:95a0968569f7f4455bfd242bfcd489ec47ad37d2ba0f3d9f738cd1128a5f775c"},
2096
- {file = "torchaudio-2.4.1-cp311-cp311-manylinux2014_aarch64.whl", hash = "sha256:7640aaffb2056e12f2906187b03a22228a0908c87d0295fddf4b0b92334a290b"},
2097
- {file = "torchaudio-2.4.1-cp311-cp311-win_amd64.whl", hash = "sha256:3c08b42a0c296c8eeee6c533bcae5cfbc0ceae86a34f24fe6bbbb5faa7a7bea1"},
2098
- {file = "torchaudio-2.4.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:953946cf610ffd57bb3fdd228effa2112fa51c5dfe36a96611effc9074a3d3be"},
2099
- {file = "torchaudio-2.4.1-cp312-cp312-manylinux1_x86_64.whl", hash = "sha256:1796a8961decb522c47daab0fbe27c057d6d143ee22bb6ae0d5eb9b2a038c7b6"},
2100
- {file = "torchaudio-2.4.1-cp312-cp312-manylinux2014_aarch64.whl", hash = "sha256:5b62fc7b16ed708b0c07d4393137797e92f63fc3bd5705607d97ba6a9a7cf3f0"},
2101
- {file = "torchaudio-2.4.1-cp312-cp312-win_amd64.whl", hash = "sha256:d721b186aae7bd8752c9ad95213f5d650926597bb9060728dfe476986a1ff570"},
2102
- {file = "torchaudio-2.4.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:4ea0fd00142fe795c75bcc20a303981b56f2327c7f7d321b42a8fef1d78aafa9"},
2103
- {file = "torchaudio-2.4.1-cp38-cp38-manylinux1_x86_64.whl", hash = "sha256:375d8740c8035a50faca7a5afe2fbdb712aa8733715b971b2af61b4003fa1c41"},
2104
- {file = "torchaudio-2.4.1-cp38-cp38-manylinux2014_aarch64.whl", hash = "sha256:74d19cf9ca3dad394afcabb7e6f7ed9ab9f59f2540d502826c7ec3e33985251d"},
2105
- {file = "torchaudio-2.4.1-cp38-cp38-win_amd64.whl", hash = "sha256:40e9fa8fdc8d328ea4aa90be65fd34c5ef975610dbd707545e3664393a8a2497"},
2106
- {file = "torchaudio-2.4.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:3adce550850902b9aa6cd2378ccd720ac9ec8cf31e2eba9743ccc84ffcbe76d6"},
2107
- {file = "torchaudio-2.4.1-cp39-cp39-manylinux1_x86_64.whl", hash = "sha256:98d8e03703f96b13a8d172d1ccdc7badb338227fd762985fdcea6b30f6697bdb"},
2108
- {file = "torchaudio-2.4.1-cp39-cp39-manylinux2014_aarch64.whl", hash = "sha256:36c7e7bc6b358cbf42b769c80206780fa1497d141a985c6b3e7768de44524e9a"},
2109
- {file = "torchaudio-2.4.1-cp39-cp39-win_amd64.whl", hash = "sha256:f46e34ab3866ad8d8ace0673cd11e697c5cde6a3b7a4d8d789207d4d8badbb6e"},
2110
  ]
2111
 
2112
  [package.dependencies]
2113
  torch = "2.4.1"
2114
 
 
 
 
 
 
2115
  [[package]]
2116
  name = "tqdm"
2117
  version = "4.66.5"
@@ -2284,101 +2273,6 @@ docs = ["Sphinx (>=6.0)", "myst-parser (>=2.0.0)", "sphinx-rtd-theme (>=1.1.0)"]
2284
  optional = ["python-socks", "wsaccel"]
2285
  test = ["websockets"]
2286
 
2287
- [[package]]
2288
- name = "websockets"
2289
- version = "13.0.1"
2290
- description = "An implementation of the WebSocket Protocol (RFC 6455 & 7692)"
2291
- optional = false
2292
- python-versions = ">=3.8"
2293
- files = [
2294
- {file = "websockets-13.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1841c9082a3ba4a05ea824cf6d99570a6a2d8849ef0db16e9c826acb28089e8f"},
2295
- {file = "websockets-13.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:c5870b4a11b77e4caa3937142b650fbbc0914a3e07a0cf3131f35c0587489c1c"},
2296
- {file = "websockets-13.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:f1d3d1f2eb79fe7b0fb02e599b2bf76a7619c79300fc55f0b5e2d382881d4f7f"},
2297
- {file = "websockets-13.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:15c7d62ee071fa94a2fc52c2b472fed4af258d43f9030479d9c4a2de885fd543"},
2298
- {file = "websockets-13.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6724b554b70d6195ba19650fef5759ef11346f946c07dbbe390e039bcaa7cc3d"},
2299
- {file = "websockets-13.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:56a952fa2ae57a42ba7951e6b2605e08a24801a4931b5644dfc68939e041bc7f"},
2300
- {file = "websockets-13.0.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:17118647c0ea14796364299e942c330d72acc4b248e07e639d34b75067b3cdd8"},
2301
- {file = "websockets-13.0.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:64a11aae1de4c178fa653b07d90f2fb1a2ed31919a5ea2361a38760192e1858b"},
2302
- {file = "websockets-13.0.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:0617fd0b1d14309c7eab6ba5deae8a7179959861846cbc5cb528a7531c249448"},
2303
- {file = "websockets-13.0.1-cp310-cp310-win32.whl", hash = "sha256:11f9976ecbc530248cf162e359a92f37b7b282de88d1d194f2167b5e7ad80ce3"},
2304
- {file = "websockets-13.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:c3c493d0e5141ec055a7d6809a28ac2b88d5b878bb22df8c621ebe79a61123d0"},
2305
- {file = "websockets-13.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:699ba9dd6a926f82a277063603fc8d586b89f4cb128efc353b749b641fcddda7"},
2306
- {file = "websockets-13.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:cf2fae6d85e5dc384bf846f8243ddaa9197f3a1a70044f59399af001fd1f51d4"},
2307
- {file = "websockets-13.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:52aed6ef21a0f1a2a5e310fb5c42d7555e9c5855476bbd7173c3aa3d8a0302f2"},
2308
- {file = "websockets-13.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8eb2b9a318542153674c6e377eb8cb9ca0fc011c04475110d3477862f15d29f0"},
2309
- {file = "websockets-13.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:5df891c86fe68b2c38da55b7aea7095beca105933c697d719f3f45f4220a5e0e"},
2310
- {file = "websockets-13.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fac2d146ff30d9dd2fcf917e5d147db037a5c573f0446c564f16f1f94cf87462"},
2311
- {file = "websockets-13.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:b8ac5b46fd798bbbf2ac6620e0437c36a202b08e1f827832c4bf050da081b501"},
2312
- {file = "websockets-13.0.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:46af561eba6f9b0848b2c9d2427086cabadf14e0abdd9fde9d72d447df268418"},
2313
- {file = "websockets-13.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:b5a06d7f60bc2fc378a333978470dfc4e1415ee52f5f0fce4f7853eb10c1e9df"},
2314
- {file = "websockets-13.0.1-cp311-cp311-win32.whl", hash = "sha256:556e70e4f69be1082e6ef26dcb70efcd08d1850f5d6c5f4f2bcb4e397e68f01f"},
2315
- {file = "websockets-13.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:67494e95d6565bf395476e9d040037ff69c8b3fa356a886b21d8422ad86ae075"},
2316
- {file = "websockets-13.0.1-cp312-cp312-macosx_10_9_universal2.whl", hash = "sha256:f9c9e258e3d5efe199ec23903f5da0eeaad58cf6fccb3547b74fd4750e5ac47a"},
2317
- {file = "websockets-13.0.1-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:6b41a1b3b561f1cba8321fb32987552a024a8f67f0d05f06fcf29f0090a1b956"},
2318
- {file = "websockets-13.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f73e676a46b0fe9426612ce8caeca54c9073191a77c3e9d5c94697aef99296af"},
2319
- {file = "websockets-13.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1f613289f4a94142f914aafad6c6c87903de78eae1e140fa769a7385fb232fdf"},
2320
- {file = "websockets-13.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:0f52504023b1480d458adf496dc1c9e9811df4ba4752f0bc1f89ae92f4f07d0c"},
2321
- {file = "websockets-13.0.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:139add0f98206cb74109faf3611b7783ceafc928529c62b389917a037d4cfdf4"},
2322
- {file = "websockets-13.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:47236c13be337ef36546004ce8c5580f4b1150d9538b27bf8a5ad8edf23ccfab"},
2323
- {file = "websockets-13.0.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:c44ca9ade59b2e376612df34e837013e2b273e6c92d7ed6636d0556b6f4db93d"},
2324
- {file = "websockets-13.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:9bbc525f4be3e51b89b2a700f5746c2a6907d2e2ef4513a8daafc98198b92237"},
2325
- {file = "websockets-13.0.1-cp312-cp312-win32.whl", hash = "sha256:3624fd8664f2577cf8de996db3250662e259bfbc870dd8ebdcf5d7c6ac0b5185"},
2326
- {file = "websockets-13.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:0513c727fb8adffa6d9bf4a4463b2bade0186cbd8c3604ae5540fae18a90cb99"},
2327
- {file = "websockets-13.0.1-cp313-cp313-macosx_10_13_universal2.whl", hash = "sha256:1ee4cc030a4bdab482a37462dbf3ffb7e09334d01dd37d1063be1136a0d825fa"},
2328
- {file = "websockets-13.0.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:dbb0b697cc0655719522406c059eae233abaa3243821cfdfab1215d02ac10231"},
2329
- {file = "websockets-13.0.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:acbebec8cb3d4df6e2488fbf34702cbc37fc39ac7abf9449392cefb3305562e9"},
2330
- {file = "websockets-13.0.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63848cdb6fcc0bf09d4a155464c46c64ffdb5807ede4fb251da2c2692559ce75"},
2331
- {file = "websockets-13.0.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:872afa52a9f4c414d6955c365b6588bc4401272c629ff8321a55f44e3f62b553"},
2332
- {file = "websockets-13.0.1-cp313-cp313-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:05e70fec7c54aad4d71eae8e8cab50525e899791fc389ec6f77b95312e4e9920"},
2333
- {file = "websockets-13.0.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e82db3756ccb66266504f5a3de05ac6b32f287faacff72462612120074103329"},
2334
- {file = "websockets-13.0.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:4e85f46ce287f5c52438bb3703d86162263afccf034a5ef13dbe4318e98d86e7"},
2335
- {file = "websockets-13.0.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:f3fea72e4e6edb983908f0db373ae0732b275628901d909c382aae3b592589f2"},
2336
- {file = "websockets-13.0.1-cp313-cp313-win32.whl", hash = "sha256:254ecf35572fca01a9f789a1d0f543898e222f7b69ecd7d5381d8d8047627bdb"},
2337
- {file = "websockets-13.0.1-cp313-cp313-win_amd64.whl", hash = "sha256:ca48914cdd9f2ccd94deab5bcb5ac98025a5ddce98881e5cce762854a5de330b"},
2338
- {file = "websockets-13.0.1-cp38-cp38-macosx_10_9_universal2.whl", hash = "sha256:b74593e9acf18ea5469c3edaa6b27fa7ecf97b30e9dabd5a94c4c940637ab96e"},
2339
- {file = "websockets-13.0.1-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:132511bfd42e77d152c919147078460c88a795af16b50e42a0bd14f0ad71ddd2"},
2340
- {file = "websockets-13.0.1-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:165bedf13556f985a2aa064309baa01462aa79bf6112fbd068ae38993a0e1f1b"},
2341
- {file = "websockets-13.0.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:e801ca2f448850685417d723ec70298feff3ce4ff687c6f20922c7474b4746ae"},
2342
- {file = "websockets-13.0.1-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:30d3a1f041360f029765d8704eae606781e673e8918e6b2c792e0775de51352f"},
2343
- {file = "websockets-13.0.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:67648f5e50231b5a7f6d83b32f9c525e319f0ddc841be0de64f24928cd75a603"},
2344
- {file = "websockets-13.0.1-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:4f0426d51c8f0926a4879390f53c7f5a855e42d68df95fff6032c82c888b5f36"},
2345
- {file = "websockets-13.0.1-cp38-cp38-musllinux_1_2_i686.whl", hash = "sha256:ef48e4137e8799998a343706531e656fdec6797b80efd029117edacb74b0a10a"},
2346
- {file = "websockets-13.0.1-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:249aab278810bee585cd0d4de2f08cfd67eed4fc75bde623be163798ed4db2eb"},
2347
- {file = "websockets-13.0.1-cp38-cp38-win32.whl", hash = "sha256:06c0a667e466fcb56a0886d924b5f29a7f0886199102f0a0e1c60a02a3751cb4"},
2348
- {file = "websockets-13.0.1-cp38-cp38-win_amd64.whl", hash = "sha256:1f3cf6d6ec1142412d4535adabc6bd72a63f5f148c43fe559f06298bc21953c9"},
2349
- {file = "websockets-13.0.1-cp39-cp39-macosx_10_9_universal2.whl", hash = "sha256:1fa082ea38d5de51dd409434edc27c0dcbd5fed2b09b9be982deb6f0508d25bc"},
2350
- {file = "websockets-13.0.1-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:4a365bcb7be554e6e1f9f3ed64016e67e2fa03d7b027a33e436aecf194febb63"},
2351
- {file = "websockets-13.0.1-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:10a0dc7242215d794fb1918f69c6bb235f1f627aaf19e77f05336d147fce7c37"},
2352
- {file = "websockets-13.0.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:59197afd478545b1f73367620407b0083303569c5f2d043afe5363676f2697c9"},
2353
- {file = "websockets-13.0.1-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7d20516990d8ad557b5abeb48127b8b779b0b7e6771a265fa3e91767596d7d97"},
2354
- {file = "websockets-13.0.1-cp39-cp39-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a1a2e272d067030048e1fe41aa1ec8cfbbaabce733b3d634304fa2b19e5c897f"},
2355
- {file = "websockets-13.0.1-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:ad327ac80ba7ee61da85383ca8822ff808ab5ada0e4a030d66703cc025b021c4"},
2356
- {file = "websockets-13.0.1-cp39-cp39-musllinux_1_2_i686.whl", hash = "sha256:518f90e6dd089d34eaade01101fd8a990921c3ba18ebbe9b0165b46ebff947f0"},
2357
- {file = "websockets-13.0.1-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:68264802399aed6fe9652e89761031acc734fc4c653137a5911c2bfa995d6d6d"},
2358
- {file = "websockets-13.0.1-cp39-cp39-win32.whl", hash = "sha256:a5dc0c42ded1557cc7c3f0240b24129aefbad88af4f09346164349391dea8e58"},
2359
- {file = "websockets-13.0.1-cp39-cp39-win_amd64.whl", hash = "sha256:b448a0690ef43db5ef31b3a0d9aea79043882b4632cfc3eaab20105edecf6097"},
2360
- {file = "websockets-13.0.1-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:faef9ec6354fe4f9a2c0bbb52fb1ff852effc897e2a4501e25eb3a47cb0a4f89"},
2361
- {file = "websockets-13.0.1-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:03d3f9ba172e0a53e37fa4e636b86cc60c3ab2cfee4935e66ed1d7acaa4625ad"},
2362
- {file = "websockets-13.0.1-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d450f5a7a35662a9b91a64aefa852f0c0308ee256122f5218a42f1d13577d71e"},
2363
- {file = "websockets-13.0.1-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:3f55b36d17ac50aa8a171b771e15fbe1561217510c8768af3d546f56c7576cdc"},
2364
- {file = "websockets-13.0.1-pp310-pypy310_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:14b9c006cac63772b31abbcd3e3abb6228233eec966bf062e89e7fa7ae0b7333"},
2365
- {file = "websockets-13.0.1-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:b79915a1179a91f6c5f04ece1e592e2e8a6bd245a0e45d12fd56b2b59e559a32"},
2366
- {file = "websockets-13.0.1-pp38-pypy38_pp73-macosx_10_9_x86_64.whl", hash = "sha256:f40de079779acbcdbb6ed4c65af9f018f8b77c5ec4e17a4b737c05c2db554491"},
2367
- {file = "websockets-13.0.1-pp38-pypy38_pp73-macosx_11_0_arm64.whl", hash = "sha256:80e4ba642fc87fa532bac07e5ed7e19d56940b6af6a8c61d4429be48718a380f"},
2368
- {file = "websockets-13.0.1-pp38-pypy38_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2a02b0161c43cc9e0232711eff846569fad6ec836a7acab16b3cf97b2344c060"},
2369
- {file = "websockets-13.0.1-pp38-pypy38_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:6aa74a45d4cdc028561a7d6ab3272c8b3018e23723100b12e58be9dfa5a24491"},
2370
- {file = "websockets-13.0.1-pp38-pypy38_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:00fd961943b6c10ee6f0b1130753e50ac5dcd906130dcd77b0003c3ab797d026"},
2371
- {file = "websockets-13.0.1-pp38-pypy38_pp73-win_amd64.whl", hash = "sha256:d93572720d781331fb10d3da9ca1067817d84ad1e7c31466e9f5e59965618096"},
2372
- {file = "websockets-13.0.1-pp39-pypy39_pp73-macosx_10_15_x86_64.whl", hash = "sha256:71e6e5a3a3728886caee9ab8752e8113670936a193284be9d6ad2176a137f376"},
2373
- {file = "websockets-13.0.1-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:c4a6343e3b0714e80da0b0893543bf9a5b5fa71b846ae640e56e9abc6fbc4c83"},
2374
- {file = "websockets-13.0.1-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1a678532018e435396e37422a95e3ab87f75028ac79570ad11f5bf23cd2a7d8c"},
2375
- {file = "websockets-13.0.1-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:d6716c087e4aa0b9260c4e579bb82e068f84faddb9bfba9906cb87726fa2e870"},
2376
- {file = "websockets-13.0.1-pp39-pypy39_pp73-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e33505534f3f673270dd67f81e73550b11de5b538c56fe04435d63c02c3f26b5"},
2377
- {file = "websockets-13.0.1-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:acab3539a027a85d568c2573291e864333ec9d912675107d6efceb7e2be5d980"},
2378
- {file = "websockets-13.0.1-py3-none-any.whl", hash = "sha256:b80f0c51681c517604152eb6a572f5a9378f877763231fddb883ba2f968e8817"},
2379
- {file = "websockets-13.0.1.tar.gz", hash = "sha256:4d6ece65099411cfd9a48d13701d7438d9c34f479046b34c50ff60bb8834e43e"},
2380
- ]
2381
-
2382
  [[package]]
2383
  name = "whisper"
2384
  version = "1.1.10"
@@ -2395,4 +2289,4 @@ six = "*"
2395
  [metadata]
2396
  lock-version = "2.0"
2397
  python-versions = "3.11.7"
2398
- content-hash = "e9b4bf090c740e4db80a0ee561bdd74326f63b28e0681053da3c5be1977d012b"
 
1
+ # This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
2
 
3
  [[package]]
4
  name = "annotated-types"
 
2028
 
2029
  [[package]]
2030
  name = "torch"
2031
+ version = "2.4.1+cu121"
2032
  description = "Tensors and Dynamic neural networks in Python with strong GPU acceleration"
2033
  optional = false
2034
  python-versions = ">=3.8.0"
2035
  files = [
2036
+ {file = "torch-2.4.1+cu121-cp310-cp310-linux_x86_64.whl", hash = "sha256:9a5f0b103cfe840b3568416aa5067f6e7b9fec67d9c5659fd43b1207450fe975"},
2037
+ {file = "torch-2.4.1+cu121-cp310-cp310-win_amd64.whl", hash = "sha256:fe3bf682e86c08d6a8ec0ee30811732487fa688fc556d6e8f92d853d85507c0d"},
2038
+ {file = "torch-2.4.1+cu121-cp311-cp311-linux_x86_64.whl", hash = "sha256:914d128e5abcbbe79ca1b9eb5311b185444f1b2d7117df555fe418487ecfb894"},
2039
+ {file = "torch-2.4.1+cu121-cp311-cp311-win_amd64.whl", hash = "sha256:bc1e21d7412a2f06f552a9afb92c56c8b23d174884e9383259c3cf5db4687c98"},
2040
+ {file = "torch-2.4.1+cu121-cp312-cp312-linux_x86_64.whl", hash = "sha256:ab491610b15551e08da74bab29d0933e6bf10bab44fb7d4b1328f1e845c05a53"},
2041
+ {file = "torch-2.4.1+cu121-cp312-cp312-win_amd64.whl", hash = "sha256:b30faf3224697eaed131939690e8877b05b4d4cb6da5b12cfdcba3d742e9afd0"},
2042
+ {file = "torch-2.4.1+cu121-cp38-cp38-linux_x86_64.whl", hash = "sha256:cb4f502f910b47e1e366ccf7b231dac2967d2efb47d4b8cb33fc63b4bc5eeed8"},
2043
+ {file = "torch-2.4.1+cu121-cp38-cp38-win_amd64.whl", hash = "sha256:a48b991cd861266523cbed4705f89bef09669d5d2bbfa2524486156f74a222a8"},
2044
+ {file = "torch-2.4.1+cu121-cp39-cp39-linux_x86_64.whl", hash = "sha256:9986ad3555ddfff55e925d8298f8b2b49106a7dc60f811a2076a445fe4458e2b"},
2045
+ {file = "torch-2.4.1+cu121-cp39-cp39-win_amd64.whl", hash = "sha256:2ca012a78d7a2777c290a4b79cb2130bf65fdda89f533a8172674034c2a1519c"},
 
 
 
 
 
 
 
 
 
 
2046
  ]
2047
 
2048
  [package.dependencies]
 
2061
  nvidia-cusparse-cu12 = {version = "12.1.0.106", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
2062
  nvidia-nccl-cu12 = {version = "2.20.5", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
2063
  nvidia-nvtx-cu12 = {version = "12.1.105", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\""}
 
2064
  sympy = "*"
2065
  triton = {version = "3.0.0", markers = "platform_system == \"Linux\" and platform_machine == \"x86_64\" and python_version < \"3.13\""}
2066
  typing-extensions = ">=4.8.0"
 
2069
  opt-einsum = ["opt-einsum (>=3.3)"]
2070
  optree = ["optree (>=0.11.0)"]
2071
 
2072
+ [package.source]
2073
+ type = "legacy"
2074
+ url = "https://download.pytorch.org/whl/cu121"
2075
+ reference = "pytorch"
2076
+
2077
  [[package]]
2078
  name = "torchaudio"
2079
+ version = "2.4.1+cu121"
2080
  description = "An audio package for PyTorch"
2081
  optional = false
2082
  python-versions = "*"
2083
  files = [
2084
+ {file = "torchaudio-2.4.1+cu121-cp310-cp310-linux_x86_64.whl", hash = "sha256:da8c87c80a1c1376a48dc33eef30b03bbdf1df25a05bd2b1c620b8811c7b19be"},
2085
+ {file = "torchaudio-2.4.1+cu121-cp310-cp310-win_amd64.whl", hash = "sha256:317f0827ff010bf71ce9b52ce240856742272440fc9542d8f8ea98212f39d654"},
2086
+ {file = "torchaudio-2.4.1+cu121-cp311-cp311-linux_x86_64.whl", hash = "sha256:01b04edb9135a7d60fa0100fc01ffb40a0858010f599ae6418f40e09e88e681b"},
2087
+ {file = "torchaudio-2.4.1+cu121-cp311-cp311-win_amd64.whl", hash = "sha256:06095d5941a2843663a230bef739680541e0a491caf41df8fb1552eeefd212b0"},
2088
+ {file = "torchaudio-2.4.1+cu121-cp312-cp312-linux_x86_64.whl", hash = "sha256:6b74d706aba81db5f838ca414f03d3f6598ea880b72106065dbc9c5d3c063fe1"},
2089
+ {file = "torchaudio-2.4.1+cu121-cp312-cp312-win_amd64.whl", hash = "sha256:1dbcafb1bd954fc33bf34e6062be7b2005cfe3bde82bc938d08e650aa9c9c08c"},
2090
+ {file = "torchaudio-2.4.1+cu121-cp38-cp38-linux_x86_64.whl", hash = "sha256:fcfbf11699295f65b04581cd04372fe02a239806eb7d25da2746f35de0f5d2d9"},
2091
+ {file = "torchaudio-2.4.1+cu121-cp38-cp38-win_amd64.whl", hash = "sha256:bafc2e099349c487b8311e2417431eea24b0f0844baafd452403e4e24200ae58"},
2092
+ {file = "torchaudio-2.4.1+cu121-cp39-cp39-linux_x86_64.whl", hash = "sha256:71bbb06c1018799db3a0bcc094dd08b80bc28bb7b5f27ab8b0e2f38b014b11c6"},
2093
+ {file = "torchaudio-2.4.1+cu121-cp39-cp39-win_amd64.whl", hash = "sha256:e316c5aa0cf5b844589c5b5803d75846300e6b62edf5921833a13f3022a11b9d"},
 
 
 
 
 
 
 
 
 
 
2094
  ]
2095
 
2096
  [package.dependencies]
2097
  torch = "2.4.1"
2098
 
2099
+ [package.source]
2100
+ type = "legacy"
2101
+ url = "https://download.pytorch.org/whl/cu121"
2102
+ reference = "pytorch"
2103
+
2104
  [[package]]
2105
  name = "tqdm"
2106
  version = "4.66.5"
 
2273
  optional = ["python-socks", "wsaccel"]
2274
  test = ["websockets"]
2275
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2276
  [[package]]
2277
  name = "whisper"
2278
  version = "1.1.10"
 
2289
  [metadata]
2290
  lock-version = "2.0"
2291
  python-versions = "3.11.7"
2292
+ content-hash = "c1e0fffbd7c3ee70d76ab8e44ccdf25c3008b6da1c494d8eafc7c9e3de5fdc65"
pyproject.toml CHANGED
@@ -24,7 +24,8 @@ python = "3.11.7"
24
  #numpy = "^1.22.0"
25
  #torch = "2.1.0"
26
  #sounddevice = "^0.5.0"
27
- torch = "^2.4.1"
 
28
  whisper = "^1.1.10"
29
  requests = "^2.32.3"
30
  transformers = "^4.44.2"
@@ -32,10 +33,13 @@ soundfile = "^0.12.1"
32
  faster-whisper = "^1.0.3"
33
  fastapi = "^0.114.2"
34
  websockets = "^13.0.1"
35
- websocket-client = "^1.8.0"
36
  librosa = "^0.10.2.post1"
37
  uvicorn = "^0.30.6"
38
- torchaudio = "^2.4.1"
 
 
 
39
  silero-vad = "^5.1"
40
  #openai = "^1.42.0"
41
  #numpy = "^1.22.0"
@@ -45,6 +49,16 @@ silero-vad = "^5.1"
45
  #ffmpeg = "^1.4"
46
 
47
 
 
 
 
 
 
 
 
 
 
 
48
 
49
 
50
 
 
24
  #numpy = "^1.22.0"
25
  #torch = "2.1.0"
26
  #sounddevice = "^0.5.0"
27
+ # torch = "^2.4.1"
28
+ torch = { version = "^2.0", source = "pytorch" }
29
  whisper = "^1.1.10"
30
  requests = "^2.32.3"
31
  transformers = "^4.44.2"
 
33
  faster-whisper = "^1.0.3"
34
  fastapi = "^0.114.2"
35
  websockets = "^13.0.1"
36
+ #websocket-client = "^1.8.0"
37
  librosa = "^0.10.2.post1"
38
  uvicorn = "^0.30.6"
39
+ # torchaudio = "^2.4.1"
40
+ torchaudio = { version = "^2.0", source = "pytorch" }
41
+
42
+
43
  silero-vad = "^5.1"
44
  #openai = "^1.42.0"
45
  #numpy = "^1.22.0"
 
49
  #ffmpeg = "^1.4"
50
 
51
 
52
+ [[tool.poetry.source]]
53
+ name = "pytorch"
54
+ url = "https://download.pytorch.org/whl/cu121"
55
+ priority = "explicit"
56
+
57
+
58
+ [[tool.poetry.source]]
59
+ name = "PyPI"
60
+ priority = "primary"
61
+
62
 
63
 
64
 
streaming_client.py CHANGED
@@ -4,6 +4,8 @@ import sys
4
  import time
5
  import logging
6
  import os
 
 
7
  import requests
8
 
9
  import json
@@ -12,6 +14,8 @@ import numpy as np
12
  import soundfile as sf
13
  import io
14
 
 
 
15
  # Import the necessary components from whisper_online.py
16
  from libs.whisper_streaming.whisper_online import (
17
  ASRBase,
@@ -24,10 +28,44 @@ from libs.whisper_streaming.whisper_online import (
24
  load_audio,
25
  load_audio_chunk, OpenaiApiASR,
26
  )
27
- from model import dict_to_segment
28
 
 
 
29
  logger = logging.getLogger(__name__)
30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  # Define the RemoteFasterWhisperASR class
32
  class RemoteFasterWhisperASR(ASRBase):
33
  """Uses a remote FasterWhisper model via WebSocket."""
@@ -65,6 +103,7 @@ class RemoteFasterWhisperASR(ASRBase):
65
  response = self.ws.recv()
66
  segments = json.loads(response)
67
  segments = [dict_to_segment(s) for s in segments]
 
68
  return segments
69
 
70
  def ts_words(self, segments):
@@ -154,19 +193,24 @@ def main():
154
  import numpy as np
155
  import io
156
  import soundfile as sf
 
157
 
158
  # Download the audio file if not already present
159
  AUDIO_FILE_URL = "https://raw.githubusercontent.com/AshDavid12/runpod-serverless-forked/main/test_hebrew.wav"
160
  audio_file_path = "test_hebrew.wav"
 
161
  if not os.path.exists(audio_file_path):
162
  response = requests.get(AUDIO_FILE_URL)
163
  with open(audio_file_path, 'wb') as f:
164
  f.write(response.content)
165
 
 
 
 
166
  # Set up arguments
167
  class Args:
168
  def __init__(self):
169
- self.audio_path = audio_file_path
170
  self.lan = 'he'
171
  self.model = None # Not used in RemoteFasterWhisperASR
172
  self.model_cache_dir = None
 
4
  import time
5
  import logging
6
  import os
7
+ from wave import Wave_read
8
+
9
  import requests
10
 
11
  import json
 
14
  import soundfile as sf
15
  import io
16
 
17
+ import librosa
18
+
19
  # Import the necessary components from whisper_online.py
20
  from libs.whisper_streaming.whisper_online import (
21
  ASRBase,
 
28
  load_audio,
29
  load_audio_chunk, OpenaiApiASR,
30
  )
31
+ from model import dict_to_segment, get_raw_words_from_segments
32
 
33
+ logging.basicConfig(level=logging.INFO, format='%(asctime)s %(levelname)s: %(message)s',
34
+ handlers=[logging.StreamHandler(sys.stdout)], force=True)
35
  logger = logging.getLogger(__name__)
36
 
37
+
38
+
39
def convert_to_mono_16k(input_wav: str, output_wav: str) -> None:
    """
    Re-encode a .wav file as single-channel audio sampled at 16 kHz.

    The input is read at its own sampling rate with every channel kept,
    mixed down to one channel when it has more than one, resampled to
    16 kHz and written to ``output_wav``.

    Args:
        input_wav (str): Path of the .wav file to read.
        output_wav (str): Path the converted .wav file is written to.
    """
    # sr=None keeps the file's own sampling rate; mono=False keeps all channels.
    samples, native_rate = librosa.load(input_wav, sr=None, mono=False)
    loaded_message = "Loaded audio with shape: %s, original sampling rate: %d" % (samples.shape, native_rate)
    logger.info(loaded_message)

    # More than one dimension means several channels: mix them down to mono.
    mono_samples = librosa.to_mono(samples) if samples.ndim > 1 else samples

    # Resample to the fixed 16 kHz output rate and write the result to disk.
    output_rate = 16000
    converted = librosa.resample(mono_samples, orig_sr=native_rate, target_sr=output_rate)
    sf.write(output_wav, converted, output_rate)

    logger.info(f"Converted audio saved to {output_wav}")
63
+
64
+
65
+ # Example usage:
66
+ # convert_to_mono_16k('input_audio.wav', 'output_audio_16k_mono.wav')
67
+
68
+
69
  # Define the RemoteFasterWhisperASR class
70
  class RemoteFasterWhisperASR(ASRBase):
71
  """Uses a remote FasterWhisper model via WebSocket."""
 
103
  response = self.ws.recv()
104
  segments = json.loads(response)
105
  segments = [dict_to_segment(s) for s in segments]
106
+ logger.info(get_raw_words_from_segments(segments))
107
  return segments
108
 
109
  def ts_words(self, segments):
 
193
  import numpy as np
194
  import io
195
  import soundfile as sf
196
+ import wave
197
 
198
  # Download the audio file if not already present
199
  AUDIO_FILE_URL = "https://raw.githubusercontent.com/AshDavid12/runpod-serverless-forked/main/test_hebrew.wav"
200
  audio_file_path = "test_hebrew.wav"
201
+ mono16k_audio_file_path = "mono16k." + audio_file_path
202
  if not os.path.exists(audio_file_path):
203
  response = requests.get(AUDIO_FILE_URL)
204
  with open(audio_file_path, 'wb') as f:
205
  f.write(response.content)
206
 
207
+ if not os.path.exists(mono16k_audio_file_path):
208
+ convert_to_mono_16k(audio_file_path, mono16k_audio_file_path)
209
+
210
  # Set up arguments
211
  class Args:
212
  def __init__(self):
213
+ self.audio_path = mono16k_audio_file_path
214
  self.lan = 'he'
215
  self.model = None # Not used in RemoteFasterWhisperASR
216
  self.model_cache_dir = None