retour au debut
Browse files- app.py +82 -174
- requirements.txt +15 -147
app.py
CHANGED
@@ -1,189 +1,97 @@
|
|
1 |
-
import
|
2 |
-
import
|
3 |
-
import time
|
4 |
import gradio as gr
|
|
|
5 |
import whisperx
|
6 |
-
import
|
7 |
-
|
8 |
-
from docx.shared import RGBColor
|
9 |
-
import numpy as np
|
10 |
-
import soundfile as sf
|
11 |
-
from datetime import date
|
12 |
-
from dotenv import load_dotenv
|
13 |
-
|
14 |
-
# Load environment variables from .env file
|
15 |
-
load_dotenv()
|
16 |
-
# Get Hugging Face token from environment variables
|
17 |
-
HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")
|
18 |
|
19 |
-
|
20 |
-
|
21 |
-
|
|
|
22 |
|
23 |
-
|
24 |
-
SPEAKER_COLORS = {
|
25 |
-
"SPEAKER_00": RGBColor(255, 0, 0), # Red
|
26 |
-
"SPEAKER_01": RGBColor(0, 0, 255), # Blue
|
27 |
-
"SPEAKER_02": RGBColor(0, 128, 0), # Green
|
28 |
-
"SPEAKER_03": RGBColor(128, 0, 128), # Purple
|
29 |
-
"SPEAKER_04": RGBColor(255, 165, 0), # Orange
|
30 |
-
"SPEAKER_05": RGBColor(0, 128, 128), # Teal
|
31 |
-
"SPEAKER_06": RGBColor(139, 69, 19), # Brown
|
32 |
-
"SPEAKER_07": RGBColor(105, 105, 105), # Gray
|
33 |
-
"SPEAKER_08": RGBColor(255, 20, 147), # Pink
|
34 |
-
"SPEAKER_09": RGBColor(0, 191, 255), # Sky Blue
|
35 |
-
}
|
36 |
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
|
|
|
|
43 |
|
44 |
-
def
|
45 |
-
|
46 |
-
|
47 |
-
# 1. Load and transcribe audio with whisperx
|
48 |
-
model = whisperx.load_model(model_name, DEVICE, compute_type=COMPUTE_TYPE)
|
49 |
-
audio = whisperx.load_audio(audio_path)
|
50 |
-
result = model.transcribe(audio, batch_size=16)
|
51 |
-
|
52 |
-
# 2. Align whisper output
|
53 |
-
model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=DEVICE)
|
54 |
-
result = whisperx.align(result["segments"], model_a, metadata, audio, DEVICE, return_char_alignments=False)
|
55 |
-
|
56 |
-
# 3. Assign speaker labels
|
57 |
-
diarize_model = whisperx.DiarizationPipeline(use_auth_token=HUGGINGFACE_TOKEN, device=DEVICE)
|
58 |
-
diarize_segments = diarize_model(audio, min_speakers=1, max_speakers=10)
|
59 |
-
result = whisperx.assign_word_speakers(diarize_segments, result)
|
60 |
-
|
61 |
-
return result, None
|
62 |
-
except Exception as e:
|
63 |
-
return None, str(e)
|
64 |
|
65 |
-
def
|
66 |
-
|
67 |
-
|
68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
69 |
|
70 |
-
|
71 |
-
|
72 |
-
|
|
|
|
|
|
|
73 |
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
|
78 |
-
|
79 |
-
|
80 |
-
p.add_run(f"[{start_time} - {end_time}] ").bold = True
|
81 |
-
|
82 |
-
speaker = segment.get("speaker", "UNKNOWN")
|
83 |
-
if speaker in SPEAKER_COLORS:
|
84 |
-
speaker_run = p.add_run(f"{speaker}: ")
|
85 |
-
speaker_run.font.color.rgb = SPEAKER_COLORS[speaker]
|
86 |
-
speaker_run.bold = True
|
87 |
-
else:
|
88 |
-
p.add_run(f"{speaker}: ").bold = True
|
89 |
-
|
90 |
-
p.add_run(segment["text"])
|
91 |
|
92 |
-
|
93 |
-
return output_path
|
94 |
|
95 |
-
|
96 |
-
"""Save the recorded audio to a temporary file."""
|
97 |
-
filename = f"recorded_audio_{int(time.time())}.wav"
|
98 |
-
temp_file = os.path.join(tempfile.gettempdir(), filename)
|
99 |
-
sf.write(temp_file, audio_data, sample_rate)
|
100 |
-
return temp_file, filename
|
101 |
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
else:
|
114 |
-
# Handle uploaded file
|
115 |
-
filepath = audio_input
|
116 |
-
filename = os.path.basename(filepath) if filepath else None
|
117 |
-
is_temp_file = False
|
118 |
-
|
119 |
-
# Transcribe audio
|
120 |
-
result, error = transcribe_audio(filepath, model_name)
|
121 |
-
if error:
|
122 |
-
return None, None, f"Transcription error: {error}", None
|
123 |
-
|
124 |
-
# Export to DOCX
|
125 |
-
docx_path = export_to_docx(result)
|
126 |
-
|
127 |
-
# Prepare display table
|
128 |
-
table_data = []
|
129 |
-
for segment in result["segments"]:
|
130 |
-
start_time = format_time(segment["start"])
|
131 |
-
end_time = format_time(segment["end"])
|
132 |
-
speaker = segment.get("speaker", "UNKNOWN")
|
133 |
-
text = segment["text"]
|
134 |
-
table_data.append([f"{start_time} - {end_time}", speaker, text])
|
135 |
-
|
136 |
-
# Prepare audio for download
|
137 |
-
if is_temp_file:
|
138 |
-
download_path = (filepath, filename)
|
139 |
-
else:
|
140 |
-
# For uploaded files, no need to provide download as user already has the file
|
141 |
-
download_path = None
|
142 |
-
|
143 |
-
return table_data, docx_path, "Transcription completed successfully", download_path
|
144 |
-
except Exception as e:
|
145 |
-
return None, None, f"Error: {str(e)}", None
|
146 |
|
147 |
-
|
148 |
-
|
149 |
-
|
150 |
-
gr.
|
151 |
-
gr.
|
152 |
-
|
153 |
-
|
154 |
-
|
155 |
-
|
156 |
-
|
157 |
-
|
158 |
-
|
159 |
-
|
160 |
-
|
161 |
-
|
162 |
-
|
163 |
-
|
164 |
-
|
165 |
-
|
166 |
-
|
167 |
-
|
168 |
-
|
169 |
-
|
170 |
-
|
171 |
-
|
172 |
-
|
173 |
-
label="Transcription Results"
|
174 |
-
)
|
175 |
-
|
176 |
-
with gr.Row():
|
177 |
-
docx_output = gr.File(label="DOCX Export")
|
178 |
-
audio_download = gr.File(label="Download Recorded Audio")
|
179 |
-
|
180 |
-
process_btn.click(
|
181 |
-
fn=process_audio,
|
182 |
-
inputs=[audio_file, model_dropdown],
|
183 |
-
outputs=[transcription, docx_output, status, audio_download]
|
184 |
-
)
|
185 |
-
|
186 |
-
return interface
|
187 |
|
188 |
-
|
189 |
-
interface.queue().launch(ssr_mode=False)
|
|
|
1 |
+
import spaces
|
2 |
+
import torch
|
|
|
3 |
import gradio as gr
|
4 |
+
import yt_dlp as youtube_dl
|
5 |
import whisperx
|
6 |
+
import tempfile
|
7 |
+
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
|
9 |
+
# Use the GPU when torch can see one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# float16 is only a sensible choice on the GPU; use an integer compute type
# for the CPU fallback so loading the model does not depend on fp16 support.
COMPUTE_TYPE = "float16" if device == "cuda" else "int8"
BATCH_SIZE = 8  # batch size handed to model.transcribe
FILE_LIMIT_MB = 1000
YT_LENGTH_LIMIT_S = 3600  # limit to 1 hour YouTube files

# Loaded once at import time; the request handlers below share this instance.
model = whisperx.load_model("large-v2", device, compute_type=COMPUTE_TYPE)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
15 |
|
16 |
+
@spaces.GPU
def transcribe(inputs, task):
    """Transcribe (or translate) one audio file with the shared WhisperX model.

    Args:
        inputs: Path of the audio file to process, or None when the user
            submitted nothing.
        task: Task selected in the UI ("transcribe" or "translate").

    Returns:
        The recognised text as a single string.

    Raises:
        gr.Error: If ``inputs`` is None.
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    audio = whisperx.load_audio(inputs)
    # Forward the task; without it the "translate" choice is silently ignored.
    result = model.transcribe(audio, batch_size=BATCH_SIZE, task=task)

    # Prefer a ready-made "text" entry when the backend supplies one, otherwise
    # stitch the per-segment texts together.
    if "text" in result:
        return result["text"]
    return " ".join(segment["text"].strip() for segment in result.get("segments", []))
|
24 |
|
25 |
+
def _return_yt_html_embed(yt_url):
|
26 |
+
video_id = yt_url.split("?v=")[-1]
|
27 |
+
return f'<center><iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"></iframe></center>'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
|
29 |
+
def download_yt_audio(yt_url, filename):
    """Download the audio of ``yt_url`` and convert it to WAV with yt-dlp.

    Args:
        yt_url: URL of the video to fetch.
        filename: Destination path. When it ends in ".wav" the converted file
            is written exactly there; otherwise ".wav" is appended.

    Returns:
        None. The result is the file written to disk.
    """
    import os

    target = os.fspath(filename)
    wav_suffix = ".wav"
    stem = target[: -len(wav_suffix)] if target.endswith(wav_suffix) else target

    ydl_opts = {
        "format": "bestaudio/best",
        # Let yt-dlp fill in the extension: the raw download keeps its own
        # container extension and the FFmpegExtractAudio step then produces
        # "<stem>.wav". Hard-coding ".wav" here makes the download and the
        # converted file compete for the same name.
        # NOTE(review): exact naming with a fixed extension depends on the
        # installed yt-dlp version - confirm if this is ever reverted.
        # "%" introduces template fields, so literal ones are doubled.
        "outtmpl": stem.replace("%", "%%") + ".%(ext)s",
        "postprocessors": [{
            "key": "FFmpegExtractAudio",
            "preferredcodec": "wav",
            "preferredquality": "192",
        }],
    }

    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        ydl.download([yt_url])
|
42 |
+
|
43 |
+
@spaces.GPU
def yt_transcribe(yt_url, task):
    """Download a YouTube video's audio and transcribe (or translate) it.

    Args:
        yt_url: URL of the YouTube video.
        task: Task selected in the UI ("transcribe" or "translate").

    Returns:
        A ``(html, text)`` tuple: the embed snippet for the video and the
        recognised text.

    Raises:
        gr.Error: If no URL was provided.
    """
    if not yt_url or not yt_url.strip():
        raise gr.Error("No YouTube URL submitted! Please paste a video URL before submitting your request.")

    html_embed_str = _return_yt_html_embed(yt_url)

    # The download only needs to live until the audio has been processed.
    with tempfile.TemporaryDirectory() as tmpdirname:
        filepath = os.path.join(tmpdirname, "audio.wav")
        download_yt_audio(yt_url, filepath)

        audio = whisperx.load_audio(filepath)
        # Forward the task; without it the "translate" choice is ignored.
        result = model.transcribe(audio, batch_size=BATCH_SIZE, task=task)

    # Prefer a ready-made "text" entry when the backend supplies one, otherwise
    # stitch the per-segment texts together.
    if "text" in result:
        text = result["text"]
    else:
        text = " ".join(segment["text"].strip() for segment in result.get("segments", []))

    return html_embed_str, text
|
|
|
55 |
|
56 |
+
# Top-level container for the app; the tabbed UI is attached to it below.
demo = gr.Blocks(theme=gr.themes.Ocean())

# Tab 1: record from the microphone and transcribe.
mf_transcribe = gr.Interface(
    title="VerbaLend Demo with WhisperX",
    description="Transcribe long-form microphone or audio inputs using WhisperX.",
    fn=transcribe,
    inputs=[
        gr.Audio(sources="microphone", type="filepath"),
        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
    ],
    outputs="text",
    allow_flagging="never",
)

# Tab 2: upload an audio file and transcribe.
file_transcribe = gr.Interface(
    title="VerbaLend Demo with WhisperX",
    description="Transcribe uploaded audio files using WhisperX.",
    fn=transcribe,
    inputs=[
        gr.Audio(sources="upload", type="filepath", label="Audio file"),
        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
    ],
    outputs="text",
    allow_flagging="never",
)

# Tab 3: transcribe a YouTube video. The handler function is captured by
# ``fn=`` before the name ``yt_transcribe`` is rebound to the Interface.
yt_transcribe = gr.Interface(
    title="VerbaLend Demo with WhisperX",
    description="Transcribe YouTube videos using WhisperX.",
    fn=yt_transcribe,
    inputs=[
        gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
    ],
    outputs=["html", "text"],
    allow_flagging="never",
)

# Assemble the three interfaces as tabs inside the Blocks container.
with demo:
    gr.TabbedInterface(
        [mf_transcribe, file_transcribe, yt_transcribe],
        ["Microphone", "Audio file", "YouTube"],
    )

# Enable request queuing and start the server without server-side rendering.
demo.queue().launch(ssr_mode=False)
|
|
requirements.txt
CHANGED
@@ -1,147 +1,15 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
click==8.1.8 ; python_version >= "3.10" and python_version < "3.12"
|
17 |
-
colorama==0.4.6 ; python_version >= "3.10" and python_version < "3.12" and (platform_system == "Windows" or sys_platform == "win32")
|
18 |
-
coloredlogs==15.0.1 ; python_version >= "3.10" and python_version < "3.12"
|
19 |
-
colorlog==6.9.0 ; python_version >= "3.10" and python_version < "3.12"
|
20 |
-
contourpy==1.3.1 ; python_version >= "3.10" and python_version < "3.12"
|
21 |
-
ctranslate2==4.4.0 ; python_version >= "3.10" and python_version < "3.12"
|
22 |
-
cycler==0.12.1 ; python_version >= "3.10" and python_version < "3.12"
|
23 |
-
docopt==0.6.2 ; python_version >= "3.10" and python_version < "3.12"
|
24 |
-
einops==0.8.1 ; python_version >= "3.10" and python_version < "3.12"
|
25 |
-
exceptiongroup==1.2.2 ; python_version == "3.10"
|
26 |
-
fastapi==0.115.11 ; python_version >= "3.10" and python_version < "3.12"
|
27 |
-
faster-whisper==1.1.0 ; python_version >= "3.10" and python_version < "3.12"
|
28 |
-
ffmpy==0.5.0 ; python_version >= "3.10" and python_version < "3.12"
|
29 |
-
filelock==3.17.0 ; python_version >= "3.10" and python_version < "3.12"
|
30 |
-
flatbuffers==25.2.10 ; python_version >= "3.10" and python_version < "3.12"
|
31 |
-
fonttools==4.56.0 ; python_version >= "3.10" and python_version < "3.12"
|
32 |
-
frozenlist==1.5.0 ; python_version >= "3.10" and python_version < "3.12"
|
33 |
-
fsspec==2025.2.0 ; python_version >= "3.10" and python_version < "3.12"
|
34 |
-
gradio-client==1.3.0 ; python_version >= "3.10" and python_version < "3.12"
|
35 |
-
gradio==4.44.1 ; python_version >= "3.10" and python_version < "3.12"
|
36 |
-
greenlet==3.1.1 ; python_version >= "3.10" and python_version < "3.12" and (platform_machine == "aarch64" or platform_machine == "ppc64le" or platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "AMD64" or platform_machine == "win32" or platform_machine == "WIN32")
|
37 |
-
h11==0.14.0 ; python_version >= "3.10" and python_version < "3.12"
|
38 |
-
httpcore==1.0.7 ; python_version >= "3.10" and python_version < "3.12"
|
39 |
-
httpx==0.28.1 ; python_version >= "3.10" and python_version < "3.12"
|
40 |
-
huggingface-hub==0.29.1 ; python_version >= "3.10" and python_version < "3.12"
|
41 |
-
humanfriendly==10.0 ; python_version >= "3.10" and python_version < "3.12"
|
42 |
-
hyperpyyaml==1.2.2 ; python_version >= "3.10" and python_version < "3.12"
|
43 |
-
idna==3.10 ; python_version >= "3.10" and python_version < "3.12"
|
44 |
-
importlib-resources==6.5.2 ; python_version >= "3.10" and python_version < "3.12"
|
45 |
-
jinja2==3.1.5 ; python_version >= "3.10" and python_version < "3.12"
|
46 |
-
joblib==1.4.2 ; python_version >= "3.10" and python_version < "3.12"
|
47 |
-
julius==0.2.7 ; python_version >= "3.10" and python_version < "3.12"
|
48 |
-
kiwisolver==1.4.8 ; python_version >= "3.10" and python_version < "3.12"
|
49 |
-
lightning-utilities==0.12.0 ; python_version >= "3.10" and python_version < "3.12"
|
50 |
-
lightning==2.5.0.post0 ; python_version >= "3.10" and python_version < "3.12"
|
51 |
-
lxml==5.3.1 ; python_version >= "3.10" and python_version < "3.12"
|
52 |
-
mako==1.3.9 ; python_version >= "3.10" and python_version < "3.12"
|
53 |
-
markdown-it-py==3.0.0 ; python_version >= "3.10" and python_version < "3.12"
|
54 |
-
markupsafe==2.1.5 ; python_version >= "3.10" and python_version < "3.12"
|
55 |
-
matplotlib==3.10.1 ; python_version >= "3.10" and python_version < "3.12"
|
56 |
-
mdurl==0.1.2 ; python_version >= "3.10" and python_version < "3.12"
|
57 |
-
mpmath==1.3.0 ; python_version >= "3.10" and python_version < "3.12"
|
58 |
-
multidict==6.1.0 ; python_version >= "3.10" and python_version < "3.12"
|
59 |
-
networkx==3.4.2 ; python_version >= "3.10" and python_version < "3.12"
|
60 |
-
nltk==3.9.1 ; python_version >= "3.10" and python_version < "3.12"
|
61 |
-
numpy==1.26.4 ; python_version >= "3.10" and python_version < "3.12"
|
62 |
-
nvidia-cublas-cu12==12.4.5.8 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
|
63 |
-
nvidia-cuda-cupti-cu12==12.4.127 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
|
64 |
-
nvidia-cuda-nvrtc-cu12==12.4.127 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
|
65 |
-
nvidia-cuda-runtime-cu12==12.4.127 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
|
66 |
-
nvidia-cudnn-cu12==9.1.0.70 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
|
67 |
-
nvidia-cufft-cu12==11.2.1.3 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
|
68 |
-
nvidia-curand-cu12==10.3.5.147 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
|
69 |
-
nvidia-cusolver-cu12==11.6.1.9 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
|
70 |
-
nvidia-cusparse-cu12==12.3.1.170 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
|
71 |
-
nvidia-cusparselt-cu12==0.6.2 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
|
72 |
-
nvidia-nccl-cu12==2.21.5 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
|
73 |
-
nvidia-nvjitlink-cu12==12.4.127 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
|
74 |
-
nvidia-nvtx-cu12==12.4.127 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
|
75 |
-
omegaconf==2.3.0 ; python_version >= "3.10" and python_version < "3.12"
|
76 |
-
onnxruntime==1.20.1 ; python_version >= "3.10" and python_version < "3.12"
|
77 |
-
optuna==4.2.1 ; python_version >= "3.10" and python_version < "3.12"
|
78 |
-
orjson==3.10.15 ; python_version >= "3.10" and python_version < "3.12"
|
79 |
-
packaging==24.2 ; python_version >= "3.10" and python_version < "3.12"
|
80 |
-
pandas==2.2.3 ; python_version >= "3.10" and python_version < "3.12"
|
81 |
-
pillow==10.4.0 ; python_version >= "3.10" and python_version < "3.12"
|
82 |
-
primepy==1.3 ; python_version >= "3.10" and python_version < "3.12"
|
83 |
-
propcache==0.3.0 ; python_version >= "3.10" and python_version < "3.12"
|
84 |
-
protobuf==5.29.3 ; python_version >= "3.10" and python_version < "3.12"
|
85 |
-
pyannote-audio==3.3.2 ; python_version >= "3.10" and python_version < "3.12"
|
86 |
-
pyannote-core==5.0.0 ; python_version >= "3.10" and python_version < "3.12"
|
87 |
-
pyannote-database==5.1.3 ; python_version >= "3.10" and python_version < "3.12"
|
88 |
-
pyannote-metrics==3.2.1 ; python_version >= "3.10" and python_version < "3.12"
|
89 |
-
pyannote-pipeline==3.0.1 ; python_version >= "3.10" and python_version < "3.12"
|
90 |
-
pycparser==2.22 ; python_version >= "3.10" and python_version < "3.12"
|
91 |
-
pydantic-core==2.27.2 ; python_version >= "3.10" and python_version < "3.12"
|
92 |
-
pydantic==2.10.6 ; python_version >= "3.10" and python_version < "3.12"
|
93 |
-
pydub==0.25.1 ; python_version >= "3.10" and python_version < "3.12"
|
94 |
-
pygments==2.19.1 ; python_version >= "3.10" and python_version < "3.12"
|
95 |
-
pyparsing==3.2.1 ; python_version >= "3.10" and python_version < "3.12"
|
96 |
-
pyreadline3==3.5.4 ; python_version >= "3.10" and python_version < "3.12" and sys_platform == "win32"
|
97 |
-
python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version < "3.12"
|
98 |
-
python-docx==1.1.2 ; python_version >= "3.10" and python_version < "3.12"
|
99 |
-
python-dotenv==1.0.1 ; python_version >= "3.10" and python_version < "3.12"
|
100 |
-
python-multipart==0.0.20 ; python_version >= "3.10" and python_version < "3.12"
|
101 |
-
pytorch-lightning==2.5.0.post0 ; python_version >= "3.10" and python_version < "3.12"
|
102 |
-
pytorch-metric-learning==2.8.1 ; python_version >= "3.10" and python_version < "3.12"
|
103 |
-
pytz==2025.1 ; python_version >= "3.10" and python_version < "3.12"
|
104 |
-
pyyaml==6.0.2 ; python_version >= "3.10" and python_version < "3.12"
|
105 |
-
regex==2024.11.6 ; python_version >= "3.10" and python_version < "3.12"
|
106 |
-
requests==2.32.3 ; python_version >= "3.10" and python_version < "3.12"
|
107 |
-
rich==13.9.4 ; python_version >= "3.10" and python_version < "3.12"
|
108 |
-
ruamel-yaml-clib==0.2.12 ; python_version >= "3.10" and python_version < "3.12" and platform_python_implementation == "CPython"
|
109 |
-
ruamel-yaml==0.18.10 ; python_version >= "3.10" and python_version < "3.12"
|
110 |
-
ruff==0.9.9 ; python_version >= "3.10" and python_version < "3.12" and sys_platform != "emscripten"
|
111 |
-
safetensors==0.5.3 ; python_version >= "3.10" and python_version < "3.12"
|
112 |
-
scikit-learn==1.6.1 ; python_version >= "3.10" and python_version < "3.12"
|
113 |
-
scipy==1.15.2 ; python_version >= "3.10" and python_version < "3.12"
|
114 |
-
semantic-version==2.10.0 ; python_version >= "3.10" and python_version < "3.12"
|
115 |
-
semver==3.0.4 ; python_version >= "3.10" and python_version < "3.12"
|
116 |
-
sentencepiece==0.2.0 ; python_version >= "3.10" and python_version < "3.12"
|
117 |
-
setuptools==75.8.2 ; python_version >= "3.10" and python_version < "3.12"
|
118 |
-
shellingham==1.5.4 ; python_version >= "3.10" and python_version < "3.12"
|
119 |
-
six==1.17.0 ; python_version >= "3.10" and python_version < "3.12"
|
120 |
-
sniffio==1.3.1 ; python_version >= "3.10" and python_version < "3.12"
|
121 |
-
sortedcontainers==2.4.0 ; python_version >= "3.10" and python_version < "3.12"
|
122 |
-
soundfile==0.13.1 ; python_version >= "3.10" and python_version < "3.12"
|
123 |
-
speechbrain==1.0.2 ; python_version >= "3.10" and python_version < "3.12"
|
124 |
-
sqlalchemy==2.0.38 ; python_version >= "3.10" and python_version < "3.12"
|
125 |
-
starlette==0.46.0 ; python_version >= "3.10" and python_version < "3.12"
|
126 |
-
sympy==1.13.1 ; python_version >= "3.10" and python_version < "3.12"
|
127 |
-
tabulate==0.9.0 ; python_version >= "3.10" and python_version < "3.12"
|
128 |
-
tensorboardx==2.6.2.2 ; python_version >= "3.10" and python_version < "3.12"
|
129 |
-
threadpoolctl==3.5.0 ; python_version >= "3.10" and python_version < "3.12"
|
130 |
-
tokenizers==0.21.0 ; python_version >= "3.10" and python_version < "3.12"
|
131 |
-
tomlkit==0.12.0 ; python_version >= "3.10" and python_version < "3.12"
|
132 |
-
torch-audiomentations==0.12.0 ; python_version >= "3.10" and python_version < "3.12"
|
133 |
-
torch-pitch-shift==1.2.5 ; python_version >= "3.10" and python_version < "3.12"
|
134 |
-
torch==2.6.0 ; python_version >= "3.10" and python_version < "3.12"
|
135 |
-
torchaudio==2.6.0 ; python_version >= "3.10" and python_version < "3.12"
|
136 |
-
torchmetrics==1.6.1 ; python_version >= "3.10" and python_version < "3.12"
|
137 |
-
tqdm==4.67.1 ; python_version >= "3.10" and python_version < "3.12"
|
138 |
-
transformers==4.49.0 ; python_version >= "3.10" and python_version < "3.12"
|
139 |
-
triton==3.2.0 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
|
140 |
-
typer==0.15.2 ; python_version >= "3.10" and python_version < "3.12"
|
141 |
-
typing-extensions==4.12.2 ; python_version >= "3.10" and python_version < "3.12"
|
142 |
-
tzdata==2025.1 ; python_version >= "3.10" and python_version < "3.12"
|
143 |
-
urllib3==2.3.0 ; python_version >= "3.10" and python_version < "3.12"
|
144 |
-
uvicorn==0.34.0 ; python_version >= "3.10" and python_version < "3.12" and sys_platform != "emscripten"
|
145 |
-
websockets==12.0 ; python_version >= "3.10" and python_version < "3.12"
|
146 |
-
whisperx==3.3.1 ; python_version >= "3.10" and python_version < "3.12"
|
147 |
-
yarl==1.18.3 ; python_version >= "3.10" and python_version < "3.12"
|
|
|
1 |
+
pydub
|
2 |
+
pandas
|
3 |
+
numpy
|
4 |
+
torch
|
5 |
+
torchaudio
|
6 |
+
pyannote.audio
|
7 |
+
transformers>=4.19.0
|
8 |
+
ffmpeg-python==0.2.0
|
9 |
+
tqdm
|
10 |
+
transformers>=4.19.0
|
11 |
+
yt-dlp
|
12 |
+
tempfile
|
13 |
+
more_itertools
|
14 |
+
faster-whisper
|
15 |
+
git+https://github.com/m-bain/whisperx.git
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|