update v2
Browse files- app.py +184 -138
- requirements.txt +147 -13
app.py
CHANGED
@@ -1,143 +1,189 @@
|
|
1 |
-
import spaces
|
2 |
-
import torch
|
3 |
-
import torchaudio
|
4 |
-
import gradio as gr
|
5 |
-
import yt_dlp as youtube_dl
|
6 |
-
from faster_whisper import WhisperModel
|
7 |
-
from transformers.pipelines.audio_utils import ffmpeg_read
|
8 |
-
import tempfile
|
9 |
import os
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
#
|
25 |
-
|
26 |
-
|
27 |
-
#
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
speaker_texts.append(f"{speaker}: {spoken_text}")
|
55 |
-
return "\n".join(speaker_texts)
|
56 |
-
|
57 |
-
def _return_yt_html_embed(yt_url):
|
58 |
-
video_id = yt_url.split("?v=")[-1]
|
59 |
-
HTML_str = (
|
60 |
-
f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
|
61 |
-
" </center>"
|
62 |
-
)
|
63 |
-
return HTML_str
|
64 |
-
|
65 |
-
def download_yt_audio(yt_url, filename):
|
66 |
-
info_loader = youtube_dl.YoutubeDL()
|
67 |
-
|
68 |
try:
|
69 |
-
|
70 |
-
|
71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
ydl_opts = {"outtmpl": filename, "format": "bestaudio/best"}
|
78 |
-
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
|
79 |
-
try:
|
80 |
-
ydl.download([yt_url])
|
81 |
-
except youtube_dl.utils.ExtractorError as err:
|
82 |
-
raise gr.Error(str(err))
|
83 |
-
|
84 |
-
@spaces.GPU
|
85 |
-
def yt_transcribe(yt_url, task):
|
86 |
-
html_embed_str = _return_yt_html_embed(yt_url)
|
87 |
-
|
88 |
-
with tempfile.TemporaryDirectory() as tmpdirname:
|
89 |
-
filepath = os.path.join(tmpdirname, "video.mp4")
|
90 |
-
download_yt_audio(yt_url, filepath)
|
91 |
-
with open(filepath, "rb") as f:
|
92 |
-
inputs = f.read()
|
93 |
|
94 |
-
|
95 |
-
|
96 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
97 |
|
98 |
-
|
99 |
-
|
100 |
-
|
101 |
-
|
102 |
-
|
103 |
-
|
104 |
-
|
105 |
-
|
106 |
-
|
107 |
-
|
108 |
-
|
109 |
-
|
110 |
-
|
111 |
-
|
112 |
-
|
113 |
-
|
114 |
-
)
|
115 |
-
|
116 |
-
|
117 |
-
|
118 |
-
|
119 |
-
|
120 |
-
|
121 |
-
|
122 |
-
|
123 |
-
|
124 |
-
|
125 |
-
|
126 |
-
)
|
127 |
-
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
133 |
-
|
134 |
-
|
135 |
-
|
136 |
-
|
137 |
-
|
138 |
-
)
|
139 |
-
|
140 |
-
|
141 |
-
|
142 |
-
|
143 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
+
import tempfile
|
3 |
+
import time
|
4 |
+
import gradio as gr
|
5 |
+
import whisperx
|
6 |
+
import torch
|
7 |
+
from docx import Document
|
8 |
+
from docx.shared import RGBColor
|
9 |
+
import numpy as np
|
10 |
+
import soundfile as sf
|
11 |
+
from datetime import date
|
12 |
+
from dotenv import load_dotenv
|
13 |
+
|
14 |
+
# Read variables from a .env file before any setting below is looked up.
load_dotenv()

# Hugging Face token passed to the diarization pipeline (None when HF_TOKEN is unset).
HUGGINGFACE_TOKEN = os.environ.get("HF_TOKEN")

# Use CUDA with float16 when a GPU is available, otherwise the CPU with int8.
if torch.cuda.is_available():
    DEVICE, COMPUTE_TYPE = "cuda", "float16"
else:
    DEVICE, COMPUTE_TYPE = "cpu", "int8"

# Font color applied to each speaker label in the DOCX export,
# keyed "SPEAKER_00" .. "SPEAKER_09" in palette order.
SPEAKER_COLORS = {
    f"SPEAKER_{index:02d}": RGBColor(*rgb)
    for index, rgb in enumerate(
        (
            (255, 0, 0),  # Red
            (0, 0, 255),  # Blue
            (0, 128, 0),  # Green
            (128, 0, 128),  # Purple
            (255, 165, 0),  # Orange
            (0, 128, 128),  # Teal
            (139, 69, 19),  # Brown
            (105, 105, 105),  # Gray
            (255, 20, 147),  # Pink
            (0, 191, 255),  # Sky Blue
        )
    )
}
|
36 |
+
|
37 |
+
def format_time(seconds):
    """Render a duration in seconds as a zero-padded ``HH:MM:SS`` string.

    Each field is converted with ``int()``, so fractional seconds are
    dropped rather than rounded. The hours field is not wrapped at 24.
    """
    hh = int(seconds // 3600)
    mm = int(seconds % 3600 // 60)
    ss = int(seconds % 60)
    return "{:02d}:{:02d}:{:02d}".format(hh, mm, ss)
|
43 |
+
|
44 |
+
def transcribe_audio(audio_path, model_name="large-v2"):
    """Run the WhisperX pipeline on one file: transcribe, align, label speakers.

    Args:
        audio_path: Path of the audio file handed to ``whisperx.load_audio``.
        model_name: Name of the Whisper model to load.

    Returns:
        ``(result, None)`` on success, where ``result`` is the value returned
        by ``whisperx.assign_word_speakers``; ``(None, message)`` when any
        step raises, with ``message`` being ``str()`` of the exception.
    """
    try:
        # Stage 1: speech-to-text.
        asr_model = whisperx.load_model(model_name, DEVICE, compute_type=COMPUTE_TYPE)
        waveform = whisperx.load_audio(audio_path)
        transcript = asr_model.transcribe(waveform, batch_size=16)

        # Stage 2: align the segments with the model for the detected language.
        align_model, align_metadata = whisperx.load_align_model(
            language_code=transcript["language"],
            device=DEVICE,
        )
        aligned = whisperx.align(
            transcript["segments"],
            align_model,
            align_metadata,
            waveform,
            DEVICE,
            return_char_alignments=False,
        )

        # Stage 3: diarize (1 to 10 speakers) and attach speaker labels.
        diarizer = whisperx.DiarizationPipeline(
            use_auth_token=HUGGINGFACE_TOKEN,
            device=DEVICE,
        )
        speaker_turns = diarizer(waveform, min_speakers=1, max_speakers=10)
        labelled = whisperx.assign_word_speakers(speaker_turns, aligned)
    except Exception as exc:
        return None, str(exc)
    return labelled, None
|
64 |
+
|
65 |
+
def export_to_docx(result, output_path=None):
    """Export a transcription to DOCX with timecodes and color-coded speakers.

    Args:
        result: Mapping with a ``"segments"`` list. Each segment provides
            ``"start"``, ``"end"`` and ``"text"``, and optionally
            ``"speaker"`` (shown as ``UNKNOWN`` when missing).
        output_path: Destination file. When ``None``, a uniquely named
            ``transcript_*.docx`` file is created in the system temp directory.

    Returns:
        The path the document was saved to.
    """
    if output_path is None:
        # A second-resolution timestamp collides when two exports start in
        # the same second, so let tempfile reserve a unique name instead.
        with tempfile.NamedTemporaryFile(
            prefix="transcript_", suffix=".docx", delete=False
        ) as handle:
            output_path = handle.name

    formatted_date = date.today().strftime("%d/%m/%Y")
    doc = Document()
    doc.add_heading('Transcription ' + formatted_date, 0)

    for segment in result["segments"]:
        start_time = format_time(segment["start"])
        end_time = format_time(segment["end"])

        # One paragraph per segment: bold timecode, bold speaker, plain text.
        p = doc.add_paragraph()
        p.add_run(f"[{start_time} - {end_time}] ").bold = True

        speaker = segment.get("speaker", "UNKNOWN")
        speaker_run = p.add_run(f"{speaker}: ")
        speaker_run.bold = True
        # Speakers without a palette entry keep the default font color.
        color = SPEAKER_COLORS.get(speaker)
        if color is not None:
            speaker_run.font.color.rgb = color

        p.add_run(segment["text"])

    doc.save(output_path)
    return output_path
|
94 |
+
|
95 |
+
def save_audio(audio_data, sample_rate):
    """Save recorded audio to a uniquely named WAV file in the temp directory.

    Args:
        audio_data: Samples to write, passed through to ``sf.write``.
        sample_rate: Sample rate passed through to ``sf.write``.

    Returns:
        ``(path, filename)``: the full path of the written file and its
        base name.
    """
    # A second-resolution timestamp would let two recordings saved in the same
    # second overwrite each other; mkstemp reserves a unique name.
    fd, temp_file = tempfile.mkstemp(prefix="recorded_audio_", suffix=".wav")
    os.close(fd)  # sf.write opens the path itself
    sf.write(temp_file, audio_data, sample_rate)
    return temp_file, os.path.basename(temp_file)
|
101 |
+
|
102 |
+
def process_audio(audio_input=None, model_name="large-v2"):
    """Transcribe an uploaded or recorded clip and build the UI outputs.

    Args:
        audio_input: Path of an audio file, or a ``(sample_rate, samples)``
            tuple as delivered by Gradio's numpy audio format. ``None`` when
            nothing was provided.
        model_name: Whisper model name forwarded to ``transcribe_audio``.

    Returns:
        ``(table_rows, docx_path, status_message, download_path)``.
        ``table_rows`` holds ``[time_range, speaker, text]`` per segment.
        ``download_path`` is the saved recording's path when the input was a
        tuple, otherwise ``None``. On any failure every element except
        ``status_message`` is ``None``.
    """
    if audio_input is None:
        return None, None, "No audio provided", None

    try:
        if isinstance(audio_input, tuple) and len(audio_input) >= 2:
            # Gradio hands numpy audio over as (sample_rate, samples), while
            # save_audio takes (samples, sample_rate).
            sample_rate, samples = audio_input[0], audio_input[1]
            filepath, _ = save_audio(samples, sample_rate)
            # The File output expects a plain path, not a (path, name) tuple.
            download_path = filepath
        else:
            # Uploaded file: the user already has it, nothing to offer back.
            filepath = audio_input
            download_path = None

        result, error = transcribe_audio(filepath, model_name)
        if error:
            return None, None, f"Transcription error: {error}", None

        docx_path = export_to_docx(result)

        # Rows for the results table: time range, speaker, text.
        table_data = [
            [
                f'{format_time(segment["start"])} - {format_time(segment["end"])}',
                segment.get("speaker", "UNKNOWN"),
                segment["text"],
            ]
            for segment in result["segments"]
        ]

        return table_data, docx_path, "Transcription completed successfully", download_path
    except Exception as e:
        # UI boundary: report the failure in the status box instead of raising.
        return None, None, f"Error: {str(e)}", None
|
146 |
+
|
147 |
+
def create_interface():
    """Assemble the Gradio Blocks UI and return it without launching it.

    The Transcribe button calls ``process_audio`` with the audio path and
    the selected model, and fills the results table, the DOCX download,
    the status box and the recorded-audio download, in that order.
    """
    model_names = ["large-v2", "large-v3", "medium", "small", "base", "tiny"]

    # NOTE(review): confirm the container nesting below (what sits inside the
    # Tab and the second Column) against the running app.
    with gr.Blocks(title="WhisperX Transcription") as demo:
        gr.Markdown("# 🎙️ Audio Transcription with Speaker Identification")
        gr.Markdown("Upload an audio file or record directly to transcribe and identify speakers.")

        with gr.Row():
            # Left column: inputs.
            with gr.Column():
                gr.Markdown("### Input")
                with gr.Tab("Audio Input"):
                    audio_in = gr.Audio(
                        sources=["upload", "microphone"],
                        type="filepath",
                        label="Upload or Record Audio (MP3 or WAV)",
                    )

                model_choice = gr.Dropdown(
                    choices=model_names,
                    value="large-v2",
                    label="Whisper Model",
                    info="Larger models are more accurate but require more computational resources",
                )

                run_button = gr.Button("Transcribe", variant="primary")

            # Right column: outputs.
            with gr.Column():
                gr.Markdown("### Results")
                status_box = gr.Textbox(label="Status", interactive=False)
                results_table = gr.DataFrame(
                    headers=["Time", "Speaker", "Text"],
                    label="Transcription Results",
                )

                with gr.Row():
                    docx_file = gr.File(label="DOCX Export")
                    recording_file = gr.File(label="Download Recorded Audio")

        # Output order matches the tuple returned by process_audio.
        run_button.click(
            fn=process_audio,
            inputs=[audio_in, model_choice],
            outputs=[results_table, docx_file, status_box, recording_file],
        )

    return demo
|
187 |
+
|
188 |
+
# Build the UI once at import time, then start serving it.
interface = create_interface()
# queue is a method and has to be called before chaining launch();
# `interface.queue.launch(...)` raises AttributeError on the bound method.
# NOTE(review): ssr_mode is a Gradio 5 launch() argument, while requirements
# pin gradio==4.44.1. Confirm the installed version accepts it.
interface.queue().launch(ssr_mode=False)
|
requirements.txt
CHANGED
@@ -1,13 +1,147 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
aiofiles==23.2.1 ; python_version >= "3.10" and python_version < "3.12"
|
2 |
+
aiohappyeyeballs==2.4.6 ; python_version >= "3.10" and python_version < "3.12"
|
3 |
+
aiohttp==3.11.13 ; python_version >= "3.10" and python_version < "3.12"
|
4 |
+
aiosignal==1.3.2 ; python_version >= "3.10" and python_version < "3.12"
|
5 |
+
alembic==1.14.1 ; python_version >= "3.10" and python_version < "3.12"
|
6 |
+
annotated-types==0.7.0 ; python_version >= "3.10" and python_version < "3.12"
|
7 |
+
antlr4-python3-runtime==4.9.3 ; python_version >= "3.10" and python_version < "3.12"
|
8 |
+
anyio==4.8.0 ; python_version >= "3.10" and python_version < "3.12"
|
9 |
+
asteroid-filterbanks==0.4.0 ; python_version >= "3.10" and python_version < "3.12"
|
10 |
+
async-timeout==5.0.1 ; python_version == "3.10"
|
11 |
+
attrs==25.1.0 ; python_version >= "3.10" and python_version < "3.12"
|
12 |
+
av==14.2.0 ; python_version >= "3.10" and python_version < "3.12"
|
13 |
+
certifi==2025.1.31 ; python_version >= "3.10" and python_version < "3.12"
|
14 |
+
cffi==1.17.1 ; python_version >= "3.10" and python_version < "3.12"
|
15 |
+
charset-normalizer==3.4.1 ; python_version >= "3.10" and python_version < "3.12"
|
16 |
+
click==8.1.8 ; python_version >= "3.10" and python_version < "3.12"
|
17 |
+
colorama==0.4.6 ; python_version >= "3.10" and python_version < "3.12" and (platform_system == "Windows" or sys_platform == "win32")
|
18 |
+
coloredlogs==15.0.1 ; python_version >= "3.10" and python_version < "3.12"
|
19 |
+
colorlog==6.9.0 ; python_version >= "3.10" and python_version < "3.12"
|
20 |
+
contourpy==1.3.1 ; python_version >= "3.10" and python_version < "3.12"
|
21 |
+
ctranslate2==4.4.0 ; python_version >= "3.10" and python_version < "3.12"
|
22 |
+
cycler==0.12.1 ; python_version >= "3.10" and python_version < "3.12"
|
23 |
+
docopt==0.6.2 ; python_version >= "3.10" and python_version < "3.12"
|
24 |
+
einops==0.8.1 ; python_version >= "3.10" and python_version < "3.12"
|
25 |
+
exceptiongroup==1.2.2 ; python_version == "3.10"
|
26 |
+
fastapi==0.115.11 ; python_version >= "3.10" and python_version < "3.12"
|
27 |
+
faster-whisper==1.1.0 ; python_version >= "3.10" and python_version < "3.12"
|
28 |
+
ffmpy==0.5.0 ; python_version >= "3.10" and python_version < "3.12"
|
29 |
+
filelock==3.17.0 ; python_version >= "3.10" and python_version < "3.12"
|
30 |
+
flatbuffers==25.2.10 ; python_version >= "3.10" and python_version < "3.12"
|
31 |
+
fonttools==4.56.0 ; python_version >= "3.10" and python_version < "3.12"
|
32 |
+
frozenlist==1.5.0 ; python_version >= "3.10" and python_version < "3.12"
|
33 |
+
fsspec==2025.2.0 ; python_version >= "3.10" and python_version < "3.12"
|
34 |
+
gradio-client==1.3.0 ; python_version >= "3.10" and python_version < "3.12"
|
35 |
+
gradio==4.44.1 ; python_version >= "3.10" and python_version < "3.12"
|
36 |
+
greenlet==3.1.1 ; python_version >= "3.10" and python_version < "3.12" and (platform_machine == "aarch64" or platform_machine == "ppc64le" or platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "AMD64" or platform_machine == "win32" or platform_machine == "WIN32")
|
37 |
+
h11==0.14.0 ; python_version >= "3.10" and python_version < "3.12"
|
38 |
+
httpcore==1.0.7 ; python_version >= "3.10" and python_version < "3.12"
|
39 |
+
httpx==0.28.1 ; python_version >= "3.10" and python_version < "3.12"
|
40 |
+
huggingface-hub==0.29.1 ; python_version >= "3.10" and python_version < "3.12"
|
41 |
+
humanfriendly==10.0 ; python_version >= "3.10" and python_version < "3.12"
|
42 |
+
hyperpyyaml==1.2.2 ; python_version >= "3.10" and python_version < "3.12"
|
43 |
+
idna==3.10 ; python_version >= "3.10" and python_version < "3.12"
|
44 |
+
importlib-resources==6.5.2 ; python_version >= "3.10" and python_version < "3.12"
|
45 |
+
jinja2==3.1.5 ; python_version >= "3.10" and python_version < "3.12"
|
46 |
+
joblib==1.4.2 ; python_version >= "3.10" and python_version < "3.12"
|
47 |
+
julius==0.2.7 ; python_version >= "3.10" and python_version < "3.12"
|
48 |
+
kiwisolver==1.4.8 ; python_version >= "3.10" and python_version < "3.12"
|
49 |
+
lightning-utilities==0.12.0 ; python_version >= "3.10" and python_version < "3.12"
|
50 |
+
lightning==2.5.0.post0 ; python_version >= "3.10" and python_version < "3.12"
|
51 |
+
lxml==5.3.1 ; python_version >= "3.10" and python_version < "3.12"
|
52 |
+
mako==1.3.9 ; python_version >= "3.10" and python_version < "3.12"
|
53 |
+
markdown-it-py==3.0.0 ; python_version >= "3.10" and python_version < "3.12"
|
54 |
+
markupsafe==2.1.5 ; python_version >= "3.10" and python_version < "3.12"
|
55 |
+
matplotlib==3.10.1 ; python_version >= "3.10" and python_version < "3.12"
|
56 |
+
mdurl==0.1.2 ; python_version >= "3.10" and python_version < "3.12"
|
57 |
+
mpmath==1.3.0 ; python_version >= "3.10" and python_version < "3.12"
|
58 |
+
multidict==6.1.0 ; python_version >= "3.10" and python_version < "3.12"
|
59 |
+
networkx==3.4.2 ; python_version >= "3.10" and python_version < "3.12"
|
60 |
+
nltk==3.9.1 ; python_version >= "3.10" and python_version < "3.12"
|
61 |
+
numpy==1.26.4 ; python_version >= "3.10" and python_version < "3.12"
|
62 |
+
nvidia-cublas-cu12==12.4.5.8 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
|
63 |
+
nvidia-cuda-cupti-cu12==12.4.127 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
|
64 |
+
nvidia-cuda-nvrtc-cu12==12.4.127 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
|
65 |
+
nvidia-cuda-runtime-cu12==12.4.127 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
|
66 |
+
nvidia-cudnn-cu12==9.1.0.70 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
|
67 |
+
nvidia-cufft-cu12==11.2.1.3 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
|
68 |
+
nvidia-curand-cu12==10.3.5.147 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
|
69 |
+
nvidia-cusolver-cu12==11.6.1.9 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
|
70 |
+
nvidia-cusparse-cu12==12.3.1.170 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
|
71 |
+
nvidia-cusparselt-cu12==0.6.2 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
|
72 |
+
nvidia-nccl-cu12==2.21.5 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
|
73 |
+
nvidia-nvjitlink-cu12==12.4.127 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
|
74 |
+
nvidia-nvtx-cu12==12.4.127 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
|
75 |
+
omegaconf==2.3.0 ; python_version >= "3.10" and python_version < "3.12"
|
76 |
+
onnxruntime==1.20.1 ; python_version >= "3.10" and python_version < "3.12"
|
77 |
+
optuna==4.2.1 ; python_version >= "3.10" and python_version < "3.12"
|
78 |
+
orjson==3.10.15 ; python_version >= "3.10" and python_version < "3.12"
|
79 |
+
packaging==24.2 ; python_version >= "3.10" and python_version < "3.12"
|
80 |
+
pandas==2.2.3 ; python_version >= "3.10" and python_version < "3.12"
|
81 |
+
pillow==10.4.0 ; python_version >= "3.10" and python_version < "3.12"
|
82 |
+
primepy==1.3 ; python_version >= "3.10" and python_version < "3.12"
|
83 |
+
propcache==0.3.0 ; python_version >= "3.10" and python_version < "3.12"
|
84 |
+
protobuf==5.29.3 ; python_version >= "3.10" and python_version < "3.12"
|
85 |
+
pyannote-audio==3.3.2 ; python_version >= "3.10" and python_version < "3.12"
|
86 |
+
pyannote-core==5.0.0 ; python_version >= "3.10" and python_version < "3.12"
|
87 |
+
pyannote-database==5.1.3 ; python_version >= "3.10" and python_version < "3.12"
|
88 |
+
pyannote-metrics==3.2.1 ; python_version >= "3.10" and python_version < "3.12"
|
89 |
+
pyannote-pipeline==3.0.1 ; python_version >= "3.10" and python_version < "3.12"
|
90 |
+
pycparser==2.22 ; python_version >= "3.10" and python_version < "3.12"
|
91 |
+
pydantic-core==2.27.2 ; python_version >= "3.10" and python_version < "3.12"
|
92 |
+
pydantic==2.10.6 ; python_version >= "3.10" and python_version < "3.12"
|
93 |
+
pydub==0.25.1 ; python_version >= "3.10" and python_version < "3.12"
|
94 |
+
pygments==2.19.1 ; python_version >= "3.10" and python_version < "3.12"
|
95 |
+
pyparsing==3.2.1 ; python_version >= "3.10" and python_version < "3.12"
|
96 |
+
pyreadline3==3.5.4 ; python_version >= "3.10" and python_version < "3.12" and sys_platform == "win32"
|
97 |
+
python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version < "3.12"
|
98 |
+
python-docx==1.1.2 ; python_version >= "3.10" and python_version < "3.12"
|
99 |
+
python-dotenv==1.0.1 ; python_version >= "3.10" and python_version < "3.12"
|
100 |
+
python-multipart==0.0.20 ; python_version >= "3.10" and python_version < "3.12"
|
101 |
+
pytorch-lightning==2.5.0.post0 ; python_version >= "3.10" and python_version < "3.12"
|
102 |
+
pytorch-metric-learning==2.8.1 ; python_version >= "3.10" and python_version < "3.12"
|
103 |
+
pytz==2025.1 ; python_version >= "3.10" and python_version < "3.12"
|
104 |
+
pyyaml==6.0.2 ; python_version >= "3.10" and python_version < "3.12"
|
105 |
+
regex==2024.11.6 ; python_version >= "3.10" and python_version < "3.12"
|
106 |
+
requests==2.32.3 ; python_version >= "3.10" and python_version < "3.12"
|
107 |
+
rich==13.9.4 ; python_version >= "3.10" and python_version < "3.12"
|
108 |
+
ruamel-yaml-clib==0.2.12 ; python_version >= "3.10" and python_version < "3.12" and platform_python_implementation == "CPython"
|
109 |
+
ruamel-yaml==0.18.10 ; python_version >= "3.10" and python_version < "3.12"
|
110 |
+
ruff==0.9.9 ; python_version >= "3.10" and python_version < "3.12" and sys_platform != "emscripten"
|
111 |
+
safetensors==0.5.3 ; python_version >= "3.10" and python_version < "3.12"
|
112 |
+
scikit-learn==1.6.1 ; python_version >= "3.10" and python_version < "3.12"
|
113 |
+
scipy==1.15.2 ; python_version >= "3.10" and python_version < "3.12"
|
114 |
+
semantic-version==2.10.0 ; python_version >= "3.10" and python_version < "3.12"
|
115 |
+
semver==3.0.4 ; python_version >= "3.10" and python_version < "3.12"
|
116 |
+
sentencepiece==0.2.0 ; python_version >= "3.10" and python_version < "3.12"
|
117 |
+
setuptools==75.8.2 ; python_version >= "3.10" and python_version < "3.12"
|
118 |
+
shellingham==1.5.4 ; python_version >= "3.10" and python_version < "3.12"
|
119 |
+
six==1.17.0 ; python_version >= "3.10" and python_version < "3.12"
|
120 |
+
sniffio==1.3.1 ; python_version >= "3.10" and python_version < "3.12"
|
121 |
+
sortedcontainers==2.4.0 ; python_version >= "3.10" and python_version < "3.12"
|
122 |
+
soundfile==0.13.1 ; python_version >= "3.10" and python_version < "3.12"
|
123 |
+
speechbrain==1.0.2 ; python_version >= "3.10" and python_version < "3.12"
|
124 |
+
sqlalchemy==2.0.38 ; python_version >= "3.10" and python_version < "3.12"
|
125 |
+
starlette==0.46.0 ; python_version >= "3.10" and python_version < "3.12"
|
126 |
+
sympy==1.13.1 ; python_version >= "3.10" and python_version < "3.12"
|
127 |
+
tabulate==0.9.0 ; python_version >= "3.10" and python_version < "3.12"
|
128 |
+
tensorboardx==2.6.2.2 ; python_version >= "3.10" and python_version < "3.12"
|
129 |
+
threadpoolctl==3.5.0 ; python_version >= "3.10" and python_version < "3.12"
|
130 |
+
tokenizers==0.21.0 ; python_version >= "3.10" and python_version < "3.12"
|
131 |
+
tomlkit==0.12.0 ; python_version >= "3.10" and python_version < "3.12"
|
132 |
+
torch-audiomentations==0.12.0 ; python_version >= "3.10" and python_version < "3.12"
|
133 |
+
torch-pitch-shift==1.2.5 ; python_version >= "3.10" and python_version < "3.12"
|
134 |
+
torch==2.6.0 ; python_version >= "3.10" and python_version < "3.12"
|
135 |
+
torchaudio==2.6.0 ; python_version >= "3.10" and python_version < "3.12"
|
136 |
+
torchmetrics==1.6.1 ; python_version >= "3.10" and python_version < "3.12"
|
137 |
+
tqdm==4.67.1 ; python_version >= "3.10" and python_version < "3.12"
|
138 |
+
transformers==4.49.0 ; python_version >= "3.10" and python_version < "3.12"
|
139 |
+
triton==3.2.0 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
|
140 |
+
typer==0.15.2 ; python_version >= "3.10" and python_version < "3.12"
|
141 |
+
typing-extensions==4.12.2 ; python_version >= "3.10" and python_version < "3.12"
|
142 |
+
tzdata==2025.1 ; python_version >= "3.10" and python_version < "3.12"
|
143 |
+
urllib3==2.3.0 ; python_version >= "3.10" and python_version < "3.12"
|
144 |
+
uvicorn==0.34.0 ; python_version >= "3.10" and python_version < "3.12" and sys_platform != "emscripten"
|
145 |
+
websockets==12.0 ; python_version >= "3.10" and python_version < "3.12"
|
146 |
+
whisperx==3.3.1 ; python_version >= "3.10" and python_version < "3.12"
|
147 |
+
yarl==1.18.3 ; python_version >= "3.10" and python_version < "3.12"
|