Ngoufack commited on
Commit
3335977
·
1 Parent(s): ea3c466

retour au debut

Browse files
Files changed (2) hide show
  1. app.py +82 -174
  2. requirements.txt +15 -147
app.py CHANGED
@@ -1,189 +1,97 @@
1
- import os
2
- import tempfile
3
- import time
4
  import gradio as gr
 
5
  import whisperx
6
- import torch
7
- from docx import Document
8
- from docx.shared import RGBColor
9
- import numpy as np
10
- import soundfile as sf
11
- from datetime import date
12
- from dotenv import load_dotenv
13
-
14
- # Load environment variables from .env file
15
- load_dotenv()
16
- # Get Hugging Face token from environment variables
17
- HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")
18
 
19
- # Set device for computation
20
- DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
21
- COMPUTE_TYPE = "float16" if torch.cuda.is_available() else "int8"
 
22
 
23
- # Speaker colors for docx export
24
- SPEAKER_COLORS = {
25
- "SPEAKER_00": RGBColor(255, 0, 0), # Red
26
- "SPEAKER_01": RGBColor(0, 0, 255), # Blue
27
- "SPEAKER_02": RGBColor(0, 128, 0), # Green
28
- "SPEAKER_03": RGBColor(128, 0, 128), # Purple
29
- "SPEAKER_04": RGBColor(255, 165, 0), # Orange
30
- "SPEAKER_05": RGBColor(0, 128, 128), # Teal
31
- "SPEAKER_06": RGBColor(139, 69, 19), # Brown
32
- "SPEAKER_07": RGBColor(105, 105, 105), # Gray
33
- "SPEAKER_08": RGBColor(255, 20, 147), # Pink
34
- "SPEAKER_09": RGBColor(0, 191, 255), # Sky Blue
35
- }
36
 
37
- def format_time(seconds):
38
- """Convert seconds to HH:MM:SS format."""
39
- hours = int(seconds // 3600)
40
- minutes = int((seconds % 3600) // 60)
41
- seconds = int(seconds % 60)
42
- return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
 
 
43
 
44
- def transcribe_audio(audio_path, model_name="large-v2"):
45
- """Transcribe audio using WhisperX and identify speakers."""
46
- try:
47
- # 1. Load and transcribe audio with whisperx
48
- model = whisperx.load_model(model_name, DEVICE, compute_type=COMPUTE_TYPE)
49
- audio = whisperx.load_audio(audio_path)
50
- result = model.transcribe(audio, batch_size=16)
51
-
52
- # 2. Align whisper output
53
- model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=DEVICE)
54
- result = whisperx.align(result["segments"], model_a, metadata, audio, DEVICE, return_char_alignments=False)
55
-
56
- # 3. Assign speaker labels
57
- diarize_model = whisperx.DiarizationPipeline(use_auth_token=HUGGINGFACE_TOKEN, device=DEVICE)
58
- diarize_segments = diarize_model(audio, min_speakers=1, max_speakers=10)
59
- result = whisperx.assign_word_speakers(diarize_segments, result)
60
-
61
- return result, None
62
- except Exception as e:
63
- return None, str(e)
64
 
65
- def export_to_docx(result, output_path=None):
66
- """Export transcription to DOCX with timecodes and color-coded speakers."""
67
- if output_path is None:
68
- output_path = os.path.join(tempfile.gettempdir(), f"transcript_{int(time.time())}.docx")
 
 
 
 
 
 
69
 
70
- formatted_date = date.today().strftime("%d/%m/%Y")
71
- doc = Document()
72
- doc.add_heading('Transcription ' + formatted_date, 0)
 
 
 
73
 
74
- for segment in result["segments"]:
75
- start_time = format_time(segment["start"])
76
- end_time = format_time(segment["end"])
77
 
78
- # Create a paragraph for this segment
79
- p = doc.add_paragraph()
80
- p.add_run(f"[{start_time} - {end_time}] ").bold = True
81
-
82
- speaker = segment.get("speaker", "UNKNOWN")
83
- if speaker in SPEAKER_COLORS:
84
- speaker_run = p.add_run(f"{speaker}: ")
85
- speaker_run.font.color.rgb = SPEAKER_COLORS[speaker]
86
- speaker_run.bold = True
87
- else:
88
- p.add_run(f"{speaker}: ").bold = True
89
-
90
- p.add_run(segment["text"])
91
 
92
- doc.save(output_path)
93
- return output_path
94
 
95
- def save_audio(audio_data, sample_rate):
96
- """Save the recorded audio to a temporary file."""
97
- filename = f"recorded_audio_{int(time.time())}.wav"
98
- temp_file = os.path.join(tempfile.gettempdir(), filename)
99
- sf.write(temp_file, audio_data, sample_rate)
100
- return temp_file, filename
101
 
102
- def process_audio(audio_input=None, model_name="large-v2"):
103
- """Process the audio file or recording."""
104
- try:
105
- if audio_input is None:
106
- return None, None, "No audio provided", None
107
-
108
- # Determine if it's a file path (upload) or tuple (recording)
109
- if isinstance(audio_input, tuple) and len(audio_input) >= 2:
110
- # Handle recorded audio
111
- filepath, filename = save_audio(audio_input[0], audio_input[1])
112
- is_temp_file = True
113
- else:
114
- # Handle uploaded file
115
- filepath = audio_input
116
- filename = os.path.basename(filepath) if filepath else None
117
- is_temp_file = False
118
-
119
- # Transcribe audio
120
- result, error = transcribe_audio(filepath, model_name)
121
- if error:
122
- return None, None, f"Transcription error: {error}", None
123
-
124
- # Export to DOCX
125
- docx_path = export_to_docx(result)
126
-
127
- # Prepare display table
128
- table_data = []
129
- for segment in result["segments"]:
130
- start_time = format_time(segment["start"])
131
- end_time = format_time(segment["end"])
132
- speaker = segment.get("speaker", "UNKNOWN")
133
- text = segment["text"]
134
- table_data.append([f"{start_time} - {end_time}", speaker, text])
135
-
136
- # Prepare audio for download
137
- if is_temp_file:
138
- download_path = (filepath, filename)
139
- else:
140
- # For uploaded files, no need to provide download as user already has the file
141
- download_path = None
142
-
143
- return table_data, docx_path, "Transcription completed successfully", download_path
144
- except Exception as e:
145
- return None, None, f"Error: {str(e)}", None
146
 
147
- def create_interface():
148
- """Create the Gradio interface."""
149
- with gr.Blocks(title="WhisperX Transcription") as interface:
150
- gr.Markdown("# Verbalens Prototype : Audio Transcription with Speaker Identification")
151
- gr.Markdown("Upload an audio file or record directly to transcribe and identify speakers.")
152
-
153
- with gr.Row():
154
- with gr.Column():
155
- gr.Markdown("### Input")
156
- with gr.Tab("Audio Input"):
157
- audio_file = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or Record Audio (MP3 or WAV)")
158
-
159
- model_dropdown = gr.Dropdown(
160
- choices=["large-v2", "large-v3", "medium", "small", "base", "tiny"],
161
- value="large-v2",
162
- label="Whisper Model",
163
- info="Larger models are more accurate but require more computational resources"
164
- )
165
-
166
- process_btn = gr.Button("Transcribe", variant="primary")
167
-
168
- with gr.Column():
169
- gr.Markdown("### Results")
170
- status = gr.Textbox(label="Status", interactive=False)
171
- transcription = gr.DataFrame(
172
- headers=["Time", "Speaker", "Text"],
173
- label="Transcription Results"
174
- )
175
-
176
- with gr.Row():
177
- docx_output = gr.File(label="DOCX Export")
178
- audio_download = gr.File(label="Download Recorded Audio")
179
-
180
- process_btn.click(
181
- fn=process_audio,
182
- inputs=[audio_file, model_dropdown],
183
- outputs=[transcription, docx_output, status, audio_download]
184
- )
185
-
186
- return interface
187
 
188
- interface = create_interface()
189
- interface.queue().launch(ssr_mode=False)
 
1
+ import spaces
2
+ import torch
 
3
  import gradio as gr
4
+ import yt_dlp as youtube_dl
5
  import whisperx
6
+ import tempfile
7
+ import os
 
 
 
 
 
 
 
 
 
 
8
 
9
# Run on the GPU when one is visible to torch, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
# WhisperX defaults to float16, which the CPU backend is not expected to
# support; pick a quantised type for the CPU fallback so that model loading
# does not fail there.
COMPUTE_TYPE = "float16" if device == "cuda" else "int8"
BATCH_SIZE = 8
FILE_LIMIT_MB = 1000
YT_LENGTH_LIMIT_S = 3600  # limit to 1 hour YouTube files

# Loaded once at import time and shared by every request handler below.
model = whisperx.load_model("large-v2", device, compute_type=COMPUTE_TYPE)
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
@spaces.GPU
def transcribe(inputs, task):
    """Transcribe (or translate) an audio file and return the text.

    Args:
        inputs: Path of the audio file handed over by the Gradio audio
            component, or ``None`` when nothing was submitted.
        task: Task selected in the UI, ``"transcribe"`` or ``"translate"``;
            forwarded to the model.

    Returns:
        The recognised text as a single string.

    Raises:
        gr.Error: If no audio was submitted.
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    audio = whisperx.load_audio(inputs)
    result = model.transcribe(audio, batch_size=BATCH_SIZE, task=task)

    # Keep honouring a top-level "text" entry if the backend provides one.
    if "text" in result:
        return result["text"]
    # WhisperX results are expected to expose the transcript as a list of
    # segments rather than a single "text" entry, so stitch the segments
    # together instead of failing with a KeyError.
    return " ".join(segment["text"].strip() for segment in result.get("segments", []))
24
 
25
+ def _return_yt_html_embed(yt_url):
26
+ video_id = yt_url.split("?v=")[-1]
27
+ return f'<center><iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"></iframe></center>'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
 
29
def download_yt_audio(yt_url, filename):
    """Download the audio track of a YouTube video and store it as WAV.

    Args:
        yt_url: URL of the video to fetch.
        filename: Destination path of the resulting WAV file.

    Raises:
        gr.Error: If yt-dlp cannot resolve or download the URL, or if the
            video is longer than ``YT_LENGTH_LIMIT_S`` seconds.
    """
    import os

    # Hand yt-dlp a template with an extension placeholder: the raw download
    # keeps its native extension and the FFmpegExtractAudio post-processor
    # then writes "<base>.wav". Passing a template that already ends in
    # ".wav" risks the converted file landing at "<name>.wav.wav" instead.
    base, _ = os.path.splitext(filename)
    ydl_opts = {
        "format": "bestaudio/best",
        "outtmpl": base.replace("%", "%%") + ".%(ext)s",
        # Only one output file is expected, so never expand playlists.
        "noplaylist": True,
        "postprocessors": [{
            "key": "FFmpegExtractAudio",
            "preferredcodec": "wav",
            "preferredquality": "192",
        }],
    }

    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        try:
            # Probe the metadata first so over-long videos are rejected
            # before any bandwidth or GPU time is spent on them.
            info = ydl.extract_info(yt_url, download=False)
        except youtube_dl.utils.DownloadError as err:
            raise gr.Error(str(err)) from err

        duration_s = (info or {}).get("duration") or 0
        if duration_s > YT_LENGTH_LIMIT_S:
            raise gr.Error(
                f"Maximum YouTube length is {YT_LENGTH_LIMIT_S} seconds, "
                f"got a video of {int(duration_s)} seconds."
            )

        try:
            ydl.download([yt_url])
        except youtube_dl.utils.DownloadError as err:
            raise gr.Error(str(err)) from err

    # Honour the exact destination requested by the caller even when it does
    # not carry a ".wav" suffix.
    produced = base + ".wav"
    if produced != filename and os.path.exists(produced):
        os.replace(produced, filename)
42
+
43
@spaces.GPU
def yt_transcribe(yt_url, task):
    """Download a YouTube video's audio and transcribe (or translate) it.

    Args:
        yt_url: URL of the YouTube video pasted by the user.
        task: Task selected in the UI, ``"transcribe"`` or ``"translate"``;
            forwarded to the model.

    Returns:
        A ``(html_embed, text)`` tuple: the HTML snippet embedding the video
        player and the recognised text.

    Raises:
        gr.Error: If no URL was submitted or no audio file could be produced.
    """
    if not yt_url or not yt_url.strip():
        raise gr.Error("No YouTube URL submitted! Please paste a YouTube URL before submitting your request.")

    html_embed_str = _return_yt_html_embed(yt_url)

    # The download only needs to live until the samples are loaded.
    with tempfile.TemporaryDirectory() as tmpdirname:
        filepath = os.path.join(tmpdirname, "audio.wav")
        download_yt_audio(yt_url, filepath)

        if not os.path.exists(filepath):
            # NOTE(review): depending on how the downloader names the file
            # produced by its audio post-processor, the WAV may not land at
            # exactly `filepath`; fall back to any WAV in the directory.
            wav_files = sorted(name for name in os.listdir(tmpdirname) if name.endswith(".wav"))
            if not wav_files:
                raise gr.Error("The audio track of this video could not be downloaded.")
            filepath = os.path.join(tmpdirname, wav_files[0])

        audio = whisperx.load_audio(filepath)
        result = model.transcribe(audio, batch_size=BATCH_SIZE, task=task)

    # Keep honouring a top-level "text" entry if the backend provides one.
    if "text" in result:
        text = result["text"]
    else:
        # WhisperX results are expected to expose the transcript as a list of
        # segments rather than a single "text" entry.
        text = " ".join(segment["text"].strip() for segment in result.get("segments", []))

    return html_embed_str, text
 
55
 
56
# Top-level container that hosts the three tabs defined below.
demo = gr.Blocks(theme=gr.themes.Ocean())

# Tab 1: record from the microphone.
mf_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources="microphone", type="filepath"),
        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
    ],
    outputs="text",
    title="VerbaLend Demo with WhisperX",
    description="Transcribe long-form microphone or audio inputs using WhisperX.",
    # `flagging_mode` replaces the deprecated `allow_flagging` argument.
    flagging_mode="never",
)

# Tab 2: upload an existing audio file.
file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources="upload", type="filepath", label="Audio file"),
        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
    ],
    outputs="text",
    title="VerbaLend Demo with WhisperX",
    description="Transcribe uploaded audio files using WhisperX.",
    flagging_mode="never",
)

# Tab 3: transcribe a YouTube video. The interface gets its own name so that
# it does not rebind (and hide) the `yt_transcribe` handler function.
yt_interface = gr.Interface(
    fn=yt_transcribe,
    inputs=[
        gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
    ],
    outputs=["html", "text"],
    title="VerbaLend Demo with WhisperX",
    description="Transcribe YouTube videos using WhisperX.",
    flagging_mode="never",
)

with demo:
    gr.TabbedInterface([mf_transcribe, file_transcribe, yt_interface], ["Microphone", "Audio file", "YouTube"])

# Queue requests so that long transcriptions do not hit request timeouts.
demo.queue().launch(ssr_mode=False)
 
requirements.txt CHANGED
@@ -1,147 +1,15 @@
1
- aiofiles==23.2.1 ; python_version >= "3.10" and python_version < "3.12"
2
- aiohappyeyeballs==2.4.6 ; python_version >= "3.10" and python_version < "3.12"
3
- aiohttp==3.11.13 ; python_version >= "3.10" and python_version < "3.12"
4
- aiosignal==1.3.2 ; python_version >= "3.10" and python_version < "3.12"
5
- alembic==1.14.1 ; python_version >= "3.10" and python_version < "3.12"
6
- annotated-types==0.7.0 ; python_version >= "3.10" and python_version < "3.12"
7
- antlr4-python3-runtime==4.9.3 ; python_version >= "3.10" and python_version < "3.12"
8
- anyio==4.8.0 ; python_version >= "3.10" and python_version < "3.12"
9
- asteroid-filterbanks==0.4.0 ; python_version >= "3.10" and python_version < "3.12"
10
- async-timeout==5.0.1 ; python_version == "3.10"
11
- attrs==25.1.0 ; python_version >= "3.10" and python_version < "3.12"
12
- av==14.2.0 ; python_version >= "3.10" and python_version < "3.12"
13
- certifi==2025.1.31 ; python_version >= "3.10" and python_version < "3.12"
14
- cffi==1.17.1 ; python_version >= "3.10" and python_version < "3.12"
15
- charset-normalizer==3.4.1 ; python_version >= "3.10" and python_version < "3.12"
16
- click==8.1.8 ; python_version >= "3.10" and python_version < "3.12"
17
- colorama==0.4.6 ; python_version >= "3.10" and python_version < "3.12" and (platform_system == "Windows" or sys_platform == "win32")
18
- coloredlogs==15.0.1 ; python_version >= "3.10" and python_version < "3.12"
19
- colorlog==6.9.0 ; python_version >= "3.10" and python_version < "3.12"
20
- contourpy==1.3.1 ; python_version >= "3.10" and python_version < "3.12"
21
- ctranslate2==4.4.0 ; python_version >= "3.10" and python_version < "3.12"
22
- cycler==0.12.1 ; python_version >= "3.10" and python_version < "3.12"
23
- docopt==0.6.2 ; python_version >= "3.10" and python_version < "3.12"
24
- einops==0.8.1 ; python_version >= "3.10" and python_version < "3.12"
25
- exceptiongroup==1.2.2 ; python_version == "3.10"
26
- fastapi==0.115.11 ; python_version >= "3.10" and python_version < "3.12"
27
- faster-whisper==1.1.0 ; python_version >= "3.10" and python_version < "3.12"
28
- ffmpy==0.5.0 ; python_version >= "3.10" and python_version < "3.12"
29
- filelock==3.17.0 ; python_version >= "3.10" and python_version < "3.12"
30
- flatbuffers==25.2.10 ; python_version >= "3.10" and python_version < "3.12"
31
- fonttools==4.56.0 ; python_version >= "3.10" and python_version < "3.12"
32
- frozenlist==1.5.0 ; python_version >= "3.10" and python_version < "3.12"
33
- fsspec==2025.2.0 ; python_version >= "3.10" and python_version < "3.12"
34
- gradio-client==1.3.0 ; python_version >= "3.10" and python_version < "3.12"
35
- gradio==4.44.1 ; python_version >= "3.10" and python_version < "3.12"
36
- greenlet==3.1.1 ; python_version >= "3.10" and python_version < "3.12" and (platform_machine == "aarch64" or platform_machine == "ppc64le" or platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "AMD64" or platform_machine == "win32" or platform_machine == "WIN32")
37
- h11==0.14.0 ; python_version >= "3.10" and python_version < "3.12"
38
- httpcore==1.0.7 ; python_version >= "3.10" and python_version < "3.12"
39
- httpx==0.28.1 ; python_version >= "3.10" and python_version < "3.12"
40
- huggingface-hub==0.29.1 ; python_version >= "3.10" and python_version < "3.12"
41
- humanfriendly==10.0 ; python_version >= "3.10" and python_version < "3.12"
42
- hyperpyyaml==1.2.2 ; python_version >= "3.10" and python_version < "3.12"
43
- idna==3.10 ; python_version >= "3.10" and python_version < "3.12"
44
- importlib-resources==6.5.2 ; python_version >= "3.10" and python_version < "3.12"
45
- jinja2==3.1.5 ; python_version >= "3.10" and python_version < "3.12"
46
- joblib==1.4.2 ; python_version >= "3.10" and python_version < "3.12"
47
- julius==0.2.7 ; python_version >= "3.10" and python_version < "3.12"
48
- kiwisolver==1.4.8 ; python_version >= "3.10" and python_version < "3.12"
49
- lightning-utilities==0.12.0 ; python_version >= "3.10" and python_version < "3.12"
50
- lightning==2.5.0.post0 ; python_version >= "3.10" and python_version < "3.12"
51
- lxml==5.3.1 ; python_version >= "3.10" and python_version < "3.12"
52
- mako==1.3.9 ; python_version >= "3.10" and python_version < "3.12"
53
- markdown-it-py==3.0.0 ; python_version >= "3.10" and python_version < "3.12"
54
- markupsafe==2.1.5 ; python_version >= "3.10" and python_version < "3.12"
55
- matplotlib==3.10.1 ; python_version >= "3.10" and python_version < "3.12"
56
- mdurl==0.1.2 ; python_version >= "3.10" and python_version < "3.12"
57
- mpmath==1.3.0 ; python_version >= "3.10" and python_version < "3.12"
58
- multidict==6.1.0 ; python_version >= "3.10" and python_version < "3.12"
59
- networkx==3.4.2 ; python_version >= "3.10" and python_version < "3.12"
60
- nltk==3.9.1 ; python_version >= "3.10" and python_version < "3.12"
61
- numpy==1.26.4 ; python_version >= "3.10" and python_version < "3.12"
62
- nvidia-cublas-cu12==12.4.5.8 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
63
- nvidia-cuda-cupti-cu12==12.4.127 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
64
- nvidia-cuda-nvrtc-cu12==12.4.127 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
65
- nvidia-cuda-runtime-cu12==12.4.127 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
66
- nvidia-cudnn-cu12==9.1.0.70 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
67
- nvidia-cufft-cu12==11.2.1.3 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
68
- nvidia-curand-cu12==10.3.5.147 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
69
- nvidia-cusolver-cu12==11.6.1.9 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
70
- nvidia-cusparse-cu12==12.3.1.170 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
71
- nvidia-cusparselt-cu12==0.6.2 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
72
- nvidia-nccl-cu12==2.21.5 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
73
- nvidia-nvjitlink-cu12==12.4.127 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
74
- nvidia-nvtx-cu12==12.4.127 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
75
- omegaconf==2.3.0 ; python_version >= "3.10" and python_version < "3.12"
76
- onnxruntime==1.20.1 ; python_version >= "3.10" and python_version < "3.12"
77
- optuna==4.2.1 ; python_version >= "3.10" and python_version < "3.12"
78
- orjson==3.10.15 ; python_version >= "3.10" and python_version < "3.12"
79
- packaging==24.2 ; python_version >= "3.10" and python_version < "3.12"
80
- pandas==2.2.3 ; python_version >= "3.10" and python_version < "3.12"
81
- pillow==10.4.0 ; python_version >= "3.10" and python_version < "3.12"
82
- primepy==1.3 ; python_version >= "3.10" and python_version < "3.12"
83
- propcache==0.3.0 ; python_version >= "3.10" and python_version < "3.12"
84
- protobuf==5.29.3 ; python_version >= "3.10" and python_version < "3.12"
85
- pyannote-audio==3.3.2 ; python_version >= "3.10" and python_version < "3.12"
86
- pyannote-core==5.0.0 ; python_version >= "3.10" and python_version < "3.12"
87
- pyannote-database==5.1.3 ; python_version >= "3.10" and python_version < "3.12"
88
- pyannote-metrics==3.2.1 ; python_version >= "3.10" and python_version < "3.12"
89
- pyannote-pipeline==3.0.1 ; python_version >= "3.10" and python_version < "3.12"
90
- pycparser==2.22 ; python_version >= "3.10" and python_version < "3.12"
91
- pydantic-core==2.27.2 ; python_version >= "3.10" and python_version < "3.12"
92
- pydantic==2.10.6 ; python_version >= "3.10" and python_version < "3.12"
93
- pydub==0.25.1 ; python_version >= "3.10" and python_version < "3.12"
94
- pygments==2.19.1 ; python_version >= "3.10" and python_version < "3.12"
95
- pyparsing==3.2.1 ; python_version >= "3.10" and python_version < "3.12"
96
- pyreadline3==3.5.4 ; python_version >= "3.10" and python_version < "3.12" and sys_platform == "win32"
97
- python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version < "3.12"
98
- python-docx==1.1.2 ; python_version >= "3.10" and python_version < "3.12"
99
- python-dotenv==1.0.1 ; python_version >= "3.10" and python_version < "3.12"
100
- python-multipart==0.0.20 ; python_version >= "3.10" and python_version < "3.12"
101
- pytorch-lightning==2.5.0.post0 ; python_version >= "3.10" and python_version < "3.12"
102
- pytorch-metric-learning==2.8.1 ; python_version >= "3.10" and python_version < "3.12"
103
- pytz==2025.1 ; python_version >= "3.10" and python_version < "3.12"
104
- pyyaml==6.0.2 ; python_version >= "3.10" and python_version < "3.12"
105
- regex==2024.11.6 ; python_version >= "3.10" and python_version < "3.12"
106
- requests==2.32.3 ; python_version >= "3.10" and python_version < "3.12"
107
- rich==13.9.4 ; python_version >= "3.10" and python_version < "3.12"
108
- ruamel-yaml-clib==0.2.12 ; python_version >= "3.10" and python_version < "3.12" and platform_python_implementation == "CPython"
109
- ruamel-yaml==0.18.10 ; python_version >= "3.10" and python_version < "3.12"
110
- ruff==0.9.9 ; python_version >= "3.10" and python_version < "3.12" and sys_platform != "emscripten"
111
- safetensors==0.5.3 ; python_version >= "3.10" and python_version < "3.12"
112
- scikit-learn==1.6.1 ; python_version >= "3.10" and python_version < "3.12"
113
- scipy==1.15.2 ; python_version >= "3.10" and python_version < "3.12"
114
- semantic-version==2.10.0 ; python_version >= "3.10" and python_version < "3.12"
115
- semver==3.0.4 ; python_version >= "3.10" and python_version < "3.12"
116
- sentencepiece==0.2.0 ; python_version >= "3.10" and python_version < "3.12"
117
- setuptools==75.8.2 ; python_version >= "3.10" and python_version < "3.12"
118
- shellingham==1.5.4 ; python_version >= "3.10" and python_version < "3.12"
119
- six==1.17.0 ; python_version >= "3.10" and python_version < "3.12"
120
- sniffio==1.3.1 ; python_version >= "3.10" and python_version < "3.12"
121
- sortedcontainers==2.4.0 ; python_version >= "3.10" and python_version < "3.12"
122
- soundfile==0.13.1 ; python_version >= "3.10" and python_version < "3.12"
123
- speechbrain==1.0.2 ; python_version >= "3.10" and python_version < "3.12"
124
- sqlalchemy==2.0.38 ; python_version >= "3.10" and python_version < "3.12"
125
- starlette==0.46.0 ; python_version >= "3.10" and python_version < "3.12"
126
- sympy==1.13.1 ; python_version >= "3.10" and python_version < "3.12"
127
- tabulate==0.9.0 ; python_version >= "3.10" and python_version < "3.12"
128
- tensorboardx==2.6.2.2 ; python_version >= "3.10" and python_version < "3.12"
129
- threadpoolctl==3.5.0 ; python_version >= "3.10" and python_version < "3.12"
130
- tokenizers==0.21.0 ; python_version >= "3.10" and python_version < "3.12"
131
- tomlkit==0.12.0 ; python_version >= "3.10" and python_version < "3.12"
132
- torch-audiomentations==0.12.0 ; python_version >= "3.10" and python_version < "3.12"
133
- torch-pitch-shift==1.2.5 ; python_version >= "3.10" and python_version < "3.12"
134
- torch==2.6.0 ; python_version >= "3.10" and python_version < "3.12"
135
- torchaudio==2.6.0 ; python_version >= "3.10" and python_version < "3.12"
136
- torchmetrics==1.6.1 ; python_version >= "3.10" and python_version < "3.12"
137
- tqdm==4.67.1 ; python_version >= "3.10" and python_version < "3.12"
138
- transformers==4.49.0 ; python_version >= "3.10" and python_version < "3.12"
139
- triton==3.2.0 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
140
- typer==0.15.2 ; python_version >= "3.10" and python_version < "3.12"
141
- typing-extensions==4.12.2 ; python_version >= "3.10" and python_version < "3.12"
142
- tzdata==2025.1 ; python_version >= "3.10" and python_version < "3.12"
143
- urllib3==2.3.0 ; python_version >= "3.10" and python_version < "3.12"
144
- uvicorn==0.34.0 ; python_version >= "3.10" and python_version < "3.12" and sys_platform != "emscripten"
145
- websockets==12.0 ; python_version >= "3.10" and python_version < "3.12"
146
- whisperx==3.3.1 ; python_version >= "3.10" and python_version < "3.12"
147
- yarl==1.18.3 ; python_version >= "3.10" and python_version < "3.12"
 
1
+ pydub
2
+ pandas
3
+ numpy
4
+ torch
5
+ torchaudio
6
+ pyannote.audio
7
+ transformers>=4.19.0
8
+ ffmpeg-python==0.2.0
9
+ tqdm
10
+ transformers>=4.19.0
11
+ yt-dlp
12
+ # tempfile is part of the Python standard library; no pip requirement is needed
13
+ more_itertools
14
+ faster-whisper
15
+ git+https://github.com/m-bain/whisperx.git