Ngoufack committed
Commit 628e92e · 1 Parent(s): 09e94bd
Files changed (2)
  1. app.py +184 -138
  2. requirements.txt +147 -13
app.py CHANGED
@@ -1,143 +1,189 @@
- import spaces
- import torch
- import torchaudio
- import gradio as gr
- import yt_dlp as youtube_dl
- from faster_whisper import WhisperModel
- from transformers.pipelines.audio_utils import ffmpeg_read
- import tempfile
  import os
- from pyannote.audio.pipelines.speaker_diarization import SpeakerDiarization
- from pyannote.audio import Model
- from pyannote.core import Segment
- from transformers.pipelines.audio_utils import ffmpeg_read
- from pyannote.audio import Pipeline
-
- MODEL_NAME = "medium"
- BATCH_SIZE = 8
- FILE_LIMIT_MB = 1000
- YT_LENGTH_LIMIT_S = 3600  # limit to 1 hour YouTube files
-
- device = "cuda" if torch.cuda.is_available() else "cpu"
- model = WhisperModel(MODEL_NAME, device=device, compute_type="float16" if torch.cuda.is_available() else "int8")
-
- #model_pyannote = Model.from_pretrained("pyannote/speaker-diarization")
- pipeline = Pipeline.from_pretrained("pyannote/speaker-diarization", os.getenv('HF_TOKEN'))
-
- #pipeline = SpeakerDiarization(model_pyannote)
-
-
- @spaces.GPU
- def transcribe(inputs, task):
-     if inputs is None:
-         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
-
-     waveform, sample_rate = torchaudio.load(inputs)
-     if sample_rate != 16000:
-         transform = torchaudio.transforms.Resample(orig_freq=sample_rate, new_freq=16000)
-         waveform = transform(waveform)
-
-     segments, _ = model.transcribe(inputs, task=task)
-     transcription = "\n".join([segment.text for segment in segments])
-     print(transcription)
-     # Diarization with the pyannote pipeline
-     diarization = pipeline(inputs)
-     speaker_segments = []
-     for segment, _, speaker in diarization.itertracks(yield_label=True):
-         speaker_segments.append((segment.start, segment.end, speaker))
-
-     # Match transcription segments to their speakers
-     speaker_texts = []
-     for start, end, speaker in speaker_segments:
-         spoken_text = " ".join([seg.text for seg in segments if seg.start >= start and seg.end <= end])
-         if spoken_text:
-             speaker_texts.append(f"{speaker}: {spoken_text}")
-     return "\n".join(speaker_texts)
-
- def _return_yt_html_embed(yt_url):
-     video_id = yt_url.split("?v=")[-1]
-     HTML_str = (
-         f'<center> <iframe width="500" height="320" src="https://www.youtube.com/embed/{video_id}"> </iframe>'
-         " </center>"
-     )
-     return HTML_str
-
- def download_yt_audio(yt_url, filename):
-     info_loader = youtube_dl.YoutubeDL()
-
      try:
-         info = info_loader.extract_info(yt_url, download=False)
-     except youtube_dl.utils.DownloadError as err:
-         raise gr.Error(str(err))

-     file_length_s = info["duration"]
-     if file_length_s > YT_LENGTH_LIMIT_S:
-         raise gr.Error(f"Maximum YouTube length is {YT_LENGTH_LIMIT_S} seconds, got {file_length_s} seconds.")
-
-     ydl_opts = {"outtmpl": filename, "format": "bestaudio/best"}
-     with youtube_dl.YoutubeDL(ydl_opts) as ydl:
-         try:
-             ydl.download([yt_url])
-         except youtube_dl.utils.ExtractorError as err:
-             raise gr.Error(str(err))
-
- @spaces.GPU
- def yt_transcribe(yt_url, task):
-     html_embed_str = _return_yt_html_embed(yt_url)
-
-     with tempfile.TemporaryDirectory() as tmpdirname:
-         filepath = os.path.join(tmpdirname, "video.mp4")
-         download_yt_audio(yt_url, filepath)
-         with open(filepath, "rb") as f:
-             inputs = f.read()

-     inputs = ffmpeg_read(inputs, 16000)  # convert to 16 kHz
-     segments, _ = model.transcribe(inputs, task=task)
-     text = " ".join([segment.text for segment in segments])

-     return html_embed_str, text
-
- demo = gr.Blocks(theme=gr.themes.Ocean())
-
- mf_transcribe = gr.Interface(
-     fn=transcribe,
-     inputs=[
-         gr.Audio(sources="microphone", type="filepath"),
-         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
-     ],
-     outputs="text",
-     title="VerbaLend Demo 1 : Prototype",
-     description=(
-         "Transcribe long-form microphone or audio inputs with the click of a button! Demo uses Faster Whisper"
-     ),
-     allow_flagging="never",
- )
-
- file_transcribe = gr.Interface(
-     fn=transcribe,
-     inputs=[
-         gr.Audio(sources="upload", type="filepath", label="Audio file"),
-         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
-     ],
-     outputs="text",
-     title="VerbaLend Demo 1 : Prototype",
-     description="Transcribe uploaded audio files with Faster Whisper.",
-     allow_flagging="never",
- )
-
- yt_transcribe = gr.Interface(
-     fn=yt_transcribe,
-     inputs=[
-         gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
-         gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")
-     ],
-     outputs=["html", "text"],
-     title="VerbaLend Demo 1 : Prototype",
-     description="Transcribe YouTube videos using Faster Whisper.",
-     allow_flagging="never",
- )
-
- with demo:
-     gr.TabbedInterface([mf_transcribe, file_transcribe, yt_transcribe], ["Microphone", "Audio file", "YouTube"])
-
- demo.queue().launch(ssr_mode=False)
  import os
+ import tempfile
+ import time
+ import gradio as gr
+ import whisperx
+ import torch
+ from docx import Document
+ from docx.shared import RGBColor
+ import numpy as np
+ import soundfile as sf
+ from datetime import date
+ from dotenv import load_dotenv
+
+ # Load environment variables from .env file
+ load_dotenv()
+ # Get Hugging Face token from environment variables
+ HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")
+
+ # Set device for computation
+ DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+ COMPUTE_TYPE = "float16" if torch.cuda.is_available() else "int8"
+
+ # Speaker colors for docx export
+ SPEAKER_COLORS = {
+     "SPEAKER_00": RGBColor(255, 0, 0),      # Red
+     "SPEAKER_01": RGBColor(0, 0, 255),      # Blue
+     "SPEAKER_02": RGBColor(0, 128, 0),      # Green
+     "SPEAKER_03": RGBColor(128, 0, 128),    # Purple
+     "SPEAKER_04": RGBColor(255, 165, 0),    # Orange
+     "SPEAKER_05": RGBColor(0, 128, 128),    # Teal
+     "SPEAKER_06": RGBColor(139, 69, 19),    # Brown
+     "SPEAKER_07": RGBColor(105, 105, 105),  # Gray
+     "SPEAKER_08": RGBColor(255, 20, 147),   # Pink
+     "SPEAKER_09": RGBColor(0, 191, 255),    # Sky Blue
+ }
+
+ def format_time(seconds):
+     """Convert seconds to HH:MM:SS format."""
+     hours = int(seconds // 3600)
+     minutes = int((seconds % 3600) // 60)
+     seconds = int(seconds % 60)
+     return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
+
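
A quick sanity check of the helper above (values verified by hand):

    format_time(3725)  # "01:02:05" -- 3725 s = 1 h 2 min 5 s
    format_time(59.9)  # "00:00:59" -- fractional seconds are truncated, not rounded
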
+ def transcribe_audio(audio_path, model_name="large-v2"):
+     """Transcribe audio using WhisperX and identify speakers."""
      try:
+         # 1. Load and transcribe audio with whisperx
+         model = whisperx.load_model(model_name, DEVICE, compute_type=COMPUTE_TYPE)
+         audio = whisperx.load_audio(audio_path)
+         result = model.transcribe(audio, batch_size=16)
+
+         # 2. Align whisper output
+         model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=DEVICE)
+         result = whisperx.align(result["segments"], model_a, metadata, audio, DEVICE, return_char_alignments=False)
+
+         # 3. Assign speaker labels
+         diarize_model = whisperx.DiarizationPipeline(use_auth_token=HUGGINGFACE_TOKEN, device=DEVICE)
+         diarize_segments = diarize_model(audio, min_speakers=1, max_speakers=10)
+         result = whisperx.assign_word_speakers(diarize_segments, result)
+
+         return result, None
+     except Exception as e:
+         return None, str(e)
+
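
For readers new to WhisperX: the `result` returned above is a plain dict whose "segments" carry start/end times, text, and, after assign_word_speakers, a per-segment "speaker" label. A minimal sketch of the shape the functions below consume (field values are illustrative, not real output):

    result = {
        "segments": [
            {"start": 0.0, "end": 3.2, "text": " Hello there.", "speaker": "SPEAKER_00"},
            {"start": 3.4, "end": 6.1, "text": " Hi, how are you?", "speaker": "SPEAKER_01"},
        ],
    }
    # Segments that diarization could not attribute may lack the "speaker" key,
    # which is why the consumers below use segment.get("speaker", "UNKNOWN").
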
+ def export_to_docx(result, output_path=None):
+     """Export transcription to DOCX with timecodes and color-coded speakers."""
+     if output_path is None:
+         output_path = os.path.join(tempfile.gettempdir(), f"transcript_{int(time.time())}.docx")
+
+     formatted_date = date.today().strftime("%d/%m/%Y")
+     doc = Document()
+     doc.add_heading('Transcription ' + formatted_date, 0)
+
+     for segment in result["segments"]:
+         start_time = format_time(segment["start"])
+         end_time = format_time(segment["end"])
+
+         # Create a paragraph for this segment
+         p = doc.add_paragraph()
+         p.add_run(f"[{start_time} - {end_time}] ").bold = True
+
+         speaker = segment.get("speaker", "UNKNOWN")
+         if speaker in SPEAKER_COLORS:
+             speaker_run = p.add_run(f"{speaker}: ")
+             speaker_run.font.color.rgb = SPEAKER_COLORS[speaker]
+             speaker_run.bold = True
+         else:
+             p.add_run(f"{speaker}: ").bold = True
+
+         p.add_run(segment["text"])
+
+     doc.save(output_path)
+     return output_path
+
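
A minimal usage sketch for the exporter, reusing the illustrative segment shape from above:

    sample = {"segments": [{"start": 0.0, "end": 2.5, "text": " Hello.", "speaker": "SPEAKER_00"}]}
    path = export_to_docx(sample)  # defaults to transcript_<timestamp>.docx in the temp dir
    print(path)
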
+ def save_audio(audio_data, sample_rate):
+     """Save the recorded audio to a temporary file."""
+     filename = f"recorded_audio_{int(time.time())}.wav"
+     temp_file = os.path.join(tempfile.gettempdir(), filename)
+     sf.write(temp_file, audio_data, sample_rate)
+     return temp_file, filename
+
+ def process_audio(audio_input=None, model_name="large-v2"):
+     """Process the audio file or recording."""
+     try:
+         if audio_input is None:
+             return None, None, "No audio provided", None
+
+         # Determine if it's a file path (upload) or tuple (recording)
+         # (with the gr.Audio component below using type="filepath", both uploads and
+         # recordings arrive as paths, so the tuple branch is only a raw-array fallback)
+         if isinstance(audio_input, tuple) and len(audio_input) >= 2:
+             # Handle recorded audio
+             filepath, filename = save_audio(audio_input[0], audio_input[1])
+             is_temp_file = True
+         else:
+             # Handle uploaded file
+             filepath = audio_input
+             filename = os.path.basename(filepath) if filepath else None
+             is_temp_file = False
+
+         # Transcribe audio
+         result, error = transcribe_audio(filepath, model_name)
+         if error:
+             return None, None, f"Transcription error: {error}", None
+
+         # Export to DOCX
+         docx_path = export_to_docx(result)
+
+         # Prepare display table
+         table_data = []
+         for segment in result["segments"]:
+             start_time = format_time(segment["start"])
+             end_time = format_time(segment["end"])
+             speaker = segment.get("speaker", "UNKNOWN")
+             text = segment["text"]
+             table_data.append([f"{start_time} - {end_time}", speaker, text])
+
+         # Prepare audio for download
+         if is_temp_file:
+             download_path = (filepath, filename)
+         else:
+             # For uploaded files there is no need to offer a download, since the user already has the file
+             download_path = None
+
+         return table_data, docx_path, "Transcription completed successfully", download_path
+     except Exception as e:
+         return None, None, f"Error: {str(e)}", None
+
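
`process_audio` can also be exercised outside Gradio; a sketch, assuming a local file named meeting.wav (hypothetical) exists:

    table, docx_path, status, download = process_audio("meeting.wav", model_name="base")
    print(status)
    for row in table or []:
        print(row)  # ["HH:MM:SS - HH:MM:SS", speaker, text]
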
+ def create_interface():
+     """Create the Gradio interface."""
+     with gr.Blocks(title="WhisperX Transcription") as interface:
+         gr.Markdown("# 🎙️ Audio Transcription with Speaker Identification")
+         gr.Markdown("Upload an audio file or record directly to transcribe and identify speakers.")
+
+         with gr.Row():
+             with gr.Column():
+                 gr.Markdown("### Input")
+                 with gr.Tab("Audio Input"):
+                     audio_file = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or Record Audio (MP3 or WAV)")
+
+                 model_dropdown = gr.Dropdown(
+                     choices=["large-v2", "large-v3", "medium", "small", "base", "tiny"],
+                     value="large-v2",
+                     label="Whisper Model",
+                     info="Larger models are more accurate but require more computational resources"
+                 )
+
+                 process_btn = gr.Button("Transcribe", variant="primary")
+
+             with gr.Column():
+                 gr.Markdown("### Results")
+                 status = gr.Textbox(label="Status", interactive=False)
+                 transcription = gr.DataFrame(
+                     headers=["Time", "Speaker", "Text"],
+                     label="Transcription Results"
+                 )
+
+                 with gr.Row():
+                     docx_output = gr.File(label="DOCX Export")
+                     audio_download = gr.File(label="Download Recorded Audio")
+
+         process_btn.click(
+             fn=process_audio,
+             inputs=[audio_file, model_dropdown],
+             outputs=[transcription, docx_output, status, audio_download]
+         )
+
+     return interface
+
+ interface = create_interface()
+ interface.queue().launch(ssr_mode=False)
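
Before launching, the gated pyannote diarization weights pulled in by whisperx.DiarizationPipeline require an HF_TOKEN with access granted on the Hub; a small preflight sketch (assumption: the token lives in a local .env file, as load_dotenv() expects):

    import os
    import torch
    from dotenv import load_dotenv

    load_dotenv()
    assert os.getenv("HF_TOKEN"), "HF_TOKEN missing -- needed for the gated pyannote diarization model"
    print("device:", "cuda" if torch.cuda.is_available() else "cpu")
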
requirements.txt CHANGED
@@ -1,13 +1,147 @@
- transformers
- yt-dlp
- torch
- torchvision
- torchaudio
- nemo_toolkit
- faster-whisper
- ctranslate2
- intervaltree
- srt
- torch
- dotenv
- pyannote.audio
+ aiofiles==23.2.1 ; python_version >= "3.10" and python_version < "3.12"
+ aiohappyeyeballs==2.4.6 ; python_version >= "3.10" and python_version < "3.12"
+ aiohttp==3.11.13 ; python_version >= "3.10" and python_version < "3.12"
+ aiosignal==1.3.2 ; python_version >= "3.10" and python_version < "3.12"
+ alembic==1.14.1 ; python_version >= "3.10" and python_version < "3.12"
+ annotated-types==0.7.0 ; python_version >= "3.10" and python_version < "3.12"
+ antlr4-python3-runtime==4.9.3 ; python_version >= "3.10" and python_version < "3.12"
+ anyio==4.8.0 ; python_version >= "3.10" and python_version < "3.12"
+ asteroid-filterbanks==0.4.0 ; python_version >= "3.10" and python_version < "3.12"
+ async-timeout==5.0.1 ; python_version == "3.10"
+ attrs==25.1.0 ; python_version >= "3.10" and python_version < "3.12"
+ av==14.2.0 ; python_version >= "3.10" and python_version < "3.12"
+ certifi==2025.1.31 ; python_version >= "3.10" and python_version < "3.12"
+ cffi==1.17.1 ; python_version >= "3.10" and python_version < "3.12"
+ charset-normalizer==3.4.1 ; python_version >= "3.10" and python_version < "3.12"
+ click==8.1.8 ; python_version >= "3.10" and python_version < "3.12"
+ colorama==0.4.6 ; python_version >= "3.10" and python_version < "3.12" and (platform_system == "Windows" or sys_platform == "win32")
+ coloredlogs==15.0.1 ; python_version >= "3.10" and python_version < "3.12"
+ colorlog==6.9.0 ; python_version >= "3.10" and python_version < "3.12"
+ contourpy==1.3.1 ; python_version >= "3.10" and python_version < "3.12"
+ ctranslate2==4.4.0 ; python_version >= "3.10" and python_version < "3.12"
+ cycler==0.12.1 ; python_version >= "3.10" and python_version < "3.12"
+ docopt==0.6.2 ; python_version >= "3.10" and python_version < "3.12"
+ einops==0.8.1 ; python_version >= "3.10" and python_version < "3.12"
+ exceptiongroup==1.2.2 ; python_version == "3.10"
+ fastapi==0.115.11 ; python_version >= "3.10" and python_version < "3.12"
+ faster-whisper==1.1.0 ; python_version >= "3.10" and python_version < "3.12"
+ ffmpy==0.5.0 ; python_version >= "3.10" and python_version < "3.12"
+ filelock==3.17.0 ; python_version >= "3.10" and python_version < "3.12"
+ flatbuffers==25.2.10 ; python_version >= "3.10" and python_version < "3.12"
+ fonttools==4.56.0 ; python_version >= "3.10" and python_version < "3.12"
+ frozenlist==1.5.0 ; python_version >= "3.10" and python_version < "3.12"
+ fsspec==2025.2.0 ; python_version >= "3.10" and python_version < "3.12"
+ gradio-client==1.3.0 ; python_version >= "3.10" and python_version < "3.12"
+ gradio==4.44.1 ; python_version >= "3.10" and python_version < "3.12"
+ greenlet==3.1.1 ; python_version >= "3.10" and python_version < "3.12" and (platform_machine == "aarch64" or platform_machine == "ppc64le" or platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "AMD64" or platform_machine == "win32" or platform_machine == "WIN32")
+ h11==0.14.0 ; python_version >= "3.10" and python_version < "3.12"
+ httpcore==1.0.7 ; python_version >= "3.10" and python_version < "3.12"
+ httpx==0.28.1 ; python_version >= "3.10" and python_version < "3.12"
+ huggingface-hub==0.29.1 ; python_version >= "3.10" and python_version < "3.12"
+ humanfriendly==10.0 ; python_version >= "3.10" and python_version < "3.12"
+ hyperpyyaml==1.2.2 ; python_version >= "3.10" and python_version < "3.12"
+ idna==3.10 ; python_version >= "3.10" and python_version < "3.12"
+ importlib-resources==6.5.2 ; python_version >= "3.10" and python_version < "3.12"
+ jinja2==3.1.5 ; python_version >= "3.10" and python_version < "3.12"
+ joblib==1.4.2 ; python_version >= "3.10" and python_version < "3.12"
+ julius==0.2.7 ; python_version >= "3.10" and python_version < "3.12"
+ kiwisolver==1.4.8 ; python_version >= "3.10" and python_version < "3.12"
+ lightning-utilities==0.12.0 ; python_version >= "3.10" and python_version < "3.12"
+ lightning==2.5.0.post0 ; python_version >= "3.10" and python_version < "3.12"
+ lxml==5.3.1 ; python_version >= "3.10" and python_version < "3.12"
+ mako==1.3.9 ; python_version >= "3.10" and python_version < "3.12"
+ markdown-it-py==3.0.0 ; python_version >= "3.10" and python_version < "3.12"
+ markupsafe==2.1.5 ; python_version >= "3.10" and python_version < "3.12"
+ matplotlib==3.10.1 ; python_version >= "3.10" and python_version < "3.12"
+ mdurl==0.1.2 ; python_version >= "3.10" and python_version < "3.12"
+ mpmath==1.3.0 ; python_version >= "3.10" and python_version < "3.12"
+ multidict==6.1.0 ; python_version >= "3.10" and python_version < "3.12"
+ networkx==3.4.2 ; python_version >= "3.10" and python_version < "3.12"
+ nltk==3.9.1 ; python_version >= "3.10" and python_version < "3.12"
+ numpy==1.26.4 ; python_version >= "3.10" and python_version < "3.12"
+ nvidia-cublas-cu12==12.4.5.8 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-cuda-cupti-cu12==12.4.127 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-cuda-nvrtc-cu12==12.4.127 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-cuda-runtime-cu12==12.4.127 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-cudnn-cu12==9.1.0.70 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-cufft-cu12==11.2.1.3 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-curand-cu12==10.3.5.147 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-cusolver-cu12==11.6.1.9 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-cusparse-cu12==12.3.1.170 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-cusparselt-cu12==0.6.2 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-nccl-cu12==2.21.5 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-nvjitlink-cu12==12.4.127 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
+ nvidia-nvtx-cu12==12.4.127 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
+ omegaconf==2.3.0 ; python_version >= "3.10" and python_version < "3.12"
+ onnxruntime==1.20.1 ; python_version >= "3.10" and python_version < "3.12"
+ optuna==4.2.1 ; python_version >= "3.10" and python_version < "3.12"
+ orjson==3.10.15 ; python_version >= "3.10" and python_version < "3.12"
+ packaging==24.2 ; python_version >= "3.10" and python_version < "3.12"
+ pandas==2.2.3 ; python_version >= "3.10" and python_version < "3.12"
+ pillow==10.4.0 ; python_version >= "3.10" and python_version < "3.12"
+ primepy==1.3 ; python_version >= "3.10" and python_version < "3.12"
+ propcache==0.3.0 ; python_version >= "3.10" and python_version < "3.12"
+ protobuf==5.29.3 ; python_version >= "3.10" and python_version < "3.12"
+ pyannote-audio==3.3.2 ; python_version >= "3.10" and python_version < "3.12"
+ pyannote-core==5.0.0 ; python_version >= "3.10" and python_version < "3.12"
+ pyannote-database==5.1.3 ; python_version >= "3.10" and python_version < "3.12"
+ pyannote-metrics==3.2.1 ; python_version >= "3.10" and python_version < "3.12"
+ pyannote-pipeline==3.0.1 ; python_version >= "3.10" and python_version < "3.12"
+ pycparser==2.22 ; python_version >= "3.10" and python_version < "3.12"
+ pydantic-core==2.27.2 ; python_version >= "3.10" and python_version < "3.12"
+ pydantic==2.10.6 ; python_version >= "3.10" and python_version < "3.12"
+ pydub==0.25.1 ; python_version >= "3.10" and python_version < "3.12"
+ pygments==2.19.1 ; python_version >= "3.10" and python_version < "3.12"
+ pyparsing==3.2.1 ; python_version >= "3.10" and python_version < "3.12"
+ pyreadline3==3.5.4 ; python_version >= "3.10" and python_version < "3.12" and sys_platform == "win32"
+ python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version < "3.12"
+ python-docx==1.1.2 ; python_version >= "3.10" and python_version < "3.12"
+ python-dotenv==1.0.1 ; python_version >= "3.10" and python_version < "3.12"
+ python-multipart==0.0.20 ; python_version >= "3.10" and python_version < "3.12"
+ pytorch-lightning==2.5.0.post0 ; python_version >= "3.10" and python_version < "3.12"
+ pytorch-metric-learning==2.8.1 ; python_version >= "3.10" and python_version < "3.12"
+ pytz==2025.1 ; python_version >= "3.10" and python_version < "3.12"
+ pyyaml==6.0.2 ; python_version >= "3.10" and python_version < "3.12"
+ regex==2024.11.6 ; python_version >= "3.10" and python_version < "3.12"
+ requests==2.32.3 ; python_version >= "3.10" and python_version < "3.12"
+ rich==13.9.4 ; python_version >= "3.10" and python_version < "3.12"
+ ruamel-yaml-clib==0.2.12 ; python_version >= "3.10" and python_version < "3.12" and platform_python_implementation == "CPython"
+ ruamel-yaml==0.18.10 ; python_version >= "3.10" and python_version < "3.12"
+ ruff==0.9.9 ; python_version >= "3.10" and python_version < "3.12" and sys_platform != "emscripten"
+ safetensors==0.5.3 ; python_version >= "3.10" and python_version < "3.12"
+ scikit-learn==1.6.1 ; python_version >= "3.10" and python_version < "3.12"
+ scipy==1.15.2 ; python_version >= "3.10" and python_version < "3.12"
+ semantic-version==2.10.0 ; python_version >= "3.10" and python_version < "3.12"
+ semver==3.0.4 ; python_version >= "3.10" and python_version < "3.12"
+ sentencepiece==0.2.0 ; python_version >= "3.10" and python_version < "3.12"
+ setuptools==75.8.2 ; python_version >= "3.10" and python_version < "3.12"
+ shellingham==1.5.4 ; python_version >= "3.10" and python_version < "3.12"
+ six==1.17.0 ; python_version >= "3.10" and python_version < "3.12"
+ sniffio==1.3.1 ; python_version >= "3.10" and python_version < "3.12"
+ sortedcontainers==2.4.0 ; python_version >= "3.10" and python_version < "3.12"
+ soundfile==0.13.1 ; python_version >= "3.10" and python_version < "3.12"
+ speechbrain==1.0.2 ; python_version >= "3.10" and python_version < "3.12"
+ sqlalchemy==2.0.38 ; python_version >= "3.10" and python_version < "3.12"
+ starlette==0.46.0 ; python_version >= "3.10" and python_version < "3.12"
+ sympy==1.13.1 ; python_version >= "3.10" and python_version < "3.12"
+ tabulate==0.9.0 ; python_version >= "3.10" and python_version < "3.12"
+ tensorboardx==2.6.2.2 ; python_version >= "3.10" and python_version < "3.12"
+ threadpoolctl==3.5.0 ; python_version >= "3.10" and python_version < "3.12"
+ tokenizers==0.21.0 ; python_version >= "3.10" and python_version < "3.12"
+ tomlkit==0.12.0 ; python_version >= "3.10" and python_version < "3.12"
+ torch-audiomentations==0.12.0 ; python_version >= "3.10" and python_version < "3.12"
+ torch-pitch-shift==1.2.5 ; python_version >= "3.10" and python_version < "3.12"
+ torch==2.6.0 ; python_version >= "3.10" and python_version < "3.12"
+ torchaudio==2.6.0 ; python_version >= "3.10" and python_version < "3.12"
+ torchmetrics==1.6.1 ; python_version >= "3.10" and python_version < "3.12"
+ tqdm==4.67.1 ; python_version >= "3.10" and python_version < "3.12"
+ transformers==4.49.0 ; python_version >= "3.10" and python_version < "3.12"
+ triton==3.2.0 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
+ typer==0.15.2 ; python_version >= "3.10" and python_version < "3.12"
+ typing-extensions==4.12.2 ; python_version >= "3.10" and python_version < "3.12"
+ tzdata==2025.1 ; python_version >= "3.10" and python_version < "3.12"
+ urllib3==2.3.0 ; python_version >= "3.10" and python_version < "3.12"
+ uvicorn==0.34.0 ; python_version >= "3.10" and python_version < "3.12" and sys_platform != "emscripten"
+ websockets==12.0 ; python_version >= "3.10" and python_version < "3.12"
+ whisperx==3.3.1 ; python_version >= "3.10" and python_version < "3.12"
+ yarl==1.18.3 ; python_version >= "3.10" and python_version < "3.12"
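
The new requirements are fully pinned with environment markers (a lockfile-style export) targeting Python 3.10-3.11; installing them into a matching interpreter with `pip install -r requirements.txt` should reproduce the Space's environment, including the CUDA 12 wheels on Linux x86_64.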