Spaces:

Ngoufack
/

verbalens

Running

App Files Files Community

Ngoufack committed on Mar 4

Commit

3335977

1 Parent(s): ea3c466

retour au debut

Browse files

Files changed (2) hide show

app.py +82 -174
requirements.txt +15 -147

app.py CHANGED Viewed

@@ -1,189 +1,97 @@
-import os
-import tempfile
-import time
 import gradio as gr
 import whisperx
-import torch
-from docx import Document
-from docx.shared import RGBColor
-import numpy as np
-import soundfile as sf
-from datetime import date
-from dotenv import load_dotenv
-# Load environment variables from .env file
-load_dotenv()
-# Get Hugging Face token from environment variables
-HUGGINGFACE_TOKEN = os.getenv("HF_TOKEN")
-# Set device for computation
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-COMPUTE_TYPE = "float16" if torch.cuda.is_available() else "int8"
-# Speaker colors for docx export
-SPEAKER_COLORS = {
-    "SPEAKER_00": RGBColor(255, 0, 0),   # Red
-    "SPEAKER_01": RGBColor(0, 0, 255),   # Blue
-    "SPEAKER_02": RGBColor(0, 128, 0),   # Green
-    "SPEAKER_03": RGBColor(128, 0, 128), # Purple
-    "SPEAKER_04": RGBColor(255, 165, 0), # Orange
-    "SPEAKER_05": RGBColor(0, 128, 128), # Teal
-    "SPEAKER_06": RGBColor(139, 69, 19), # Brown
-    "SPEAKER_07": RGBColor(105, 105, 105), # Gray
-    "SPEAKER_08": RGBColor(255, 20, 147), # Pink
-    "SPEAKER_09": RGBColor(0, 191, 255),  # Sky Blue
-}
-def format_time(seconds):
-    """Convert seconds to HH:MM:SS format."""
-    hours = int(seconds // 3600)
-    minutes = int((seconds % 3600) // 60)
-    seconds = int(seconds % 60)
-    return f"{hours:02d}:{minutes:02d}:{seconds:02d}"
-def transcribe_audio(audio_path, model_name="large-v2"):
-    """Transcribe audio using WhisperX and identify speakers."""
-    try:
-        # 1. Load and transcribe audio with whisperx
-        model = whisperx.load_model(model_name, DEVICE, compute_type=COMPUTE_TYPE)
-        audio = whisperx.load_audio(audio_path)
-        result = model.transcribe(audio, batch_size=16)
-        # 2. Align whisper output
-        model_a, metadata = whisperx.load_align_model(language_code=result["language"], device=DEVICE)
-        result = whisperx.align(result["segments"], model_a, metadata, audio, DEVICE, return_char_alignments=False)
-        # 3. Assign speaker labels
-        diarize_model = whisperx.DiarizationPipeline(use_auth_token=HUGGINGFACE_TOKEN, device=DEVICE)
-        diarize_segments = diarize_model(audio, min_speakers=1, max_speakers=10)
-        result = whisperx.assign_word_speakers(diarize_segments, result)
-        return result, None
-    except Exception as e:
-        return None, str(e)
-def export_to_docx(result, output_path=None):
-    """Export transcription to DOCX with timecodes and color-coded speakers."""
-    if output_path is None:
-        output_path = os.path.join(tempfile.gettempdir(), f"transcript_{int(time.time())}.docx")
-    formatted_date = date.today().strftime("%d/%m/%Y")
-    doc = Document()
-    doc.add_heading('Transcription ' + formatted_date, 0)
-    for segment in result["segments"]:
-        start_time = format_time(segment["start"])
-        end_time = format_time(segment["end"])
-        # Create a paragraph for this segment
-        p = doc.add_paragraph()
-        p.add_run(f"[{start_time} - {end_time}] ").bold = True
-        speaker = segment.get("speaker", "UNKNOWN")
-        if speaker in SPEAKER_COLORS:
-            speaker_run = p.add_run(f"{speaker}: ")
-            speaker_run.font.color.rgb = SPEAKER_COLORS[speaker]
-            speaker_run.bold = True
-        else:
-            p.add_run(f"{speaker}: ").bold = True
-        p.add_run(segment["text"])
-    doc.save(output_path)
-    return output_path
-def save_audio(audio_data, sample_rate):
-    """Save the recorded audio to a temporary file."""
-    filename = f"recorded_audio_{int(time.time())}.wav"
-    temp_file = os.path.join(tempfile.gettempdir(), filename)
-    sf.write(temp_file, audio_data, sample_rate)
-    return temp_file, filename
-def process_audio(audio_input=None, model_name="large-v2"):
-    """Process the audio file or recording."""
-    try:
-        if audio_input is None:
-            return None, None, "No audio provided", None
-        # Determine if it's a file path (upload) or tuple (recording)
-        if isinstance(audio_input, tuple) and len(audio_input) >= 2:
-            # Handle recorded audio
-            filepath, filename = save_audio(audio_input[0], audio_input[1])
-            is_temp_file = True
-        else:
-            # Handle uploaded file
-            filepath = audio_input
-            filename = os.path.basename(filepath) if filepath else None
-            is_temp_file = False
-        # Transcribe audio
-        result, error = transcribe_audio(filepath, model_name)
-        if error:
-            return None, None, f"Transcription error: {error}", None
-        # Export to DOCX
-        docx_path = export_to_docx(result)
-        # Prepare display table
-        table_data = []
-        for segment in result["segments"]:
-            start_time = format_time(segment["start"])
-            end_time = format_time(segment["end"])
-            speaker = segment.get("speaker", "UNKNOWN")
-            text = segment["text"]
-            table_data.append([f"{start_time} - {end_time}", speaker, text])
-        # Prepare audio for download
-        if is_temp_file:
-            download_path = (filepath, filename)
-        else:
-            # For uploaded files, no need to provide download as user already has the file
-            download_path = None
-        return table_data, docx_path, "Transcription completed successfully", download_path
-    except Exception as e:
-        return None, None, f"Error: {str(e)}", None
-def create_interface():
-    """Create the Gradio interface."""
-    with gr.Blocks(title="WhisperX Transcription") as interface:
-        gr.Markdown("# Verbalens Prototype : Audio Transcription with Speaker Identification")
-        gr.Markdown("Upload an audio file or record directly to transcribe and identify speakers.")
-        with gr.Row():
-            with gr.Column():
-                gr.Markdown("### Input")
-                with gr.Tab("Audio Input"):
-                    audio_file = gr.Audio(sources=["upload", "microphone"], type="filepath", label="Upload or Record Audio (MP3 or WAV)")
-                model_dropdown = gr.Dropdown(
-                    choices=["large-v2", "large-v3", "medium", "small", "base", "tiny"],
-                    value="large-v2",
-                    label="Whisper Model",
-                    info="Larger models are more accurate but require more computational resources"
-                )
-                process_btn = gr.Button("Transcribe", variant="primary")
-            with gr.Column():
-                gr.Markdown("### Results")
-                status = gr.Textbox(label="Status", interactive=False)
-                transcription = gr.DataFrame(
-                    headers=["Time", "Speaker", "Text"],
-                    label="Transcription Results"
-                )
-                with gr.Row():
-                    docx_output = gr.File(label="DOCX Export")
-                    audio_download = gr.File(label="Download Recorded Audio")
-        process_btn.click(
-            fn=process_audio,
-            inputs=[audio_file, model_dropdown],
-            outputs=[transcription, docx_output, status, audio_download]
-        )
-    return interface
-interface = create_interface()
-interface.queue().launch(ssr_mode=False)

+import spaces
+import torch
 import gradio as gr
+import yt_dlp as youtube_dl
 import whisperx
+import tempfile
+import os
+# Run on the GPU when torch can see one, otherwise fall back to the CPU.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# float16 is only usable on CUDA; the CTranslate2 backend rejects it on CPU,
+# so pick int8 there (same choice the previous revision of this app made).
+COMPUTE_TYPE = "float16" if device == "cuda" else "int8"
+BATCH_SIZE = 8  # batch size handed to model.transcribe
+FILE_LIMIT_MB = 1000
+YT_LENGTH_LIMIT_S = 3600  # limit to 1 hour YouTube files
+# Load the WhisperX model once at import time so every request reuses it.
+model = whisperx.load_model("large-v2", device, compute_type=COMPUTE_TYPE)
+@spaces.GPU
+def transcribe(inputs, task):
+    """Transcribe one audio file with the module-level WhisperX model.
+
+    Args:
+        inputs: Path of the audio file to process, or None when nothing
+            was submitted.
+        task: Task selected in the UI ("transcribe" or "translate");
+            forwarded to WhisperX.
+
+    Returns:
+        The text of all recognised segments joined into one string
+        (empty when no speech was detected).
+
+    Raises:
+        gr.Error: If no audio was submitted.
+    """
+    if inputs is None:
+        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
+    audio = whisperx.load_audio(inputs)
+    result = model.transcribe(audio, batch_size=BATCH_SIZE, task=task)
+    # WhisperX returns {"segments": [...], "language": ...}; there is no
+    # top-level "text" key, so the transcript is assembled from the segments.
+    return " ".join(segment["text"].strip() for segment in result["segments"])
+def _return_yt_html_embed(yt_url):
+    """Build the HTML snippet that embeds the YouTube player for a video URL.
+
+    The video id is taken from the ``v`` query parameter when present, from
+    the path for ``youtu.be`` short links and ``/embed/``, ``/shorts/`` or
+    ``/live/`` URLs, and otherwise falls back to everything after ``?v=``.
+
+    Args:
+        yt_url: URL of the YouTube video as typed by the user.
+
+    Returns:
+        A centered ``<iframe>`` HTML string pointing at the embed URL.
+    """
+    from html import escape
+    from urllib.parse import parse_qs, urlparse
+
+    video_id = None
+    try:
+        parsed = urlparse(yt_url)
+        host = (parsed.hostname or "").lower()
+        path_parts = [part for part in parsed.path.split("/") if part]
+        v_values = parse_qs(parsed.query).get("v")
+        if v_values:
+            # Regular watch URL; extra parameters such as &t= are ignored.
+            video_id = v_values[0]
+        elif host.endswith("youtu.be") and path_parts:
+            video_id = path_parts[0]
+        elif len(path_parts) >= 2 and path_parts[0] in ("embed", "shorts", "live"):
+            video_id = path_parts[1]
+    except ValueError:
+        # Malformed URL: fall through to the plain string split below.
+        video_id = None
+    if video_id is None:
+        video_id = yt_url.split("?v=")[-1]
+    # Escape the id so user input cannot break out of the src attribute.
+    safe_id = escape(video_id, quote=True)
+    return f'<center><iframe width="500" height="320" src="https://www.youtube.com/embed/{safe_id}"></iframe></center>'
+def download_yt_audio(yt_url, filename):
+    """Download the audio track of a YouTube video and convert it to WAV.
+
+    Args:
+        yt_url: URL of the video to fetch.
+        filename: Output path template handed to yt-dlp.
+
+    Raises:
+        gr.Error: If yt-dlp cannot read or download the video, or if the
+            video is longer than YT_LENGTH_LIMIT_S seconds.
+    """
+    ydl_opts = {
+        "format": "bestaudio/best",
+        "outtmpl": filename,
+        "postprocessors": [{
+            "key": "FFmpegExtractAudio",
+            "preferredcodec": "wav",
+            "preferredquality": "192",
+        }],
+    }
+    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
+        try:
+            # Probe the metadata first so over-long videos are rejected
+            # before any audio is downloaded.
+            info = ydl.extract_info(yt_url, download=False)
+        except youtube_dl.utils.DownloadError as err:
+            raise gr.Error(str(err)) from err
+        # Some entries (e.g. live streams) report no duration; let those through.
+        duration_s = (info or {}).get("duration")
+        if duration_s is not None and duration_s > YT_LENGTH_LIMIT_S:
+            raise gr.Error(
+                f"Maximum YouTube length is {YT_LENGTH_LIMIT_S} seconds, got a {int(duration_s)}-second video."
+            )
+        try:
+            ydl.download([yt_url])
+        except youtube_dl.utils.DownloadError as err:
+            raise gr.Error(str(err)) from err
+@spaces.GPU
+def yt_transcribe(yt_url, task):
+    """Download a YouTube video's audio and transcribe it with WhisperX.
+
+    Args:
+        yt_url: URL of the YouTube video.
+        task: Task selected in the UI ("transcribe" or "translate");
+            forwarded to WhisperX.
+
+    Returns:
+        A tuple ``(html_embed, transcript)``: the HTML player snippet for the
+        video and the text of all recognised segments joined into one string.
+
+    Raises:
+        gr.Error: If no URL was submitted.
+    """
+    if not yt_url:
+        raise gr.Error("No YouTube URL submitted! Please paste a video URL before submitting your request.")
+    html_embed_str = _return_yt_html_embed(yt_url)
+    # The download lives in a temporary directory that is removed on exit.
+    with tempfile.TemporaryDirectory() as tmpdirname:
+        filepath = os.path.join(tmpdirname, "audio.wav")
+        download_yt_audio(yt_url, filepath)
+        audio = whisperx.load_audio(filepath)
+        result = model.transcribe(audio, batch_size=BATCH_SIZE, task=task)
+    # WhisperX returns {"segments": [...], "language": ...}; there is no
+    # top-level "text" key, so the transcript is assembled from the segments.
+    transcript = " ".join(segment["text"].strip() for segment in result["segments"])
+    return html_embed_str, transcript
+# Top-level container; the three interfaces below are shown as tabs inside it.
+demo = gr.Blocks(theme=gr.themes.Ocean())
+# Tab 1: record from the microphone.
+mf_transcribe = gr.Interface(
+    fn=transcribe,
+    inputs=[
+        gr.Audio(sources="microphone", type="filepath"),
+        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
+    ],
+    outputs="text",
+    title="VerbaLend Demo with WhisperX",
+    description="Transcribe long-form microphone or audio inputs using WhisperX.",
+    flagging_mode="never",
+)
+# Tab 2: upload an audio file.
+file_transcribe = gr.Interface(
+    fn=transcribe,
+    inputs=[
+        gr.Audio(sources="upload", type="filepath", label="Audio file"),
+        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
+    ],
+    outputs="text",
+    title="VerbaLend Demo with WhisperX",
+    description="Transcribe uploaded audio files using WhisperX.",
+    flagging_mode="never",
+)
+# Tab 3: paste a YouTube URL. The interface gets its own name so it does not
+# shadow the yt_transcribe handler function it wraps.
+yt_interface = gr.Interface(
+    fn=yt_transcribe,
+    inputs=[
+        gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
+        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
+    ],
+    outputs=["html", "text"],
+    title="VerbaLend Demo with WhisperX",
+    description="Transcribe YouTube videos using WhisperX.",
+    flagging_mode="never",
+)
+with demo:
+    gr.TabbedInterface([mf_transcribe, file_transcribe, yt_interface], ["Microphone", "Audio file", "YouTube"])
+demo.queue().launch(ssr_mode=False)

requirements.txt CHANGED Viewed

@@ -1,147 +1,15 @@
-aiofiles==23.2.1 ; python_version >= "3.10" and python_version < "3.12"
-aiohappyeyeballs==2.4.6 ; python_version >= "3.10" and python_version < "3.12"
-aiohttp==3.11.13 ; python_version >= "3.10" and python_version < "3.12"
-aiosignal==1.3.2 ; python_version >= "3.10" and python_version < "3.12"
-alembic==1.14.1 ; python_version >= "3.10" and python_version < "3.12"
-annotated-types==0.7.0 ; python_version >= "3.10" and python_version < "3.12"
-antlr4-python3-runtime==4.9.3 ; python_version >= "3.10" and python_version < "3.12"
-anyio==4.8.0 ; python_version >= "3.10" and python_version < "3.12"
-asteroid-filterbanks==0.4.0 ; python_version >= "3.10" and python_version < "3.12"
-async-timeout==5.0.1 ; python_version == "3.10"
-attrs==25.1.0 ; python_version >= "3.10" and python_version < "3.12"
-av==14.2.0 ; python_version >= "3.10" and python_version < "3.12"
-certifi==2025.1.31 ; python_version >= "3.10" and python_version < "3.12"
-cffi==1.17.1 ; python_version >= "3.10" and python_version < "3.12"
-charset-normalizer==3.4.1 ; python_version >= "3.10" and python_version < "3.12"
-click==8.1.8 ; python_version >= "3.10" and python_version < "3.12"
-colorama==0.4.6 ; python_version >= "3.10" and python_version < "3.12" and (platform_system == "Windows" or sys_platform == "win32")
-coloredlogs==15.0.1 ; python_version >= "3.10" and python_version < "3.12"
-colorlog==6.9.0 ; python_version >= "3.10" and python_version < "3.12"
-contourpy==1.3.1 ; python_version >= "3.10" and python_version < "3.12"
-ctranslate2==4.4.0 ; python_version >= "3.10" and python_version < "3.12"
-cycler==0.12.1 ; python_version >= "3.10" and python_version < "3.12"
-docopt==0.6.2 ; python_version >= "3.10" and python_version < "3.12"
-einops==0.8.1 ; python_version >= "3.10" and python_version < "3.12"
-exceptiongroup==1.2.2 ; python_version == "3.10"
-fastapi==0.115.11 ; python_version >= "3.10" and python_version < "3.12"
-faster-whisper==1.1.0 ; python_version >= "3.10" and python_version < "3.12"
-ffmpy==0.5.0 ; python_version >= "3.10" and python_version < "3.12"
-filelock==3.17.0 ; python_version >= "3.10" and python_version < "3.12"
-flatbuffers==25.2.10 ; python_version >= "3.10" and python_version < "3.12"
-fonttools==4.56.0 ; python_version >= "3.10" and python_version < "3.12"
-frozenlist==1.5.0 ; python_version >= "3.10" and python_version < "3.12"
-fsspec==2025.2.0 ; python_version >= "3.10" and python_version < "3.12"
-gradio-client==1.3.0 ; python_version >= "3.10" and python_version < "3.12"
-gradio==4.44.1 ; python_version >= "3.10" and python_version < "3.12"
-greenlet==3.1.1 ; python_version >= "3.10" and python_version < "3.12" and (platform_machine == "aarch64" or platform_machine == "ppc64le" or platform_machine == "x86_64" or platform_machine == "amd64" or platform_machine == "AMD64" or platform_machine == "win32" or platform_machine == "WIN32")
-h11==0.14.0 ; python_version >= "3.10" and python_version < "3.12"
-httpcore==1.0.7 ; python_version >= "3.10" and python_version < "3.12"
-httpx==0.28.1 ; python_version >= "3.10" and python_version < "3.12"
-huggingface-hub==0.29.1 ; python_version >= "3.10" and python_version < "3.12"
-humanfriendly==10.0 ; python_version >= "3.10" and python_version < "3.12"
-hyperpyyaml==1.2.2 ; python_version >= "3.10" and python_version < "3.12"
-idna==3.10 ; python_version >= "3.10" and python_version < "3.12"
-importlib-resources==6.5.2 ; python_version >= "3.10" and python_version < "3.12"
-jinja2==3.1.5 ; python_version >= "3.10" and python_version < "3.12"
-joblib==1.4.2 ; python_version >= "3.10" and python_version < "3.12"
-julius==0.2.7 ; python_version >= "3.10" and python_version < "3.12"
-kiwisolver==1.4.8 ; python_version >= "3.10" and python_version < "3.12"
-lightning-utilities==0.12.0 ; python_version >= "3.10" and python_version < "3.12"
-lightning==2.5.0.post0 ; python_version >= "3.10" and python_version < "3.12"
-lxml==5.3.1 ; python_version >= "3.10" and python_version < "3.12"
-mako==1.3.9 ; python_version >= "3.10" and python_version < "3.12"
-markdown-it-py==3.0.0 ; python_version >= "3.10" and python_version < "3.12"
-markupsafe==2.1.5 ; python_version >= "3.10" and python_version < "3.12"
-matplotlib==3.10.1 ; python_version >= "3.10" and python_version < "3.12"
-mdurl==0.1.2 ; python_version >= "3.10" and python_version < "3.12"
-mpmath==1.3.0 ; python_version >= "3.10" and python_version < "3.12"
-multidict==6.1.0 ; python_version >= "3.10" and python_version < "3.12"
-networkx==3.4.2 ; python_version >= "3.10" and python_version < "3.12"
-nltk==3.9.1 ; python_version >= "3.10" and python_version < "3.12"
-numpy==1.26.4 ; python_version >= "3.10" and python_version < "3.12"
-nvidia-cublas-cu12==12.4.5.8 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
-nvidia-cuda-cupti-cu12==12.4.127 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
-nvidia-cuda-nvrtc-cu12==12.4.127 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
-nvidia-cuda-runtime-cu12==12.4.127 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
-nvidia-cudnn-cu12==9.1.0.70 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
-nvidia-cufft-cu12==11.2.1.3 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
-nvidia-curand-cu12==10.3.5.147 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
-nvidia-cusolver-cu12==11.6.1.9 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
-nvidia-cusparse-cu12==12.3.1.170 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
-nvidia-cusparselt-cu12==0.6.2 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
-nvidia-nccl-cu12==2.21.5 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
-nvidia-nvjitlink-cu12==12.4.127 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
-nvidia-nvtx-cu12==12.4.127 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
-omegaconf==2.3.0 ; python_version >= "3.10" and python_version < "3.12"
-onnxruntime==1.20.1 ; python_version >= "3.10" and python_version < "3.12"
-optuna==4.2.1 ; python_version >= "3.10" and python_version < "3.12"
-orjson==3.10.15 ; python_version >= "3.10" and python_version < "3.12"
-packaging==24.2 ; python_version >= "3.10" and python_version < "3.12"
-pandas==2.2.3 ; python_version >= "3.10" and python_version < "3.12"
-pillow==10.4.0 ; python_version >= "3.10" and python_version < "3.12"
-primepy==1.3 ; python_version >= "3.10" and python_version < "3.12"
-propcache==0.3.0 ; python_version >= "3.10" and python_version < "3.12"
-protobuf==5.29.3 ; python_version >= "3.10" and python_version < "3.12"
-pyannote-audio==3.3.2 ; python_version >= "3.10" and python_version < "3.12"
-pyannote-core==5.0.0 ; python_version >= "3.10" and python_version < "3.12"
-pyannote-database==5.1.3 ; python_version >= "3.10" and python_version < "3.12"
-pyannote-metrics==3.2.1 ; python_version >= "3.10" and python_version < "3.12"
-pyannote-pipeline==3.0.1 ; python_version >= "3.10" and python_version < "3.12"
-pycparser==2.22 ; python_version >= "3.10" and python_version < "3.12"
-pydantic-core==2.27.2 ; python_version >= "3.10" and python_version < "3.12"
-pydantic==2.10.6 ; python_version >= "3.10" and python_version < "3.12"
-pydub==0.25.1 ; python_version >= "3.10" and python_version < "3.12"
-pygments==2.19.1 ; python_version >= "3.10" and python_version < "3.12"
-pyparsing==3.2.1 ; python_version >= "3.10" and python_version < "3.12"
-pyreadline3==3.5.4 ; python_version >= "3.10" and python_version < "3.12" and sys_platform == "win32"
-python-dateutil==2.9.0.post0 ; python_version >= "3.10" and python_version < "3.12"
-python-docx==1.1.2 ; python_version >= "3.10" and python_version < "3.12"
-python-dotenv==1.0.1 ; python_version >= "3.10" and python_version < "3.12"
-python-multipart==0.0.20 ; python_version >= "3.10" and python_version < "3.12"
-pytorch-lightning==2.5.0.post0 ; python_version >= "3.10" and python_version < "3.12"
-pytorch-metric-learning==2.8.1 ; python_version >= "3.10" and python_version < "3.12"
-pytz==2025.1 ; python_version >= "3.10" and python_version < "3.12"
-pyyaml==6.0.2 ; python_version >= "3.10" and python_version < "3.12"
-regex==2024.11.6 ; python_version >= "3.10" and python_version < "3.12"
-requests==2.32.3 ; python_version >= "3.10" and python_version < "3.12"
-rich==13.9.4 ; python_version >= "3.10" and python_version < "3.12"
-ruamel-yaml-clib==0.2.12 ; python_version >= "3.10" and python_version < "3.12" and platform_python_implementation == "CPython"
-ruamel-yaml==0.18.10 ; python_version >= "3.10" and python_version < "3.12"
-ruff==0.9.9 ; python_version >= "3.10" and python_version < "3.12" and sys_platform != "emscripten"
-safetensors==0.5.3 ; python_version >= "3.10" and python_version < "3.12"
-scikit-learn==1.6.1 ; python_version >= "3.10" and python_version < "3.12"
-scipy==1.15.2 ; python_version >= "3.10" and python_version < "3.12"
-semantic-version==2.10.0 ; python_version >= "3.10" and python_version < "3.12"
-semver==3.0.4 ; python_version >= "3.10" and python_version < "3.12"
-sentencepiece==0.2.0 ; python_version >= "3.10" and python_version < "3.12"
-setuptools==75.8.2 ; python_version >= "3.10" and python_version < "3.12"
-shellingham==1.5.4 ; python_version >= "3.10" and python_version < "3.12"
-six==1.17.0 ; python_version >= "3.10" and python_version < "3.12"
-sniffio==1.3.1 ; python_version >= "3.10" and python_version < "3.12"
-sortedcontainers==2.4.0 ; python_version >= "3.10" and python_version < "3.12"
-soundfile==0.13.1 ; python_version >= "3.10" and python_version < "3.12"
-speechbrain==1.0.2 ; python_version >= "3.10" and python_version < "3.12"
-sqlalchemy==2.0.38 ; python_version >= "3.10" and python_version < "3.12"
-starlette==0.46.0 ; python_version >= "3.10" and python_version < "3.12"
-sympy==1.13.1 ; python_version >= "3.10" and python_version < "3.12"
-tabulate==0.9.0 ; python_version >= "3.10" and python_version < "3.12"
-tensorboardx==2.6.2.2 ; python_version >= "3.10" and python_version < "3.12"
-threadpoolctl==3.5.0 ; python_version >= "3.10" and python_version < "3.12"
-tokenizers==0.21.0 ; python_version >= "3.10" and python_version < "3.12"
-tomlkit==0.12.0 ; python_version >= "3.10" and python_version < "3.12"
-torch-audiomentations==0.12.0 ; python_version >= "3.10" and python_version < "3.12"
-torch-pitch-shift==1.2.5 ; python_version >= "3.10" and python_version < "3.12"
-torch==2.6.0 ; python_version >= "3.10" and python_version < "3.12"
-torchaudio==2.6.0 ; python_version >= "3.10" and python_version < "3.12"
-torchmetrics==1.6.1 ; python_version >= "3.10" and python_version < "3.12"
-tqdm==4.67.1 ; python_version >= "3.10" and python_version < "3.12"
-transformers==4.49.0 ; python_version >= "3.10" and python_version < "3.12"
-triton==3.2.0 ; python_version >= "3.10" and python_version < "3.12" and platform_system == "Linux" and platform_machine == "x86_64"
-typer==0.15.2 ; python_version >= "3.10" and python_version < "3.12"
-typing-extensions==4.12.2 ; python_version >= "3.10" and python_version < "3.12"
-tzdata==2025.1 ; python_version >= "3.10" and python_version < "3.12"
-urllib3==2.3.0 ; python_version >= "3.10" and python_version < "3.12"
-uvicorn==0.34.0 ; python_version >= "3.10" and python_version < "3.12" and sys_platform != "emscripten"
-websockets==12.0 ; python_version >= "3.10" and python_version < "3.12"
-whisperx==3.3.1 ; python_version >= "3.10" and python_version < "3.12"
-yarl==1.18.3 ; python_version >= "3.10" and python_version < "3.12"

+pydub
+pandas
+numpy
+torch
+torchaudio
+pyannote.audio
+transformers>=4.19.0
+ffmpeg-python==0.2.0
+tqdm
+transformers>=4.19.0
+yt-dlp
+# tempfile is part of the Python standard library, so it needs no requirements entry
+more_itertools
+faster-whisper
+git+https://github.com/m-bain/whisperx.git