Spaces:
Sleeping
Sleeping
File size: 3,839 Bytes
6c226f9 8e787d3 6c226f9 d790c0b a414c37 7bbd83c 554c0b5 d790c0b 88183ad 7bbd83c 592b794 6c226f9 a11fbef a5bfe25 9d6fa91 554c0b5 6c226f9 7bbd83c 6c226f9 554c0b5 8dba9f0 7bbd83c 6c226f9 554c0b5 7bbd83c 520f263 7bbd83c 6c226f9 c5ddbf5 592b794 7bbd83c 3c0cd8e 7bbd83c 6c226f9 d790c0b 554c0b5 d790c0b 7bbd83c d790c0b 7bbd83c 66efbc3 c5ddbf5 592b794 7bbd83c d790c0b b97a3c2 3c0cd8e 7bbd83c 6c226f9 7bbd83c 6c226f9 7bbd83c 6c226f9 7097513 3ce82e9 7097513 7bbd83c a5bfe25 6c226f9 b95b5ca 6c226f9 7bbd83c 6c226f9 ab14d7d |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
import json
import os
import tempfile
import time
from datetime import datetime
from functools import lru_cache
from pathlib import Path
from uuid import uuid4

import gradio as gr
import spaces
import torch
import yt_dlp as youtube_dl
from huggingface_hub import CommitScheduler
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read
# Request the hf_transfer backend for Hugging Face Hub transfers.
# NOTE(review): this runs after huggingface_hub/transformers were imported; if the
# library reads the flag at import time, setting it here has no effect — confirm.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"
# Whisper checkpoint loaded into the pipeline and linked in the UI description.
MODEL_NAME = "openai/whisper-large-v3"
# Batch size passed to every pipeline call.
BATCH_SIZE = 8
# Longest YouTube video (in seconds) accepted by download_yt_audio.
YT_LENGTH_LIMIT_S = 4800 # 1 hour 20 minutes
# CUDA device index 0 when torch reports a GPU, otherwise the CPU.
device = 0 if torch.cuda.is_available() else "cpu"
# Shared speech-recognition pipeline, created once at import; chunk length is 30 s.
pipe = pipeline(task="automatic-speech-recognition", model=MODEL_NAME, chunk_length_s=30, device=device)
# Local folder for the transcription log. The file name embeds a fresh UUID, so
# every start of the app writes to its own file.
JSON_DATASET_DIR = Path("json_dataset")
JSON_DATASET_DIR.mkdir(parents=True, exist_ok=True)
JSON_DATASET_PATH = JSON_DATASET_DIR / f"transcriptions-{uuid4()}.json"
# CommitScheduler that uploads JSON_DATASET_DIR to the "data" folder of the
# dataset repo; its lock is taken by save_transcription while appending.
scheduler = CommitScheduler(
repo_id="transcript-dataset-repo",
repo_type="dataset",
folder_path=JSON_DATASET_DIR,
path_in_repo="data",
)
@spaces.GPU(duration=120)
@lru_cache(maxsize=10)
def transcribe_audio(inputs, task):
    """Run the speech-recognition pipeline on submitted audio.

    Results are memoised by ``lru_cache`` (up to 10 entries), so both
    arguments must be hashable.

    Args:
        inputs: Audio to transcribe; ``None`` means nothing was submitted.
        task: Value forwarded to the model as ``generate_kwargs["task"]``.

    Returns:
        The ``"text"`` entry of the pipeline output.

    Raises:
        gr.Error: If ``inputs`` is ``None``.
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    generation_options = {"task": task}
    result = pipe(
        inputs,
        batch_size=BATCH_SIZE,
        generate_kwargs=generation_options,
        return_timestamps=True,
    )
    return result["text"]
def download_yt_audio(yt_url, filename):
    """Download the best available audio stream of a YouTube video.

    Metadata is fetched first so that over-long videos are rejected before
    any media is downloaded.

    Args:
        yt_url: URL of the video to fetch.
        filename: Destination path, passed to yt-dlp as ``outtmpl``.

    Raises:
        gr.Error: If yt-dlp fails to extract metadata or to download, if the
            video's duration is unknown, or if it exceeds
            ``YT_LENGTH_LIMIT_S`` seconds.
    """
    # The context manager closes the metadata-only client once it is done.
    try:
        with youtube_dl.YoutubeDL() as info_loader:
            info = info_loader.extract_info(yt_url, download=False)
    except youtube_dl.utils.DownloadError as err:
        raise gr.Error(str(err)) from err

    # Some entries (presumably live streams) report no duration; without one
    # the length limit cannot be enforced, so refuse instead of crashing.
    file_length = (info or {}).get("duration")
    if file_length is None:
        raise gr.Error("Could not determine the length of this YouTube video.")

    if file_length > YT_LENGTH_LIMIT_S:
        yt_length_limit_hms = time.strftime("%H:%M:%S", time.gmtime(YT_LENGTH_LIMIT_S))
        file_length_hms = time.strftime("%H:%M:%S", time.gmtime(file_length))
        raise gr.Error(f"Maximum YouTube length is {yt_length_limit_hms}, got {file_length_hms} YouTube video.")

    ydl_opts = {"outtmpl": filename, "format": "bestaudio/best"}
    with youtube_dl.YoutubeDL(ydl_opts) as ydl:
        # Surface download failures in the UI the same way as metadata failures.
        try:
            ydl.download([yt_url])
        except youtube_dl.utils.DownloadError as err:
            raise gr.Error(str(err)) from err
@spaces.GPU(duration=120)
@lru_cache(maxsize=10)
def yt_transcribe(yt_url, task):
    """Transcribe (or translate) the audio of a YouTube video.

    The audio is downloaded into a temporary directory, decoded at the
    pipeline's sampling rate, run through the model, and the resulting text
    is logged with ``save_transcription``.

    ``lru_cache`` memoises on ``(yt_url, task)``: a cache hit returns the
    stored text without running the body again, so nothing is re-downloaded
    or re-logged.

    Args:
        yt_url: URL of the video.
        task: Value forwarded to the model as ``generate_kwargs["task"]``.

    Returns:
        The ``"text"`` entry of the pipeline output.
    """
    with tempfile.TemporaryDirectory() as workdir:
        audio_path = os.path.join(workdir, "video.mp4")
        download_yt_audio(yt_url, audio_path)
        # Read the file fully into memory so the directory can be discarded.
        with open(audio_path, "rb") as audio_file:
            encoded_audio = audio_file.read()

    sample_rate = pipe.feature_extractor.sampling_rate
    waveform = ffmpeg_read(encoded_audio, sample_rate)
    model_input = {"array": waveform, "sampling_rate": sample_rate}

    result = pipe(
        model_input,
        batch_size=BATCH_SIZE,
        generate_kwargs={"task": task},
        return_timestamps=True,
    )
    transcript = result["text"]

    save_transcription(yt_url, transcript)
    return transcript
def save_transcription(yt_url, transcription):
    """Append one transcription record to the JSON log file.

    Each record is written as a single JSON object followed by a newline,
    while holding ``scheduler.lock``.

    Args:
        yt_url: URL the transcription was produced from.
        transcription: Text returned by the model.
    """
    with scheduler.lock, JSON_DATASET_PATH.open("a") as log_file:
        record = {
            "url": yt_url,
            "transcription": transcription,
            "datetime": datetime.now().isoformat(),
        }
        json.dump(record, log_file)
        log_file.write("\n")
# Top-level container; the tabbed interface is attached to it further down.
demo = gr.Blocks()

# Inputs of the YouTube tab: the video URL and the task to run.
yt_url_input = gr.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL")
yt_task_input = gr.Radio(["transcribe", "translate"], label="Task", value="transcribe")

# Description shown under the title; links to the checkpoint's model page.
yt_description = (
    "Transcribe long-form YouTube videos with the click of a button! Demo uses the checkpoint"
    f" [{MODEL_NAME}](https://huggingface.co/{MODEL_NAME}) and 🤗 Transformers to transcribe video files of"
    " arbitrary length."
)

yt_transcribe_interface = gr.Interface(
    fn=yt_transcribe,
    inputs=[yt_url_input, yt_task_input],
    outputs="text",
    title="Whisper Large V3: Transcribe YouTube",
    description=yt_description,
    allow_flagging="never",
)

# Build the single-tab layout inside the Blocks container.
with demo:
    gr.TabbedInterface([yt_transcribe_interface], ["YouTube"])

# Enable the request queue, then start serving.
demo.queue().launch()
|