|
import os
import uuid
from typing import Generator, Optional

import gradio as gr
import pandas as pd
import yt_dlp as youtube_dl
from faster_whisper import WhisperModel
|
|
|
class YouTubeTranscriber:
    """Download a video's audio track with yt-dlp and transcribe it with Whisper."""

    # Fixed column order of the frame built by ``process_segments``; pinning it
    # keeps the header present even when there are no segments at all.
    _SEGMENT_COLUMNS = ('chunk_id', 'chunk_length', 'text', 'start_time', 'end_time')

    def __init__(self, model_path: str):
        """Create the transcriber.

        Args:
            model_path: Value handed unchanged to ``WhisperModel``.
        """
        self.model = WhisperModel(model_path)

    def download_audio(self, url: str, preferred_quality: str = "192") -> Optional[str]:
        """Download the audio of ``url`` and convert it to an MP3 under ``/tmp``.

        Args:
            url: Address of the video to fetch.
            preferred_quality: Forwarded as the ``preferredquality`` option of
                the ``FFmpegExtractAudio`` post-processor.

        Returns:
            Path of the resulting ``.mp3`` file, or ``None`` when yt-dlp raises
            a ``DownloadError``.
        """
        base_path = os.path.join("/tmp", str(uuid.uuid4()))
        output_path = f"{base_path}.mp3"

        ydl_opts = {
            'format': 'bestaudio/best',
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': preferred_quality,
            }],
            # Let the raw download keep its real extension; the post-processor
            # then writes "<base>.mp3" instead of converting a mislabeled
            # ".mp3" file in place.
            'outtmpl': f"{base_path}.%(ext)s",
        }

        try:
            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                # Metadata-only pass so the title can be reported before the
                # actual download starts.
                info_dict = ydl.extract_info(url, download=False) or {}
                video_title = info_dict.get('title', 'Unknown title')
                print(f"Downloading audio for: {video_title}")

                ydl.download([url])
                print(f"Audio file saved as: {output_path}")

            return output_path

        except youtube_dl.utils.DownloadError as e:
            print(f"Error downloading audio: {e}")
            return None

    def transcribe_audio(self, path: str) -> Generator:
        """Run the Whisper model on the audio file at ``path``.

        Returns:
            The segments object produced by ``self.model.transcribe``; the
            second value of that call is discarded.
        """
        print(f"Reading {path}")
        segments, _ = self.model.transcribe(path)
        return segments

    def process_segments(self, segments: Generator) -> pd.DataFrame:
        """Collect transcription segments into a table, one row per segment.

        Args:
            segments: Iterable of objects exposing ``start``, ``end`` and
                ``text`` attributes.

        Returns:
            DataFrame with the columns ``chunk_id``, ``chunk_length``, ``text``,
            ``start_time`` and ``end_time``. The columns are present even when
            ``segments`` yields nothing.
        """
        rows = [
            {
                'chunk_id': f"chunk_{i}",
                'chunk_length': segment.end - segment.start,
                'text': segment.text,
                'start_time': segment.start,
                'end_time': segment.end,
            }
            for i, segment in enumerate(segments)
        ]
        return pd.DataFrame(rows, columns=list(self._SEGMENT_COLUMNS))
|
|
|
|
|
def transcribe_youtube_video(url: str, model_path: str = "distil-large-v2") -> str:
    """Download a video's audio, transcribe it and write the segments to a CSV.

    Args:
        url: Address of the video to transcribe.
        model_path: Whisper model identifier or path. A blank or ``None`` value
            falls back to ``"distil-large-v2"``.

    Returns:
        Path of the CSV file written under ``/tmp``, or the string
        ``"Failed to download audio."`` when the download step yields no file.
    """
    # A UI text field left blank arrives as an empty value instead of omitting
    # the argument, so the signature default alone does not protect the model
    # loader from an empty path.
    model_path = (model_path or "").strip() or "distil-large-v2"

    yt_transcriber = YouTubeTranscriber(model_path)
    audio_path = yt_transcriber.download_audio(url)

    if not audio_path:
        # NOTE(review): this message is returned where a file path is expected;
        # kept unchanged so existing callers see the same value.
        return "Failed to download audio."

    try:
        segments = yt_transcriber.transcribe_audio(audio_path)
        df = yt_transcriber.process_segments(segments)
    finally:
        # The audio is only an intermediate artifact; remove it once every
        # segment has been consumed so repeated runs do not pile up files.
        try:
            os.remove(audio_path)
        except OSError:
            # Best-effort cleanup: a missing or locked file must not mask the
            # transcription result or the original error.
            pass

    output_csv = os.path.join("/tmp", f"{uuid.uuid4()}.csv")
    df.to_csv(output_csv, index=False)
    return output_csv
|
|
|
|
|
import gradio as gr |
|
|
|
# Web UI: takes a video URL and a Whisper model path, and offers the CSV
# produced by transcribe_youtube_video as a downloadable file.
interface = gr.Interface(
    fn=transcribe_youtube_video,
    inputs=[
        gr.Textbox(lines=1, placeholder="Enter YouTube URL here...", label="YouTube URL"),
        # Pre-fill the model field with the function's own default so that an
        # untouched field does not submit a blank model path.
        gr.Textbox(lines=1, value="distil-large-v2", label="Whisper Model Path"),
    ],
    outputs=gr.File(label="Transcribed Segments CSV"),
    title="YouTube Audio Transcriber",
    description="Enter a YouTube URL to download the audio and transcribe it using Whisper.",
)

# Start the Gradio server.
interface.launch()