yt-chunks / app.py
archit11's picture
Create app.py
a2632d3 verified
raw
history blame
3.15 kB
import os
import uuid
from typing import Generator, Optional

import gradio as gr
import pandas as pd
import yt_dlp as youtube_dl
from faster_whisper import WhisperModel
class YouTubeTranscriber:
    """Download a video's audio with yt-dlp and transcribe it with faster-whisper."""

    # Column schema of the frame built by ``process_segments``. Declared once so
    # an empty transcription still produces a frame (and CSV header) of this shape.
    _SEGMENT_COLUMNS = ('chunk_id', 'chunk_length', 'text', 'start_time', 'end_time')

    def __init__(self, model_path: str):
        """Create the Whisper model.

        Args:
            model_path: Value handed straight to ``WhisperModel``.
        """
        self.model = WhisperModel(model_path)

    def download_audio(self, url: str, preferred_quality: str = "192") -> Optional[str]:
        """Download the audio of ``url`` and convert it to an mp3 under ``/tmp``.

        Args:
            url: Address of the video to fetch.
            preferred_quality: Forwarded as ``preferredquality`` to the
                ``FFmpegExtractAudio`` post-processor.

        Returns:
            Path of the mp3 file, or ``None`` when yt-dlp raises a
            ``DownloadError``.
        """
        base_path = os.path.join("/tmp", str(uuid.uuid4()))  # Use /tmp directory for temporary storage
        # The post-processor writes its result next to the download with the
        # codec's extension, so this is where the finished mp3 ends up.
        output_path = f"{base_path}.mp3"
        ydl_opts = {
            'format': 'bestaudio/best',
            'postprocessors': [{
                'key': 'FFmpegExtractAudio',
                'preferredcodec': 'mp3',
                'preferredquality': preferred_quality,
            }],
            # Let yt-dlp name the raw download after its real container
            # (webm, m4a, ...) instead of mislabeling it ".mp3" and forcing the
            # extractor to convert a file onto its own path.
            'outtmpl': f"{base_path}.%(ext)s",
        }
        try:
            with youtube_dl.YoutubeDL(ydl_opts) as ydl:
                info_dict = ydl.extract_info(url, download=False)
                video_title = info_dict.get('title', 'Unknown title')
                print(f"Downloading audio for: {video_title}")
                ydl.download([url])
                print(f"Audio file saved as: {output_path}")
                return output_path
        except youtube_dl.utils.DownloadError as e:
            print(f"Error downloading audio: {e}")
            return None

    def transcribe_audio(self, path: str) -> Generator:
        """Start transcribing the audio file at ``path``.

        Returns:
            The first element of ``self.model.transcribe(path)``; the second
            element is discarded.
        """
        print(f"Reading {path}")
        segments, _ = self.model.transcribe(path)
        return segments

    def process_segments(self, segments: Generator) -> pd.DataFrame:
        """Collect transcription segments into a table, one row per segment.

        Args:
            segments: Iterable of objects exposing ``start``, ``end`` and
                ``text`` attributes.

        Returns:
            A frame with columns ``chunk_id`` (``chunk_<index>``),
            ``chunk_length`` (``end - start``), ``text``, ``start_time`` and
            ``end_time``. The columns are present even when ``segments`` is
            empty.
        """
        rows = [
            {
                'chunk_id': f"chunk_{i}",
                'chunk_length': segment.end - segment.start,
                'text': segment.text,
                'start_time': segment.start,
                'end_time': segment.end,
            }
            for i, segment in enumerate(segments)
        ]
        # Passing the columns explicitly keeps the schema when ``rows`` is empty.
        return pd.DataFrame(rows, columns=list(self._SEGMENT_COLUMNS))
# Function to be called by the Gradio interface
def transcribe_youtube_video(url: str, model_path: str = "distil-large-v2") -> str:
    """Download a video's audio, transcribe it, and write the segments to a CSV.

    Args:
        url: Address of the video to transcribe.
        model_path: Whisper model name or path. A blank value falls back to
            ``"distil-large-v2"``.

    Returns:
        Path of the CSV file written under ``/tmp``.

    Raises:
        gr.Error: If the audio could not be downloaded.
    """
    # A textbox left blank in the UI arrives as an empty value, not as the
    # Python default, so apply the fallback here as well.
    model_path = (model_path or "").strip() or "distil-large-v2"
    yt_transcriber = YouTubeTranscriber(model_path)
    audio_path = yt_transcriber.download_audio(url)
    if not audio_path:
        # The interface output is a file component; returning a sentence would
        # be treated as a file path. Raising shows the message to the user.
        raise gr.Error("Failed to download audio.")
    try:
        segments = yt_transcriber.transcribe_audio(audio_path)
        df = yt_transcriber.process_segments(segments)
    finally:
        # Remove the audio only after the segments were consumed above:
        # transcribe_audio hands back a generator, so the file may still be
        # needed while process_segments iterates.
        try:
            os.remove(audio_path)
        except OSError:
            pass  # Best-effort cleanup; a leftover temp file is not fatal.
    output_csv = os.path.join("/tmp", f"{uuid.uuid4()}.csv")
    df.to_csv(output_csv, index=False)
    return output_csv
import gradio as gr
# Gradio UI: a URL box and a model box in, a downloadable CSV out.
interface = gr.Interface(
    fn=transcribe_youtube_video,
    inputs=[
        gr.Textbox(lines=1, placeholder="Enter YouTube URL here...", label="YouTube URL"),
        # Pre-fill the model so submitting without touching this box does not
        # send an empty model path to the transcriber.
        gr.Textbox(lines=1, value="distil-large-v2", label="Whisper Model Path"),
    ],
    outputs=gr.File(label="Transcribed Segments CSV"),  # Use gr.File directly
    title="YouTube Audio Transcriber",
    description="Enter a YouTube URL to download the audio and transcribe it using Whisper.",
)
# Launch the interface
interface.launch()