# Hugging Face Space: das1mtb56/yt_trans (status: Running)
# File size: 3,341 Bytes

import os
import gradio as gr
import yt_dlp
import whisper
from transformers import pipeline, MarianMTModel, MarianTokenizer
import torch

# Checkpoint identifiers for every model this app loads at import time.
_WHISPER_CHECKPOINT = "small"
_SUMMARIZER_CHECKPOINT = "Falconsai/text_summarization"
_TRANSLATION_CHECKPOINT = "Helsinki-NLP/opus-mt-mul-en"

# Speech-to-text model used to transcribe the downloaded audio.
whisper_model = whisper.load_model(_WHISPER_CHECKPOINT)

# Summarization pipeline applied to the English text.
summarizer = pipeline("summarization", model=_SUMMARIZER_CHECKPOINT)

# Multilingual-to-English translation: the tokenizer and the model are
# loaded from the same checkpoint.
translation_tokenizer = MarianTokenizer.from_pretrained(_TRANSLATION_CHECKPOINT)
translation_model = MarianMTModel.from_pretrained(_TRANSLATION_CHECKPOINT)

def download_audio(youtube_url, output_file="audio.webm"):
    """Download the best available audio stream of a video with yt-dlp.

    Args:
        youtube_url: URL of the video to fetch.
        output_file: Path the audio is written to. It is passed to yt-dlp
            as the output template, so the file keeps this exact name
            whatever container the selected stream uses.

    Returns:
        The path the audio was written to (``output_file``).

    Raises:
        Whatever ``yt_dlp.YoutubeDL.download`` raises for an invalid or
        unavailable URL.
    """
    ydl_opts = {
        'format': 'bestaudio/best',
        'outtmpl': output_file,
        'quiet': True,
        # The target path is the same for every call. By default yt-dlp
        # does not overwrite an existing media file, so without this a
        # second request would silently reuse the previous video's audio.
        'overwrites': True,
    }
    with yt_dlp.YoutubeDL(ydl_opts) as ydl:
        ydl.download([youtube_url])
    return output_file

def get_thumbnail(youtube_url):
    """Return the thumbnail URL yt-dlp reports for ``youtube_url``.

    Only metadata is requested (``download=False``); no media is fetched.
    An empty string is returned when the metadata has no ``"thumbnail"``
    entry.
    """
    with yt_dlp.YoutubeDL({'quiet': True}) as extractor:
        metadata = extractor.extract_info(youtube_url, download=False)
        thumbnail = metadata.get("thumbnail", "")
    return thumbnail

def translate_to_english(text, max_chars=500):
    """Translate ``text`` to English with the module-level MarianMT model.

    The text is translated piece by piece and the decoded pieces are joined
    with single spaces. Pieces are cut at whitespace so that a word is not
    split across two model calls. A single run of non-space characters
    longer than ``max_chars`` is still hard-split at ``max_chars``.

    Args:
        text: Text to translate. ``None``, empty or whitespace-only input
            yields an empty string.
        max_chars: Upper bound on the number of characters per piece.
            Must be positive (``textwrap.wrap`` raises ``ValueError``
            otherwise).

    Returns:
        The translated text as one string.
    """
    import textwrap  # local import keeps this block self-contained

    if not text:
        return ""

    # Whitespace-aware splitting; hyphenated words are kept intact.
    pieces = textwrap.wrap(text, width=max_chars, break_on_hyphens=False)

    translated = []
    for piece in pieces:
        # truncation/max_length cap the token count in case a piece
        # tokenizes to more than 512 tokens.
        inputs = translation_tokenizer(piece, return_tensors="pt", truncation=True, max_length=512)
        output = translation_model.generate(**inputs, max_length=512)
        translated.append(translation_tokenizer.decode(output[0], skip_special_tokens=True))
    return " ".join(translated)

def process_video(url):
    """Run the full pipeline for one video URL.

    Steps: download the audio, transcribe it with Whisper, translate the
    transcript to English, summarize the translation, and look up the
    video thumbnail.

    Args:
        url: Video URL as typed by the user.

    Returns:
        A 4-tuple ``(transcription, translated_text, summary,
        thumbnail_url)``, in the order the UI outputs are wired.

    Raises:
        gr.Error: If ``url`` is empty or whitespace-only.
    """
    url = (url or "").strip()
    if not url:
        # Fail with a readable message instead of an error from deep
        # inside yt-dlp.
        raise gr.Error("Please enter a YouTube video URL.")

    audio_path = download_audio(url)
    try:
        result = whisper_model.transcribe(audio_path)
    finally:
        # The audio is only needed for transcription. Removing it stops a
        # stale file from being picked up by a later request and keeps
        # the working directory from accumulating downloads.
        try:
            os.remove(audio_path)
        except FileNotFoundError:
            pass  # nothing was written, so there is nothing to clean up
    transcription = result["text"]

    if transcription.strip():
        translated_text = translate_to_english(transcription)

        # Summarize
        summary = summarizer(translated_text, max_length=130, min_length=30, do_sample=False)[0]["summary_text"]
    else:
        # No speech recognized: skip the models rather than feed them
        # empty input.
        translated_text = ""
        summary = ""

    # Get thumbnail
    thumbnail_url = get_thumbnail(url)

    return transcription, translated_text, summary, thumbnail_url

def download_summary(text):
    """Write ``text`` to ``summary.txt`` (UTF-8) and return that path.

    The file is created in the current working directory and replaces any
    existing file of the same name.
    """
    target = "summary.txt"
    with open(target, mode="w", encoding="utf-8") as out:
        out.write(text)
    return target

# UI: Gradio Blocks layout and event wiring for the summarizer app.
# NOTE(review): the window title mentions "LLaMA", but no LLaMA model is
# loaded anywhere in the visible code — confirm the title is intended.
with gr.Blocks(theme=gr.themes.Soft(), title="🎥 YouTube Video Summarizer with LLaMA") as demo:
    gr.Markdown("## 🧠 Multilingual YouTube Summarizer")
    gr.Markdown("Upload a video link and get the transcript, English translation, and summary.")

    # Input row: URL field and the button that starts processing.
    with gr.Row():
        youtube_input = gr.Text(label="YouTube Video URL", placeholder="https://www.youtube.com/watch?v=...")
        submit_btn = gr.Button("Transcribe & Summarize")

    # Output row: a column of text results and download controls, with the
    # thumbnail image as a sibling of that column in the same row.
    with gr.Row():
        with gr.Column():
            transcript_output = gr.Textbox(label="🔊 Original Transcript", lines=10)
            translation_output = gr.Textbox(label="🌍 Translated to English", lines=10)
            summary_output = gr.Textbox(label="🧾 Summary", lines=10)
            download_btn = gr.Button("📥 Download Summary")
            download_file = gr.File(label="Download Link")
        video_thumb = gr.Image(label="🎞️ Video Thumbnail", width=256)

    # Button actions
    # The outputs list must stay in the same order as the 4-tuple returned
    # by process_video: transcript, translation, summary, thumbnail.
    submit_btn.click(
        fn=process_video,
        inputs=[youtube_input],
        outputs=[transcript_output, translation_output, summary_output, video_thumb]
    )

    # Writes the current contents of the summary box to a file and exposes
    # it through the File component.
    download_btn.click(
        fn=download_summary,
        inputs=[summary_output],
        outputs=[download_file]
    )

# NOTE(review): this runs at import time (there is no __main__ guard);
# share=True asks Gradio for a public share link — confirm that is wanted
# in the deployment environment.
demo.launch(share=True)