# Source: Hugging Face Space "JaganathC/Video_Summ" (status: Running).
# File size: 3,224 bytes.

import os
import gradio as gr
from moviepy.video.io.VideoFileClip import VideoFileClip
from pydub import AudioSegment
import whisper
from transformers import pipeline, MarianMTModel, MarianTokenizer
import yt_dlp as youtube_dl

def download_youtube_video(url):
    """Download the video at *url* to ``video.mp4`` with the yt-dlp CLI.

    The URL is handed to yt-dlp as its own argv element (no shell is
    involved), so shell metacharacters in user input are never interpreted.
    The ``--`` separator keeps a URL that starts with ``-`` from being parsed
    as a yt-dlp option.

    Args:
        url: Address of the video to download.

    Returns:
        ``"video.mp4"`` when the file was written; otherwise a human-readable
        error message (failures are reported as strings, not raised).
    """
    import subprocess  # local import: not part of the file-level imports

    output_path = "video.mp4"
    if not url or not url.strip():
        return "No URL provided."
    try:
        # Remove a file left by an earlier download so a failed or skipped
        # run can never hand back the previous video.
        if os.path.exists(output_path):
            os.remove(output_path)
        # check=True turns a non-zero yt-dlp exit status into an exception.
        subprocess.run(
            ["yt-dlp", "-o", output_path, "--", url.strip()],
            check=True,
        )
    except Exception as e:
        return str(e)
    if not os.path.isfile(output_path):
        return f"Download finished but {output_path} was not created."
    return output_path

def extract_audio(video_path):
    """Write the audio of *video_path* to ``extracted_audio.mp3``.

    Args:
        video_path: Media file to read with ``AudioSegment.from_file``.

    Returns:
        ``"extracted_audio.mp3"`` on success, or the message of whatever
        exception was raised while reading or exporting.
    """
    output_path = "extracted_audio.mp3"
    try:
        AudioSegment.from_file(video_path).export(output_path, format="mp3")
    except Exception as err:
        return str(err)
    return output_path

# Whisper models already loaded in this process, keyed by model name.
_WHISPER_MODELS = {}


def _get_whisper_model(name="base"):
    """Return the Whisper model called *name*, loading it on first use only."""
    if name not in _WHISPER_MODELS:
        _WHISPER_MODELS[name] = whisper.load_model(name)
    return _WHISPER_MODELS[name]


def transcribe_audio(audio_path):
    """Transcribe the audio file at *audio_path* with the Whisper "base" model.

    The model is loaded once and reused by later calls instead of being
    reloaded for every request.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The ``'text'`` entry of the Whisper result on success; otherwise an
        error message (failures are reported as strings, not raised).
    """
    # A path that does not exist (for instance an error message returned by an
    # earlier stage) is rejected before the model is loaded.
    if isinstance(audio_path, (str, os.PathLike)) and not os.path.isfile(audio_path):
        return f"Audio file not found: {audio_path}"
    try:
        result = _get_whisper_model("base").transcribe(audio_path)
        return result['text']
    except Exception as e:
        return str(e)

# Summarization pipeline, created on first use and reused afterwards.
_SUMMARIZER = None


def _get_summarizer():
    """Return the shared BART summarization pipeline, building it once."""
    global _SUMMARIZER
    if _SUMMARIZER is None:
        _SUMMARIZER = pipeline("summarization", model="facebook/bart-large-cnn")
    return _SUMMARIZER


def summarize_text(text):
    """Summarize *text* with the ``facebook/bart-large-cnn`` pipeline.

    Args:
        text: Text to summarize. Input longer than the model accepts is
            truncated by the tokenizer rather than causing a failure.

    Returns:
        The generated summary; ``""`` when *text* is empty or only
        whitespace; or an error message if the pipeline raises (failures are
        reported as strings, not raised).
    """
    if not text or (isinstance(text, str) and not text.strip()):
        return ""
    try:
        summary = _get_summarizer()(
            text,
            max_length=150,
            min_length=30,
            do_sample=False,
            truncation=True,  # long transcripts exceed the model's input limit
        )
        return summary[0]['summary_text']
    except Exception as e:
        return str(e)

# (tokenizer, model) pairs already loaded, keyed by target language code.
_TRANSLATORS = {}


def _get_translator(tgt_lang):
    """Return the (tokenizer, model) pair for English -> *tgt_lang*, loading once."""
    if tgt_lang not in _TRANSLATORS:
        model_name = f"Helsinki-NLP/opus-mt-en-{tgt_lang}"
        _TRANSLATORS[tgt_lang] = (
            MarianTokenizer.from_pretrained(model_name),
            MarianMTModel.from_pretrained(model_name),
        )
    return _TRANSLATORS[tgt_lang]


def translate_text(text, tgt_lang="es"):
    """Translate English *text* with a ``Helsinki-NLP/opus-mt-en-*`` model.

    Args:
        text: English text to translate. Input longer than the tokenizer's
            limit is truncated.
        tgt_lang: Target language code used to build the model name. A falsy
            value (e.g. ``None`` when nothing is selected in the UI) falls
            back to the default ``"es"``.

    Returns:
        The decoded translation; ``""`` when *text* is empty or only
        whitespace; or an error message if loading or generation raises
        (failures are reported as strings, not raised).
    """
    if not text or (isinstance(text, str) and not text.strip()):
        return ""
    tgt_lang = tgt_lang or "es"
    try:
        tokenizer, model = _get_translator(tgt_lang)
        batch = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
        translated = model.generate(**batch)
        return tokenizer.decode(translated[0], skip_special_tokens=True)
    except Exception as e:
        return str(e)

def process_video(video_file, youtube_url):
    """Turn an uploaded video or a video URL into a transcription.

    A non-blank *youtube_url* takes precedence over *video_file*.

    Args:
        video_file: The uploaded file, given either as a path string or as
            an object exposing the path in a ``name`` attribute; may be
            ``None``.
        youtube_url: URL of a video to download; may be empty or ``None``.

    Returns:
        The transcription text, or a message describing which stage failed.
    """
    url = youtube_url.strip() if isinstance(youtube_url, str) else youtube_url
    if url:
        video_path = download_youtube_video(url)
    elif video_file:
        # The upload may arrive as a plain path string, which has no ``.name``.
        video_path = getattr(video_file, "name", video_file)
    else:
        return "No valid input provided."

    # The stage helpers report failures by returning a message instead of a
    # path, so anything that is not an existing file is surfaced as an error
    # here rather than being fed to the next stage.
    if not os.path.isfile(video_path):
        return f"Video unavailable: {video_path}"

    audio_path = extract_audio(video_path)
    if not os.path.isfile(audio_path):
        return f"Audio extraction failed: {audio_path}"

    return transcribe_audio(audio_path)

def summarize_and_translate(text, lang):
    """Summarize *text*, then translate that summary into *lang*.

    Args:
        text: Text passed to ``summarize_text``.
        lang: Target language code passed to ``translate_text``.

    Returns:
        A ``(summary, translation)`` tuple.
    """
    condensed = summarize_text(text)
    return condensed, translate_text(condensed, lang)

# Gradio UI: video input -> transcription -> summary -> translated summary.
with gr.Blocks() as app:
    gr.Markdown("# 🎥 Smart Video-to-Text Summarization App")

    # Input row: either upload a file or paste a URL.
    with gr.Row():
        video_upload = gr.File(label="Upload Video File", type="filepath")
        youtube_link = gr.Textbox(label="YouTube URL")

    # Step 1: transcribe the chosen video.
    process_button = gr.Button("🚀 Process Video")
    transcription_output = gr.Textbox(label="Transcription", interactive=False)
    process_button.click(process_video, inputs=[video_upload, youtube_link], outputs=transcription_output)

    # Step 2: summarize the transcription.
    summarize_button = gr.Button("📝 Summarize Text")
    summary_output = gr.Textbox(label="Summary", interactive=False)

    # Step 3: translate the summary. An explicit default is set so the
    # handler always receives a language code, even if the user never opens
    # the dropdown (presumably the unset value is None on some Gradio
    # versions — an explicit value avoids relying on that).
    translate_button = gr.Button("🌍 Translate Summary")
    language_dropdown = gr.Dropdown(
        choices=["es", "fr", "de", "zh"],
        value="es",
        label="Select Translation Language",
    )
    translated_output = gr.Textbox(label="Translated Summary", interactive=False)

    summarize_button.click(summarize_text, inputs=transcription_output, outputs=summary_output)
    # Translating re-runs the summarizer on the transcription, so both the
    # summary and the translated summary boxes are refreshed.
    translate_button.click(summarize_and_translate, inputs=[transcription_output, language_dropdown], outputs=[summary_output, translated_output])

app.launch()