File size: 3,224 Bytes
9431860
2a991e1
 
 
 
 
 
083a014
2a991e1
 
 
 
 
 
083a014
2a991e1
 
 
 
 
 
 
083a014
2a991e1
 
 
 
 
 
 
083a014
2a991e1
 
 
 
 
 
 
236b4e0
2a991e1
 
 
 
 
 
 
 
 
58ddc5a
2a991e1
 
 
 
 
 
 
 
 
 
 
9431860
2a991e1
 
 
 
9431860
2a991e1
 
083a014
2a991e1
a1bccac
2a991e1
236b4e0
2a991e1
 
 
236b4e0
2a991e1
 
236b4e0
2a991e1
 
 
236b4e0
2a991e1
 
236b4e0
2a991e1
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import os
import gradio as gr
from moviepy.video.io.VideoFileClip import VideoFileClip
from pydub import AudioSegment
import whisper
from transformers import pipeline, MarianMTModel, MarianTokenizer
import yt_dlp as youtube_dl

def download_youtube_video(url):
    """Download a video with the ``yt-dlp`` command-line tool.

    The URL comes straight from a user-facing textbox, so the command is
    run from an argument list (no shell) and the URL is placed after ``--``
    so it can be interpreted neither as shell syntax nor as a yt-dlp option.

    Args:
        url: Address of the video to download.

    Returns:
        ``"video.mp4"`` when the download command succeeds; otherwise the
        text of the exception that was raised (missing executable, non-zero
        exit status, ...), matching the error-as-string convention of the
        other helpers in this file.
    """
    import subprocess  # local import keeps this function self-contained

    output_path = "video.mp4"
    try:
        # Remove any output left by a previous request so that a file at
        # this path after the call can only come from this download.
        if os.path.exists(output_path):
            os.remove(output_path)
        # check=True turns a failed download into CalledProcessError instead
        # of silently reporting success.
        subprocess.run(["yt-dlp", "-o", output_path, "--", url], check=True)
        return output_path
    except Exception as e:
        return str(e)

def extract_audio(video_path):
    """Write the audio track of a media file to ``extracted_audio.mp3``.

    Args:
        video_path: Path of the media file to read.

    Returns:
        ``"extracted_audio.mp3"`` on success, or the exception message as a
        string if loading or exporting fails.
    """
    mp3_path = "extracted_audio.mp3"
    try:
        AudioSegment.from_file(video_path).export(mp3_path, format="mp3")
    except Exception as err:
        return str(err)
    return mp3_path

def transcribe_audio(audio_path):
    """Transcribe an audio file with the Whisper ``base`` model.

    Args:
        audio_path: Path of the audio file to transcribe.

    Returns:
        The ``'text'`` entry of the transcription result, or the exception
        message as a string if loading the model or transcribing fails.
    """
    try:
        return whisper.load_model("base").transcribe(audio_path)['text']
    except Exception as err:
        return str(err)

def summarize_text(text):
    """Summarize text with the ``facebook/bart-large-cnn`` pipeline.

    Args:
        text: The text to condense.

    Returns:
        The generated summary. An empty string is returned when ``text`` is
        empty or only whitespace (there is nothing to summarize, so the
        model is not loaded). If anything fails, the exception message is
        returned as a string.
    """
    try:
        if not text or not text.strip():
            return ""
        summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
        # truncation=True asks the pipeline to clip over-long input to the
        # model's input limit instead of failing on long transcripts.
        summary = summarizer(
            text,
            max_length=150,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        return summary[0]['summary_text']
    except Exception as e:
        return str(e)

def translate_text(text, tgt_lang="es"):
    """Translate English text with a Helsinki-NLP MarianMT model.

    Args:
        text: English text to translate.
        tgt_lang: Target language code used to build the model name
            ``Helsinki-NLP/opus-mt-en-<tgt_lang>``. A missing value
            (``None`` or ``""``, e.g. from an unselected dropdown) falls
            back to ``"es"``.

    Returns:
        The translated text. An empty string is returned when ``text`` is
        empty or only whitespace. If anything fails, the exception message
        is returned as a string.
    """
    try:
        if not text or not text.strip():
            return ""
        # An unset language would otherwise produce "opus-mt-en-None".
        tgt_lang = tgt_lang or "es"
        model_name = f"Helsinki-NLP/opus-mt-en-{tgt_lang}"
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)
        # truncation=True clips input that exceeds the tokenizer's maximum
        # length rather than passing an over-long sequence to the model.
        encoded = tokenizer(
            text, return_tensors="pt", padding=True, truncation=True
        )
        translated = model.generate(**encoded)
        return tokenizer.decode(translated[0], skip_special_tokens=True)
    except Exception as e:
        return str(e)

def process_video(video_file, youtube_url):
    """Turn an uploaded video or a video URL into a transcription.

    A non-empty ``youtube_url`` takes precedence over ``video_file``.

    Args:
        video_file: The uploaded file, either as a path string or as an
            object exposing the path through ``.name``; falsy when nothing
            was uploaded.
        youtube_url: URL to download; falsy when not provided.

    Returns:
        The transcription text, or a human-readable message when no input
        was given or an intermediate file could not be produced.
    """
    if youtube_url:
        video_path = download_youtube_video(youtube_url)
    elif video_file:
        # The upload may arrive as a plain path string (str has no .name)
        # or as a file-like object; accept both.
        video_path = video_file if isinstance(video_file, str) else video_file.name
    else:
        return "No valid input provided."

    # The helpers report failure by returning an error message in place of
    # a path, so stop here instead of feeding that message to the next stage.
    if not os.path.isfile(video_path):
        return f"Could not load video: {video_path}"

    audio_path = extract_audio(video_path)
    if not os.path.isfile(audio_path):
        return f"Could not extract audio: {audio_path}"

    return transcribe_audio(audio_path)

def summarize_and_translate(text, lang):
    """Summarize ``text`` and translate the resulting summary.

    Args:
        text: Text to summarize.
        lang: Target language code passed to ``translate_text``.

    Returns:
        A ``(summary, translation)`` tuple.
    """
    condensed = summarize_text(text)
    return condensed, translate_text(condensed, lang)

# Build the UI. Components are created in the order they should appear on
# the page, so the statement order below is significant.
with gr.Blocks() as app:
    gr.Markdown("# 🎥 Smart Video-to-Text Summarization App")
    
    # Input row: either upload a file or paste a URL.
    with gr.Row():
        video_upload = gr.File(label="Upload Video File", type="filepath")
        youtube_link = gr.Textbox(label="YouTube URL")
    
    process_button = gr.Button("🚀 Process Video")
    transcription_output = gr.Textbox(label="Transcription", interactive=False)
    process_button.click(process_video, inputs=[video_upload, youtube_link], outputs=transcription_output)
    
    summarize_button = gr.Button("📝 Summarize Text")
    summary_output = gr.Textbox(label="Summary", interactive=False)
    
    translate_button = gr.Button("🌍 Translate Summary")
    # Pre-select a language so the translate handler never receives None
    # when the user clicks the button without touching the dropdown.
    language_dropdown = gr.Dropdown(choices=["es", "fr", "de", "zh"], value="es", label="Select Translation Language")
    translated_output = gr.Textbox(label="Translated Summary", interactive=False)
    
    summarize_button.click(summarize_text, inputs=transcription_output, outputs=summary_output)
    # Translating re-runs the summary so both output boxes stay in sync.
    translate_button.click(summarize_and_translate, inputs=[transcription_output, language_dropdown], outputs=[summary_output, translated_output])
    
app.launch()