# Video_Summ / app.py
import gradio as gr
from pydub import AudioSegment
import whisper
from transformers import pipeline, MarianMTModel, MarianTokenizer
import yt_dlp as youtube_dl
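# Pipeline: fetch the video (local upload or YouTube) -> extract audio -> transcribe
# with Whisper -> summarize with BART -> optionally translate with MarianMT.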
def download_youtube_video(url):
    try:
        # Use the yt-dlp Python API (already imported) instead of shelling out,
        # so download failures surface as exceptions rather than silently
        # returning a missing "video.mp4".
        ydl_opts = {"outtmpl": "video.mp4", "format": "mp4"}
        with youtube_dl.YoutubeDL(ydl_opts) as ydl:
            ydl.download([url])
        return "video.mp4"
    except Exception as e:
        return str(e)
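# pydub decodes the video container through ffmpeg and writes out only the audio track.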
def extract_audio(video_path):
try:
audio = AudioSegment.from_file(video_path)
audio.export("extracted_audio.mp3", format="mp3")
return "extracted_audio.mp3"
except Exception as e:
return str(e)
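# Whisper's "base" checkpoint is fetched on first use; loading it inside the handler
# keeps startup light but adds a few seconds to every transcription request.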
def transcribe_audio(audio_path):
try:
model = whisper.load_model("base")
result = model.transcribe(audio_path)
return result['text']
except Exception as e:
return str(e)
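# facebook/bart-large-cnn is an abstractive summarizer with a ~1024-token input
# window, so long transcriptions are truncated before summarization (see below).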
def summarize_text(text):
    try:
        summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
        # truncation=True keeps long transcripts within the model's input window
        # instead of raising an error.
        summary = summarizer(text, max_length=150, min_length=30, do_sample=False, truncation=True)
        return summary[0]['summary_text']
    except Exception as e:
        return str(e)
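# Helsinki-NLP/opus-mt-en-<lang> are compact MarianMT models, one per language pair;
# the matching model is downloaded the first time a target language is requested.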
def translate_text(text, tgt_lang="es"):
try:
model_name = f"Helsinki-NLP/opus-mt-en-{tgt_lang}"
tokenizer = MarianTokenizer.from_pretrained(model_name)
model = MarianMTModel.from_pretrained(model_name)
translated = model.generate(**tokenizer(text, return_tensors="pt", padding=True))
return tokenizer.decode(translated[0], skip_special_tokens=True)
except Exception as e:
return str(e)
def process_video(video_file, youtube_url, video_source):
    # gr.File(type="filepath") passes the upload as a path string, not a file object.
    if video_source == "Local Video" and video_file:
        video_path = video_file
    elif video_source == "YouTube" and youtube_url:
        video_path = download_youtube_video(youtube_url)
    else:
        return "No valid input provided.", None
    audio_path = extract_audio(video_path)
    transcription = transcribe_audio(audio_path)
    # Reveal the hidden video player alongside the transcription.
    return transcription, gr.update(value=video_path, visible=True)
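# Handler for the Translate button: summarize the transcription first, then
# translate that summary into the selected language.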
def summarize_and_translate(text, lang):
summary = summarize_text(text)
translation = translate_text(summary, lang)
return summary, translation
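# Gradio Blocks UI: custom CSS for the glass-card/gradient styling plus controls for
# choosing a source, processing the video, summarizing, and translating.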
with gr.Blocks(css="""
.glass-card {
background: rgba(255, 255, 255, 0.1);
backdrop-filter: blur(10px);
border: 1px solid rgba(255, 255, 255, 0.2);
border-radius: 20px;
padding: 2rem;
transition: transform 0.3s ease, box-shadow 0.3s ease;
}
.btn-blue {
background-color: #007BFF;
color: white;
border-radius: 10px;
}
.btn-blue:hover {
background-color: #0056b3;
}
.gradient-font {
background: linear-gradient(90deg, #ff7f50, #ff6347);
-webkit-background-clip: text;
color: transparent;
}
""") as app:
gr.Markdown("<h1 class='gradient-font'>🎥 Smart Video-to-Text Summarization App</h1>")
with gr.Row():
video_source = gr.Radio(["Local Video", "YouTube"], label="Choose Video Source", value="Local Video")
video_upload = gr.File(label="Upload Video File", type="filepath")
youtube_link = gr.Textbox(label="YouTube URL")
video_display = gr.Video(label="Processed Video", visible=False)
process_button = gr.Button("🚀 Process Video", elem_classes=["btn-blue"])
transcription_output = gr.Textbox(label="Transcription", interactive=False)
process_button.click(process_video, inputs=[video_upload, youtube_link, video_source], outputs=[transcription_output, video_display])
summarize_button = gr.Button("📝 Summarize Text", elem_classes=["btn-blue"])
summary_output = gr.Textbox(label="Summary", interactive=False)
translate_button = gr.Button("🌍 Translate Summary", elem_classes=["btn-blue"])
language_dropdown = gr.Dropdown(choices=["es", "fr", "de", "zh"], label="Select Translation Language")
translated_output = gr.Textbox(label="Translated Summary", interactive=False)
summarize_button.click(summarize_text, inputs=transcription_output, outputs=summary_output)
translate_button.click(summarize_and_translate, inputs=[transcription_output, language_dropdown], outputs=[summary_output, translated_output])
app.launch()