# Hugging Face Spaces page header (status: Running)
import os
import subprocess

import gradio as gr
import whisper
import yt_dlp as youtube_dl
from moviepy.video.io.VideoFileClip import VideoFileClip
from pydub import AudioSegment
from transformers import pipeline, MarianMTModel, MarianTokenizer
def download_youtube_video(url):
    """Download the video at *url* to ``video.mp4`` with the ``yt-dlp`` CLI.

    Args:
        url: Address of the video to fetch. It is user-supplied and must be
            treated as untrusted.

    Returns:
        ``"video.mp4"`` when the download succeeds; otherwise the text of the
        exception that was raised (missing executable, non-zero exit status).
    """
    try:
        # Pass an argument list rather than a shell string: interpolating the
        # URL into os.system() let shell metacharacters in it run arbitrary
        # commands.
        #   --force-overwrites  replace a video.mp4 left by an earlier request
        #                       instead of silently reusing the stale file
        #   --                  end of options, so a URL starting with "-" is
        #                       not parsed as a yt-dlp flag
        # check=True raises on a non-zero exit status, so a failed download is
        # no longer reported as success.
        subprocess.run(
            ["yt-dlp", "--force-overwrites", "-o", "video.mp4", "--", url],
            check=True,
        )
        return "video.mp4"
    except Exception as e:
        return str(e)
def extract_audio(video_path):
    """Write the audio of *video_path* to ``extracted_audio.mp3``.

    Returns the output file name on success, or the message of the exception
    raised while reading or exporting.
    """
    output_name = "extracted_audio.mp3"
    try:
        AudioSegment.from_file(video_path).export(output_name, format="mp3")
    except Exception as err:
        return str(err)
    return output_name
def transcribe_audio(audio_path):
    """Transcribe the audio file at *audio_path* with Whisper's "base" model.

    Returns the ``'text'`` entry of the transcription result, or the message
    of any exception raised while loading the model or transcribing.
    """
    try:
        recognizer = whisper.load_model("base")
        return recognizer.transcribe(audio_path)['text']
    except Exception as err:
        return str(err)
def summarize_text(text):
    """Summarize *text* with the ``facebook/bart-large-cnn`` pipeline.

    Args:
        text: The text to condense, e.g. a transcription.

    Returns:
        The generated summary; ``""`` when *text* is empty, ``None`` or only
        whitespace; or the exception message if summarization fails.
    """
    # Nothing to summarize: skip loading the model rather than feeding it an
    # empty prompt.
    if not text or not text.strip():
        return ""
    try:
        summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
        # truncation=True clips input that is longer than the model accepts,
        # so a long transcription is summarized from its beginning instead of
        # failing inside the model.
        summary = summarizer(
            text,
            max_length=150,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        return summary[0]['summary_text']
    except Exception as e:
        return str(e)
def translate_text(text, tgt_lang="es"):
    """Translate English *text* with a ``Helsinki-NLP/opus-mt-en-*`` model.

    Args:
        text: The text to translate.
        tgt_lang: Target-language suffix used to build the model name. A
            falsy value (``None``, ``""``) falls back to ``"es"``.

    Returns:
        The translated text; ``""`` when *text* is empty, ``None`` or only
        whitespace; or the exception message if loading or generation fails.
    """
    if not text or not text.strip():
        return ""
    # A caller can pass None explicitly (e.g. from an unselected UI control),
    # which bypasses the parameter default and would otherwise produce the
    # model name "Helsinki-NLP/opus-mt-en-None".
    tgt_lang = tgt_lang or "es"
    try:
        model_name = f"Helsinki-NLP/opus-mt-en-{tgt_lang}"
        tokenizer = MarianTokenizer.from_pretrained(model_name)
        model = MarianMTModel.from_pretrained(model_name)
        # truncation=True keeps over-long input within the model's limit.
        batch = tokenizer(text, return_tensors="pt", padding=True, truncation=True)
        translated = model.generate(**batch)
        return tokenizer.decode(translated[0], skip_special_tokens=True)
    except Exception as e:
        return str(e)
def process_video(video_file, youtube_url):
    """Transcribe a video supplied as a YouTube URL or as an uploaded file.

    The URL takes precedence when both inputs are given.

    Args:
        video_file: The uploaded file, either a path string or an object
            exposing the path as ``.name``; may be ``None``.
        youtube_url: URL of a video to download; may be empty or ``None``.

    Returns:
        The transcription text, or a message describing why no transcription
        could be produced.
    """
    # Treat a whitespace-only URL box as "no URL" so the upload is still used.
    youtube_url = (youtube_url or "").strip()
    if youtube_url:
        video_path = download_youtube_video(youtube_url)
    elif video_file:
        # Accept both a plain path string and a file-like object with .name.
        video_path = getattr(video_file, "name", video_file)
    else:
        return "No valid input provided."

    # The helpers report failure by returning the error text in place of a
    # path. Stop at the first step that did not yield a real file, so the
    # original error is shown instead of being passed on as a bogus file name.
    if not os.path.isfile(video_path):
        return f"Could not load video: {video_path}"
    audio_path = extract_audio(video_path)
    if not os.path.isfile(audio_path):
        return f"Could not extract audio: {audio_path}"
    return transcribe_audio(audio_path)
def summarize_and_translate(text, lang):
    """Summarize *text*, then translate that summary into *lang*.

    Returns a ``(summary, translation)`` tuple.
    """
    condensed = summarize_text(text)
    return condensed, translate_text(condensed, lang)
# Gradio UI: transcribe a video, then summarize and translate the transcription.
with gr.Blocks() as app:
    gr.Markdown("# 🎥 Smart Video-to-Text Summarization App")

    # NOTE(review): the original indentation was lost; the two input widgets
    # are assumed to be the only children of the Row. Confirm the layout.
    with gr.Row():
        video_upload = gr.File(label="Upload Video File", type="filepath")
        youtube_link = gr.Textbox(label="YouTube URL")

    # Step 1: video -> transcription.
    process_button = gr.Button("🚀 Process Video")
    transcription_output = gr.Textbox(label="Transcription", interactive=False)
    process_button.click(process_video, inputs=[video_upload, youtube_link], outputs=transcription_output)

    # Steps 2 and 3: transcription -> summary -> translated summary.
    summarize_button = gr.Button("📝 Summarize Text")
    summary_output = gr.Textbox(label="Summary", interactive=False)
    translate_button = gr.Button("🌍 Translate Summary")
    # Start with a language selected so that pressing "Translate" without
    # touching the dropdown does not pass None as the target language.
    language_dropdown = gr.Dropdown(
        choices=["es", "fr", "de", "zh"],
        value="es",
        label="Select Translation Language",
    )
    translated_output = gr.Textbox(label="Translated Summary", interactive=False)

    summarize_button.click(summarize_text, inputs=transcription_output, outputs=summary_output)
    # Re-summarizes the transcription and refreshes both output boxes.
    translate_button.click(
        summarize_and_translate,
        inputs=[transcription_output, language_dropdown],
        outputs=[summary_output, translated_output],
    )

app.launch()