summary_tube / app.py
cfc-tech's picture
uu
04a0f7f verified
raw
history blame
2.06 kB
import functools
import os
import subprocess
import tempfile

import gradio as gr
import torch
from huggingsound import SpeechRecognitionModel
from pytube import YouTube
from transformers import pipeline
@functools.lru_cache(maxsize=1)
def _load_speech_model():
    """Build the wav2vec2 speech recognizer once and reuse it across requests."""
    device = "cuda" if torch.cuda.is_available() else "cpu"
    return SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-english", device=device)


@functools.lru_cache(maxsize=1)
def _load_summarizer():
    """Build the default summarization pipeline once and reuse it across requests."""
    return pipeline('summarization')


def process_video(video_url):
    """Download a YouTube video's audio, transcribe it, and summarize the text.

    Args:
        video_url: URL of the YouTube video. Surrounding whitespace is ignored.

    Returns:
        A dict with three string entries:
        ``'status'`` is ``'Success'`` or ``'Error'``; ``'message'`` is empty
        on success and otherwise names the stage that failed; ``'data'`` holds
        the summary on success and is empty otherwise.

    The function does not raise for pipeline failures: every stage is wrapped
    so that the error is reported through the returned dict instead.
    """
    response = {
        'status': 'Success',
        'message': '',
        'data': '',
    }

    def fail(message):
        # Single place that turns the response into an error result.
        response['status'] = 'Error'
        response['message'] = message
        return response

    url = (video_url or '').strip()
    if not url:
        return fail('Failed to download and convert video: no video URL provided')

    # Work in a per-request scratch directory: fixed file names in the working
    # directory collide between requests and are never cleaned up.
    with tempfile.TemporaryDirectory(prefix='ytaudio_') as workdir:
        wav_path = os.path.join(workdir, 'ytaudio.wav')
        try:
            stream = YouTube(url).streams.filter(only_audio=True, file_extension='mp4').first()
            if stream is None:
                raise ValueError('no mp4 audio-only stream is available for this video')
            mp4_path = stream.download(output_path=workdir, filename='ytaudio.mp4')
            # '-y' keeps ffmpeg from stopping at an overwrite prompt.
            subprocess.run(
                ['ffmpeg', '-y', '-i', mp4_path, '-acodec', 'pcm_s16le', '-ar', '16000', wav_path],
                check=True,
            )
        except Exception as e:
            return fail(f'Failed to download and convert video: {e}')

        try:
            # Transcribe while the scratch directory (and the wav file) still exists.
            transcription = _load_speech_model().transcribe([wav_path])[0]['transcription']
        except Exception as e:
            return fail(f'Failed during speech recognition: {e}')

    if not transcription or not transcription.strip():
        return fail('Failed during speech recognition: no speech was recognized in the audio')

    try:
        # truncation=True clips transcripts that exceed the summarizer's input
        # limit instead of letting the call fail on long videos.
        summarized_text = _load_summarizer()(
            transcription,
            max_length=130,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        response['data'] = summarized_text[0]['summary_text']
    except Exception as e:
        return fail(f'Failed during summarization: {e}')

    return response
def _process_video_outputs(video_url):
    """Adapt ``process_video``'s result dict to the interface's three outputs.

    The interface below declares three output boxes (Status, Message,
    Summary), so the callback must return three values in that order rather
    than a single dict.
    """
    result = process_video(video_url)
    return result['status'], result['message'], result['data']


# Web UI: one URL text box in, three text boxes out (status, message, summary).
iface = gr.Interface(
    fn=_process_video_outputs,
    inputs=gr.inputs.Textbox(lines=2, placeholder="Enter YouTube Video URL Here..."),
    outputs=[
        gr.outputs.Textbox(label="Status"),
        gr.outputs.Textbox(label="Message"),
        gr.outputs.Textbox(label="Summary")
    ],
    title="YouTube Video Summarizer",
    description="This tool extracts audio from a YouTube video, transcribes it, and provides a summary.",
    enable_queue=True  # Enable request queuing
)
iface.launch()