# summary_tube / app.py
# Uploaded by cfc-tech; commit c1f076a (verified); file size 1.75 kB.
# (Hugging Face file-viewer header; "raw" / "history" / "blame" were page links.)
import functools
import subprocess
import tempfile
from pathlib import Path

import gradio as gr
import librosa
import torch
from huggingsound import SpeechRecognitionModel
from pytube import YouTube
from transformers import pipeline
@functools.lru_cache(maxsize=1)
def _load_speech_model(device):
    """Build the wav2vec2 English recognizer once per device and reuse it across requests."""
    return SpeechRecognitionModel("jonatasgrosman/wav2vec2-large-xlsr-53-english", device=device)


@functools.lru_cache(maxsize=1)
def _load_summarizer():
    """Build the default summarization pipeline once and reuse it across requests."""
    return pipeline('summarization')


def process_video(video_url):
    """Download a YouTube video's audio, transcribe it, and return a summary.

    The pipeline has four stages: download the mp4 audio-only stream with
    pytube, convert it to 16 kHz 16-bit PCM WAV with ffmpeg, transcribe it
    with a wav2vec2 model, and summarize the transcription.

    Intermediate audio files live in a per-call temporary directory, so
    concurrent requests do not overwrite each other's files and nothing is
    left behind afterwards.

    Args:
        video_url: URL of the YouTube video, as typed by the user.

    Returns:
        The summary text on success. On failure, a human-readable string
        starting with "Error ..." that names the stage that failed; the
        function never raises, because its return value is shown directly
        in the UI.
    """
    if not isinstance(video_url, str) or not video_url.strip():
        return "Error downloading audio from YouTube: no video URL was provided"
    video_url = video_url.strip()

    with tempfile.TemporaryDirectory() as workdir:
        source_path = Path(workdir) / "ytaudio.mp4"
        wav_path = Path(workdir) / "ytaudio.wav"

        try:
            # Download audio from YouTube
            stream = YouTube(video_url).streams.filter(only_audio=True, file_extension='mp4').first()
            if stream is None:
                # .first() yields None when no stream matches the filter.
                return "Error downloading audio from YouTube: no mp4 audio-only stream is available"
            stream.download(output_path=workdir, filename=source_path.name)
        except Exception as e:
            return f"Error downloading audio from YouTube: {e}"

        try:
            # Convert to suitable format for speech recognition.
            # -y: overwrite the output instead of stopping if it already exists.
            subprocess.run(
                ['ffmpeg', '-y', '-i', str(source_path), '-acodec', 'pcm_s16le', '-ar', '16000', str(wav_path)],
                check=True,
            )
        except (subprocess.CalledProcessError, OSError) as e:
            # OSError covers a missing or non-executable ffmpeg binary.
            return f"Error converting audio file: {e}"

        try:
            # Speech Recognition
            device = "cuda" if torch.cuda.is_available() else "cpu"
            model = _load_speech_model(device)
            transcription = model.transcribe([str(wav_path)])[0]['transcription']
        except Exception as e:
            return f"Error in speech recognition: {e}"

    if not transcription or not transcription.strip():
        return "Error in speech recognition: no speech was detected in the audio"

    try:
        # Summarize Transcription
        summarizer = _load_summarizer()
        # truncation=True clips inputs longer than the model's input limit
        # instead of failing, so for long transcripts the summary is based on
        # the beginning of the text only.
        summarized_text = summarizer(
            transcription,
            max_length=130,
            min_length=30,
            do_sample=False,
            truncation=True,
        )
        return summarized_text[0]['summary_text']
    except Exception as e:
        return f"Error summarizing text: {e}"
# Web UI: one text box for the video URL in, the summary (or an error message) out.
# gr.Textbox replaces gr.inputs.Textbox; the gr.inputs namespace is deprecated
# and no longer exists in Gradio 4.
iface = gr.Interface(
    fn=process_video,
    inputs=gr.Textbox(lines=2, placeholder="Enter YouTube Video URL Here..."),
    outputs="text",
    title="YouTube Video Summarizer",
    description="This tool extracts audio from a YouTube video, transcribes it, and provides a summary.",
)
iface.launch()