Spaces:
Sleeping
Sleeping
File size: 2,679 Bytes
52930d0 ce3a3a9 52930d0 bdd5038 b8c5f6f 52930d0 ce3a3a9 45c48be ce3a3a9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
import os
import tempfile

import streamlit as st
from pytube import YouTube
from moviepy.editor import *
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2CTCTokenizer, pipeline, logging
from transformers import Wav2Vec2Processor
import librosa
# Suppress warnings from transformers
logging.set_verbosity_error()

# Speech recognition needs a Wav2Vec2 CTC checkpoint; the BART checkpoint is a
# summarization model and belongs to the summarization pipeline instead.
ASR_MODEL_NAME = "facebook/wav2vec2-base-960h"
SUMMARIZER_MODEL_NAME = "facebook/bart-large-cnn"

# Sampling rate the audio is resampled to before transcription.
SAMPLE_RATE = 16000
# Audio is transcribed in fixed-length pieces so a long video does not have to
# go through the model in a single forward pass.
CHUNK_SECONDS = 30
# Trailing fragments shorter than this are skipped.
# NOTE(review): assumes the model rejects inputs of only a few samples -- confirm.
MIN_CHUNK_SAMPLES = SAMPLE_RATE // 10
# Transcripts at or below this many characters are treated as unusable.
MIN_TRANSCRIPT_CHARS = 50


def _download_audio(url, work_dir):
    """Download the first audio-only stream of *url* into *work_dir*.

    Returns the path of the downloaded file.
    Raises RuntimeError when the video exposes no audio-only stream.
    """
    stream = YouTube(url).streams.filter(only_audio=True).first()
    if stream is None:
        raise RuntimeError("No audio-only stream is available for this video.")
    return stream.download(output_path=work_dir)


def _convert_to_wav(source_path):
    """Write a WAV copy of *source_path* next to it and return the WAV path."""
    # Swap the real extension instead of replacing the substring '.mp4':
    # audio-only streams are not always .mp4, and an unchanged name would make
    # the converter overwrite its own input.
    wav_path = os.path.splitext(source_path)[0] + ".wav"
    clip = AudioFileClip(source_path)
    try:
        clip.write_audiofile(wav_path)
    finally:
        # Release the reader even if the conversion fails.
        clip.close()
    return wav_path


def _transcribe(wav_path):
    """Return the text recognized in the audio file at *wav_path*.

    The audio is resampled to SAMPLE_RATE and decoded chunk by chunk; the
    per-chunk transcripts are joined with single spaces.
    """
    processor = Wav2Vec2Processor.from_pretrained(ASR_MODEL_NAME)
    model = Wav2Vec2ForCTC.from_pretrained(ASR_MODEL_NAME)
    model.eval()

    speech, _ = librosa.load(wav_path, sr=SAMPLE_RATE)
    chunk_size = SAMPLE_RATE * CHUNK_SECONDS

    pieces = []
    for start in range(0, len(speech), chunk_size):
        chunk = speech[start:start + chunk_size]
        if len(chunk) < MIN_CHUNK_SAMPLES:
            continue
        inputs = processor(chunk, sampling_rate=SAMPLE_RATE, return_tensors="pt")
        # Inference only: no gradients are needed.
        with torch.no_grad():
            logits = model(inputs.input_values).logits
        predicted_ids = torch.argmax(logits, dim=-1)
        pieces.append(processor.batch_decode(predicted_ids)[0])

    return " ".join(piece for piece in pieces if piece.strip())


def _summarize(text):
    """Return a short summary of *text*."""
    summarizer = pipeline("summarization", model=SUMMARIZER_MODEL_NAME)
    # truncation=True keeps an over-long transcript within the model's input
    # limit instead of failing inside the model.
    result = summarizer(
        text,
        max_length=130,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return result[0]['summary_text']


# Streamlit interface setup
st.title("YouTube Video Summarizer")
youtube_link = st.text_input("Enter YouTube Video Link:")

if st.button('Summarize'):
    link = youtube_link.strip()
    if not link:
        st.warning("Please enter a valid YouTube link.")
    else:
        with st.spinner("Processing..."):
            # One progress bar, updated in place; calling st.progress()
            # repeatedly would add a new bar to the page each time.
            progress_bar = st.progress(0)
            try:
                # Intermediate media files live in a temporary directory that
                # is removed as soon as transcription is finished.
                with tempfile.TemporaryDirectory() as work_dir:
                    # Download YouTube Video
                    download_path = _download_audio(link, work_dir)
                    progress_bar.progress(25)

                    # Extract Audio
                    audio_path = _convert_to_wav(download_path)
                    progress_bar.progress(50)

                    # Speech to Text
                    transcription = _transcribe(audio_path)

                # Ensure transcription is not empty and not too short
                if transcription.strip() and len(transcription) > MIN_TRANSCRIPT_CHARS:
                    progress_bar.progress(75)

                    # Summarization
                    summary_text = _summarize(transcription)

                    st.success("Done!")
                    st.write("### Summary:")
                    st.write(summary_text)

                    # Final progress
                    progress_bar.progress(100)
                else:
                    st.error("Transcription result is empty, too short, or not a string.")
            except Exception as e:
                # UI boundary: report any failure to the user instead of
                # crashing the app.
                st.error(f"An error occurred: {e}")
|