Spaces:
Sleeping
Sleeping
File size: 3,065 Bytes
52930d0 b8c5f6f 52930d0 bdd5038 b8c5f6f 52930d0 bdd5038 52930d0 bdd5038 7912bc7 52930d0 bdd5038 52930d0 bdd5038 248c174 52930d0 fa63d63 bdd5038 248c174 fa63d63 52930d0 bdd5038 fa63d63 bdd5038 248c174 bdd5038 248c174 52930d0 248c174 bdd5038 fa63d63 52930d0 bdd5038 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
import os

import streamlit as st
from pytube import YouTube
from moviepy.editor import *
import torch
from transformers import (
    Wav2Vec2CTCTokenizer,
    Wav2Vec2ForCTC,
    Wav2Vec2Processor,
    logging,
    pipeline,
)
import librosa
# Suppress warnings from transformers
logging.set_verbosity_error()

# Speech-recognition checkpoint and the sample rate the audio is resampled to
# before being fed to it.
ASR_MODEL_NAME = "facebook/wav2vec2-base-960h"
ASR_SAMPLE_RATE = 16000


def _download_video(url):
    """Download the first stream pytube lists for *url* and return its path.

    Raises:
        ValueError: if pytube reports no stream for the video.
    """
    stream = YouTube(url).streams.first()
    if stream is None:
        raise ValueError("No downloadable stream found for this video.")
    return stream.download(skip_existing=True)


def _extract_audio(download_path):
    """Write the audio track of *download_path* to a sibling ``.wav`` file.

    Returns the path of the WAV file.
    """
    # Replace whatever extension the download has; a plain
    # ``.replace('.mp4', '.wav')`` would leave a non-mp4 path unchanged and
    # make the writer overwrite its own input file.
    audio_path = os.path.splitext(download_path)[0] + ".wav"
    clip = AudioFileClip(download_path)
    try:
        clip.write_audiofile(audio_path)
    finally:
        # Release the reader held on the source file.
        clip.close()
    return audio_path


def _transcribe(audio_path):
    """Return the wav2vec2 transcription of the audio file at *audio_path*."""
    # The processor bundles the feature extractor (waveform -> model inputs)
    # with the CTC tokenizer (ids -> text); the tokenizer alone only handles
    # text and cannot encode raw audio.
    processor = Wav2Vec2Processor.from_pretrained(ASR_MODEL_NAME)
    model = Wav2Vec2ForCTC.from_pretrained(ASR_MODEL_NAME)

    # Load and resample the audio to the rate passed to the processor
    speech, _ = librosa.load(audio_path, sr=ASR_SAMPLE_RATE)
    inputs = processor(
        speech, sampling_rate=ASR_SAMPLE_RATE, return_tensors="pt"
    )

    # Inference only: skip building the autograd graph.
    with torch.no_grad():
        logits = model(inputs.input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)

    # Decode the speech
    return processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]


# Streamlit interface setup
st.title("YouTube Video Summarizer")
youtube_link = st.text_input("Enter YouTube Video Link:")

if st.button('Summarize'):
    # Treat a whitespace-only entry the same as an empty one.
    link = youtube_link.strip() if youtube_link else ""
    if not link:
        st.warning("Please enter a valid YouTube link.")
    else:
        with st.spinner("Processing..."):
            # One bar, updated in place; calling st.progress() repeatedly
            # would add a new bar to the page each time.
            progress_bar = st.progress(0)
            try:
                # Download YouTube Video
                download_path = _download_video(link)
                progress_bar.progress(25)

                # Extract Audio
                audio_path = _extract_audio(download_path)
                progress_bar.progress(50)

                # Speech to Text
                transcription = _transcribe(audio_path)

                if not transcription.strip():
                    st.error("Could not transcribe audio or transcription is empty.")
                else:
                    st.write("Transcription:", transcription)  # For debugging
                    progress_bar.progress(75)

                    # Initialize the summarizer
                    summarizer = pipeline("summarization")

                    # Summarization
                    try:
                        # truncation=True keeps transcripts longer than the
                        # model's input limit from raising.
                        summary = summarizer(
                            transcription,
                            max_length=130,
                            min_length=30,
                            do_sample=False,
                            truncation=True,
                        )
                        summary_text = summary[0]['summary_text']
                    except Exception as summarization_error:
                        st.error(f"Error during summarization: {summarization_error}")
                    else:
                        st.success("Done!")
                        st.write("### Summary:")
                        st.write(summary_text)
                        # Final progress
                        progress_bar.progress(100)
            except Exception as general_error:
                # Top-level UI boundary: surface any failure to the user.
                st.error(f"An error occurred: {general_error}")
|