import streamlit as st
from pytube import YouTube
from moviepy.editor import AudioFileClip
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline, logging
import librosa

# Suppress warnings from transformers
logging.set_verbosity_error()

# Streamlit interface setup
st.title("YouTube Video Summarizer")

youtube_link = st.text_input("Enter YouTube Video Link:")

if st.button('Summarize'):
    if not youtube_link:
        st.warning("Please enter a valid YouTube link.")
    else:
        with st.spinner("Processing..."):
            try:
                # Download the YouTube video (a progressive MP4 stream includes both audio and video)
                yt = YouTube(youtube_link)
                video = yt.streams.filter(progressive=True, file_extension='mp4').first()
                download_path = video.download(skip_existing=True)

                # Show progress
                st.progress(25)

                # Extract the audio track to a WAV file
                audio_clip = AudioFileClip(download_path)
                audio_path = download_path.replace('.mp4', '.wav')
                audio_clip.write_audiofile(audio_path)
                audio_clip.close()

                # Show progress
                st.progress(50)

                # Speech to text with Wav2Vec2; the processor converts raw audio into
                # model inputs and decodes the predicted token ids back into text
                processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
                model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")

                # Load the audio at the 16 kHz sampling rate the model expects
                speech, _ = librosa.load(audio_path, sr=16000)
                input_values = processor(speech, sampling_rate=16000, return_tensors="pt").input_values
                with torch.no_grad():
                    logits = model(input_values).logits
                predicted_ids = torch.argmax(logits, dim=-1)

                # Decode the predicted ids into the transcription
                transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

                # Make sure the transcription is a non-empty string before summarizing
                if isinstance(transcription, str) and transcription.strip():
                    st.write("Transcription:", transcription)

                    # Show progress
                    st.progress(75)

                    # Initialize the summarizer
                    summarizer = pipeline("summarization")

                    # Summarization
                    try:
                        # Summarize the transcription; truncate inputs that exceed the model's context window
                        summary = summarizer(transcription, max_length=130, min_length=30, do_sample=False, truncation=True)
                        st.success("Done!")
                        st.write("### Summary:")
                        st.write(summary[0]['summary_text'])

                        # Final progress
                        st.progress(100)
                    except Exception as summarization_error:
                        st.error(f"Error during summarization: {summarization_error}")
                else:
                    st.error("Could not transcribe audio or transcription is empty.")

            except Exception as general_error:
                st.error(f"An error occurred: {general_error}")