import streamlit as st
from pytube import YouTube
from moviepy.editor import AudioFileClip
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline, logging
import librosa

# Suppress warnings from transformers
logging.set_verbosity_error()

# Streamlit interface setup
st.title("YouTube Video Summarizer")

youtube_link = st.text_input("Enter YouTube Video Link:")

if st.button('Summarize'):
    if not youtube_link:
        st.warning("Please enter a valid YouTube link.")
    else:
        with st.spinner("Processing..."):
            try:
                # Download the YouTube video (a progressive MP4 stream includes both audio and video)
                yt = YouTube(youtube_link)
                video = yt.streams.filter(progressive=True, file_extension='mp4').first()
                download_path = video.download(skip_existing=True)

                # Show progress
                st.progress(25)

                # Extract the audio track to a WAV file
                audio_clip = AudioFileClip(download_path)
                audio_path = download_path.replace('.mp4', '.wav')
                audio_clip.write_audiofile(audio_path)
                audio_clip.close()

                # Show progress
                st.progress(50)

                # Speech to text with Wav2Vec2; the processor converts raw audio into
                # model inputs and decodes the predicted token ids back into text
                processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
                model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")

                # Load the audio at the 16 kHz sampling rate the model expects
                speech, _ = librosa.load(audio_path, sr=16000)
                input_values = processor(speech, sampling_rate=16000, return_tensors="pt").input_values
                with torch.no_grad():
                    logits = model(input_values).logits
                predicted_ids = torch.argmax(logits, dim=-1)

                # Decode the predicted ids into the transcription
                transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]

                # Make sure the transcription is a non-empty string before summarizing
                if isinstance(transcription, str) and transcription.strip():
                    st.write("Transcription:", transcription)

                    # Show progress
                    st.progress(75)

                    # Initialize the summarizer
                    summarizer = pipeline("summarization")

                    # Summarization
                    try:
                        # Summarize the transcription; truncate inputs that exceed the model's context window
                        summary = summarizer(transcription, max_length=130, min_length=30, do_sample=False, truncation=True)
                        st.success("Done!")
                        st.write("### Summary:")
                        st.write(summary[0]['summary_text'])

                        # Final progress
                        st.progress(100)
                    except Exception as summarization_error:
                        st.error(f"Error during summarization: {summarization_error}")
                else:
                    st.error("Could not transcribe audio or transcription is empty.")

            except Exception as general_error:
                st.error(f"An error occurred: {general_error}")