File size: 2,263 Bytes
52930d0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import os
import tempfile

# Third-party imports keep their original relative order so the star import
# below shadows exactly the same names as before.
import streamlit as st
from pytube import YouTube
from moviepy.editor import *
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
from transformers import pipeline
import librosa

# Pipeline stages: download audio -> convert to WAV -> speech-to-text -> summary
_ASR_MODEL_NAME = "facebook/wav2vec2-base-960h"
_ASR_SAMPLE_RATE = 16000  # rate librosa resamples to before recognition


def _download_audio(url, workdir):
    """Download the first audio-only stream of *url* into *workdir*.

    Returns the path of the downloaded file.
    Raises ValueError when the video exposes no audio-only stream.
    """
    stream = YouTube(url).streams.filter(only_audio=True).first()
    if stream is None:
        # Without this check the failure surfaces as an opaque AttributeError.
        raise ValueError("No audio-only stream is available for this video.")
    return stream.download(output_path=workdir)


def _convert_to_wav(source_path, workdir):
    """Convert *source_path* to a WAV file inside *workdir* and return its path."""
    # A fixed output name is used instead of replacing ".mp4" in the source
    # path: when the download has a different extension that replace is a
    # no-op and the conversion would write onto its own input file.
    wav_path = os.path.join(workdir, "transcription_input.wav")
    clip = AudioFileClip(source_path)
    try:
        clip.write_audiofile(wav_path)
    finally:
        # Release the reader even if the conversion fails.
        clip.close()
    return wav_path


def _transcribe(wav_path):
    """Return the greedy (argmax) wav2vec2 transcription of *wav_path*."""
    # NOTE(review): the tokenizer and model are re-loaded on every click;
    # consider caching them once the minimum Streamlit version is known.
    tokenizer = Wav2Vec2Tokenizer.from_pretrained(_ASR_MODEL_NAME)
    model = Wav2Vec2ForCTC.from_pretrained(_ASR_MODEL_NAME)

    # NOTE(review): the whole recording goes through one forward pass; long
    # videos will probably need chunked transcription to fit in memory.
    speech, _ = librosa.load(wav_path, sr=_ASR_SAMPLE_RATE)
    input_values = tokenizer(speech, return_tensors="pt").input_values
    # Inference only, so skip autograd bookkeeping.
    with torch.no_grad():
        logits = model(input_values).logits
    predicted_ids = torch.argmax(logits, dim=-1)
    return tokenizer.decode(predicted_ids[0])


def _summarize(text):
    """Return a summary of *text* from the default summarization pipeline."""
    summarizer = pipeline("summarization")
    # truncation=True asks the pipeline to cut over-long input to the model's
    # limit instead of failing on transcripts that exceed it.
    result = summarizer(
        text,
        max_length=130,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return result[0]['summary_text']


# Streamlit interface setup
st.title("YouTube Video Summarizer")

youtube_link = st.text_input("Enter YouTube Video Link:")

if st.button('Summarize'):
    video_url = youtube_link.strip()
    if not video_url:
        st.warning("Please enter a valid YouTube link.")
    else:
        with st.spinner("Processing..."):
            # One progress element updated in place; calling st.progress()
            # repeatedly would render a separate bar for each call.
            progress_bar = st.progress(0)
            try:
                # Intermediate files live in a temporary directory so they
                # are removed whether or not the run succeeds.
                with tempfile.TemporaryDirectory() as workdir:
                    download_path = _download_audio(video_url, workdir)
                    progress_bar.progress(25)

                    audio_path = _convert_to_wav(download_path, workdir)
                    progress_bar.progress(50)

                    transcription = _transcribe(audio_path)
                    progress_bar.progress(75)

                if not transcription.strip():
                    # Nothing to summarize (e.g. no recognizable speech).
                    st.warning("No speech could be transcribed from this video.")
                else:
                    summary_text = _summarize(transcription)

                    # Display the summary
                    st.success("Done!")
                    st.write("### Summary:")
                    st.write(summary_text)

                progress_bar.progress(100)

            except Exception as e:
                # UI boundary: report any failure to the user instead of
                # letting Streamlit show a raw traceback.
                st.error(f"An error occurred: {e}")