import os

import streamlit as st
from pytube import YouTube
from moviepy.editor import AudioFileClip
import torch
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline, logging
import librosa
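# Requires: streamlit, pytube, moviepy, torch, transformers, librosa
# (moviepy also needs an ffmpeg binary available on the system)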

# Suppress warnings from transformers to clean up the output
logging.set_verbosity_error()

# Streamlit interface setup
st.title("YouTube Video Summarizer")

youtube_link = st.text_input("Enter YouTube Video Link:")

if st.button('Summarize'):
    if not youtube_link:
        st.warning("Please enter a valid YouTube link.")
    else:
        with st.spinner("Processing..."):
            progress_bar = st.progress(0)  # single progress bar, updated after each stage
            try:
                # Download YouTube Video
                yt = YouTube(youtube_link)
                video = yt.streams.filter(only_audio=True).first()
                download_path = video.download()

                # Show progress
                progress_bar.progress(25)

                # Extract Audio: convert the downloaded stream to WAV for librosa
                video_clip = AudioFileClip(download_path)
                audio_path = os.path.splitext(download_path)[0] + '.wav'
                video_clip.write_audiofile(audio_path)

                # Show progress
                progress_bar.progress(50)

                # Speech to Text with Wav2Vec2; the processor handles both
                # feature extraction (raw audio -> input_values) and CTC decoding
                processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
                model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")

                # Load the audio at 16 kHz, the sampling rate Wav2Vec2 expects
                speech, _ = librosa.load(audio_path, sr=16000)
                input_values = processor(speech, sampling_rate=16000, return_tensors="pt").input_values
                with torch.no_grad():  # inference only, no gradients needed
                    logits = model(input_values).logits
                predicted_ids = torch.argmax(logits, dim=-1)

                # Decode the predicted IDs into a single transcription string
                transcription = processor.batch_decode(predicted_ids)[0]

                # Ensure transcription is a string and not empty
                if isinstance(transcription, str) and transcription.strip():
                    # Show progress
                    progress_bar.progress(75)

                    # Summarization (the default pipeline model has a limited input
                    # length, so truncate transcriptions that are too long)
                    summarizer = pipeline("summarization")
                    summary = summarizer(transcription, max_length=130, min_length=30, do_sample=False, truncation=True)

                    # Display the summary
                    st.success("Done!")
                    st.write("### Summary:")
                    st.write(summary[0]['summary_text'])

                    # Final progress
                    progress_bar.progress(100)
                else:
                    st.error("Could not transcribe audio or transcription is empty.")

            except Exception as e:
                st.error(f"An error occurred: {e}")
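
# To run locally (assuming this file is saved as app.py):
#   streamlit run app.py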