Spaces:

cfc-tech
/

youtube_summarizer

Sleeping

App Files Files Community

cfc-tech committed on Apr 1, 2024

Commit

52930d0

verified ·

1 Parent(s): d85731d

first commit

Browse files

Files changed (1) hide show

app.py +66 -0

app.py ADDED Viewed

	@@ -0,0 +1,66 @@

+import os
+import tempfile
+
+import streamlit as st
+from pytube import YouTube
+from moviepy.editor import *
+import torch
+from transformers import Wav2Vec2ForCTC, Wav2Vec2Tokenizer
+from transformers import pipeline
+import librosa
+# Streamlit interface setup
+st.title("YouTube Video Summarizer")
+youtube_link = st.text_input("Enter YouTube Video Link:")
+if st.button('Summarize'):
+    if not youtube_link:
+        st.warning("Please enter a valid YouTube link.")
+    else:
+        with st.spinner("Processing..."):
+            try:
+                # Download YouTube Video
+                yt = YouTube(youtube_link)
+                video = yt.streams.filter(only_audio=True).first()
+                download_path = video.download()
+                # Show progress
+                st.progress(25)
+                # Extract Audio
+                video_clip = AudioFileClip(download_path)
+                audio_path = download_path.replace('.mp4', '.wav')
+                video_clip.write_audiofile(audio_path)
+                # Show progress
+                st.progress(50)
+                # Speech to Text
+                tokenizer = Wav2Vec2Tokenizer.from_pretrained("facebook/wav2vec2-base-960h")
+                model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
+                # Load and process the audio
+                speech, _ = librosa.load(audio_path, sr=16000)
+                input_values = tokenizer(speech, return_tensors="pt").input_values
+                logits = model(input_values).logits
+                predicted_ids = torch.argmax(logits, dim=-1)
+                # Decode the speech
+                transcription = tokenizer.decode(predicted_ids[0])
+                # Show progress
+                st.progress(75)
+                # Summarization
+                summarizer = pipeline("summarization")
+                summary = summarizer(transcription, max_length=130, min_length=30, do_sample=False)
+                # Display the summary
+                st.success("Done!")
+                st.write("### Summary:")
+                st.write(summary[0]['summary_text'])
+                # Final progress
+                st.progress(100)
+            except Exception as e:
+                st.error(f"An error occurred: {e}")