Spaces:

cfc-tech
/

youtube_summarizer

Sleeping

App Files Community

cfc-tech committed on Apr 1, 2024

Commit

ce3a3a9

verified ·

1 Parent(s): 27669e2

u

Browse files

Files changed (1) hide show

app.py +65 -18

app.py CHANGED Viewed

@@ -1,24 +1,71 @@
 import streamlit as st
-from transformers import pipeline, logging
 # Suppress warnings from transformers
 logging.set_verbosity_error()
 # Streamlit interface setup
-st.title("Summarization Test")
-# Initialize the summarizer
-summarizer = pipeline("summarization")
-# Test summarization with a hardcoded string
-test_text = "This is a simple test sentence to verify the functionality of the summarization pipeline. The goal is to ensure that the pipeline can process input text correctly and produce a summary without encountering the input type error."
-if st.button('Test Summarization'):
-    try:
-        # Attempt to summarize the hardcoded test text
-        summary = summarizer(test_text, max_length=130, min_length=30, do_sample=False)
-        st.success("Summarization succeeded!")
-        st.write("### Summary:")
-        st.write(summary[0]['summary_text'])
-    except Exception as e:
-        st.error(f"Summarization test failed: {e}")

 import streamlit as st
+import os
+from pytube import YouTube
+from moviepy.editor import *
+import torch
+from transformers import Wav2Vec2ForCTC, Wav2Vec2CTCTokenizer, Wav2Vec2Processor, pipeline, logging
+import librosa
+# YouTube Video Summarizer (Streamlit app).
+# Pipeline: download the audio-only stream of a YouTube video, convert it to
+# WAV, transcribe it with the facebook/wav2vec2-base-960h CTC model, then
+# summarize the transcript with the default transformers summarization pipeline.
 # Suppress warnings from transformers
 logging.set_verbosity_error()
 # Streamlit interface setup
+st.title("YouTube Video Summarizer")
+youtube_link = st.text_input("Enter YouTube Video Link:")
+if st.button('Summarize'):
+    if not youtube_link:
+        st.warning("Please enter a valid YouTube link.")
+    else:
+        with st.spinner("Processing..."):
+            try:
+                # One progress bar updated in place; every bare st.progress(n)
+                # call would otherwise render an additional bar on the page.
+                progress_bar = st.progress(0)
+                # Download YouTube Video
+                yt = YouTube(youtube_link)
+                video = yt.streams.filter(only_audio=True).first()
+                # .first() returns None when the query matches no stream
+                if video is None:
+                    raise ValueError("No audio-only stream is available for this video.")
+                download_path = video.download()
+                # Show progress
+                progress_bar.progress(25)
+                # Extract Audio
+                # Swap the extension whatever the container is (.mp4, .webm, ...);
+                # a literal replace('.mp4', '.wav') is a no-op for other formats
+                # and would make the WAV overwrite the downloaded file.
+                audio_path = os.path.splitext(download_path)[0] + '.wav'
+                video_clip = AudioFileClip(download_path)
+                try:
+                    video_clip.write_audiofile(audio_path)
+                finally:
+                    # Release the underlying audio reader even if the export fails
+                    video_clip.close()
+                # Show progress
+                progress_bar.progress(50)
+                # Speech to Text
+                # The processor bundles the feature extractor (raw waveform ->
+                # input_values) with the CTC tokenizer (ids -> text); the bare
+                # tokenizer only accepts text and cannot encode audio samples.
+                processor = Wav2Vec2Processor.from_pretrained("facebook/wav2vec2-base-960h")
+                model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
+                # Load and process the audio (resampled to the 16 kHz the model expects)
+                speech, _ = librosa.load(audio_path, sr=16000)
+                input_values = processor(speech, sampling_rate=16000, return_tensors="pt").input_values
+                # Inference only: skip building the autograd graph
+                with torch.no_grad():
+                    logits = model(input_values).logits
+                predicted_ids = torch.argmax(logits, dim=-1)
+                # Decode the speech
+                transcription = processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
+                # Ensure transcription is a string, not empty, and not too short
+                if isinstance(transcription, str) and transcription.strip() and len(transcription) > 50:
+                    # Show progress
+                    progress_bar.progress(75)
+                    # Initialize the summarizer
+                    summarizer = pipeline("summarization")
+                    # Summarization; truncation keeps long transcripts within
+                    # the summarization model's maximum input length
+                    summary = summarizer(transcription, max_length=130, min_length=30, do_sample=False, truncation=True)
+                    st.success("Done!")
+                    st.write("### Summary:")
+                    st.write(summary[0]['summary_text'])
+                    # Final progress
+                    progress_bar.progress(100)
+                else:
+                    st.error("Transcription result is empty, too short, or not a string.")
+            except Exception as e:
+                # Top-level UI boundary: surface any failure to the user
+                st.error(f"An error occurred: {e}")