Spaces:

cfc-tech
/

youtube_summarizer

Sleeping

App Files Community

cfc-tech committed on Apr 1, 2024

Commit

bdd5038

verified ·

1 Parent(s): 248c174

update

Browse files

Files changed (1) hide show

app.py +17 -13

app.py CHANGED Viewed

@@ -5,7 +5,7 @@ import torch
 from transformers import Wav2Vec2ForCTC, Wav2Vec2CTCTokenizer, pipeline, logging
 import librosa
-# Suppress warnings from transformers to clean up the output
 logging.set_verbosity_error()
 # Streamlit interface setup
@@ -21,8 +21,8 @@ if st.button('Summarize'):
             try:
                 # Download YouTube Video
                 yt = YouTube(youtube_link)
-                video = yt.streams.filter(only_audio=True).first()
-                download_path = video.download()
                 # Show progress
                 st.progress(25)
@@ -35,40 +35,44 @@ if st.button('Summarize'):
                 # Show progress
                 st.progress(50)
-                # Speech to Text using the corrected tokenizer
                 tokenizer = Wav2Vec2CTCTokenizer.from_pretrained("facebook/wav2vec2-base-960h")
                 model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
                 # Load and process the audio
                 speech, _ = librosa.load(audio_path, sr=16000)
                 input_values = tokenizer(speech, return_tensors="pt").input_values
                 logits = model(input_values).logits
                 predicted_ids = torch.argmax(logits, dim=-1)
-                # Decode the speech to text
                 transcription = tokenizer.batch_decode(predicted_ids, skip_special_tokens=True)[0]
                 # Ensure transcription is a string and not empty
                 if isinstance(transcription, str) and transcription.strip():
-                    st.write("Transcription:", transcription)  # Debugging print
                     # Show progress
                     st.progress(75)
-                    # Summarization
                     summarizer = pipeline("summarization")
                     try:
-                        summary = summarizer(transcription, max_length=130, min_length=30, do_sample=False)
                         st.success("Done!")
                         st.write("### Summary:")
                         st.write(summary[0]['summary_text'])
                         # Final progress
                         st.progress(100)
-                    except Exception as e:
-                        st.error(f"Error in summarization: {e}")  # More specific error message
                 else:
                     st.error("Could not transcribe audio or transcription is empty.")
-            except Exception as e:
-                st.error(f"An error occurred: {e}")

 from transformers import Wav2Vec2ForCTC, Wav2Vec2CTCTokenizer, pipeline, logging
 import librosa
+# Suppress warnings from transformers
 logging.set_verbosity_error()
 # Streamlit interface setup
             try:
                 # Download YouTube Video
                 yt = YouTube(youtube_link)
+                video = yt.streams.first()
+                download_path = video.download(skip_existing=True)
                 # Show progress
                 st.progress(25)
                 # Show progress
                 st.progress(50)
+                # Speech to Text
                 tokenizer = Wav2Vec2CTCTokenizer.from_pretrained("facebook/wav2vec2-base-960h")
                 model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
                 # Load and process the audio
                 speech, _ = librosa.load(audio_path, sr=16000)
                 input_values = tokenizer(speech, return_tensors="pt").input_values
                 logits = model(input_values).logits
                 predicted_ids = torch.argmax(logits, dim=-1)
+                # Decode the speech
                 transcription = tokenizer.batch_decode(predicted_ids, skip_special_tokens=True)[0]
                 # Ensure transcription is a string and not empty
                 if isinstance(transcription, str) and transcription.strip():
+                    st.write("Transcription:", transcription)  # For debugging
                     # Show progress
                     st.progress(75)
+                    # Initialize the summarizer
                     summarizer = pipeline("summarization")
+                    # Summarization
                     try:
+                        # Ensure the transcription is a string
+                        transcription_text = str(transcription)
+                        summary = summarizer(transcription_text, max_length=130, min_length=30, do_sample=False)
                         st.success("Done!")
                         st.write("### Summary:")
                         st.write(summary[0]['summary_text'])
                         # Final progress
                         st.progress(100)
+                    except Exception as summarization_error:
+                        st.error(f"Error during summarization: {summarization_error}")
                 else:
                     st.error("Could not transcribe audio or transcription is empty.")
+            except Exception as general_error:
+                st.error(f"An error occurred: {general_error}")