cfc-tech committed on
Commit
27669e2
·
verified ·
1 Parent(s): bdd5038
Files changed (1) hide show
  1. app.py +18 -72
app.py CHANGED
@@ -1,78 +1,24 @@
1
  import streamlit as st
2
- from pytube import YouTube
3
- from moviepy.editor import *
4
- import torch
5
- from transformers import Wav2Vec2ForCTC, Wav2Vec2CTCTokenizer, pipeline, logging
6
- import librosa
7
 
8
  # Suppress warnings from transformers
9
  logging.set_verbosity_error()
10
 
11
  # Streamlit interface setup
12
- st.title("YouTube Video Summarizer")
13
-
14
- youtube_link = st.text_input("Enter YouTube Video Link:")
15
-
16
- if st.button('Summarize'):
17
- if not youtube_link:
18
- st.warning("Please enter a valid YouTube link.")
19
- else:
20
- with st.spinner("Processing..."):
21
- try:
22
- # Download YouTube Video
23
- yt = YouTube(youtube_link)
24
- video = yt.streams.first()
25
- download_path = video.download(skip_existing=True)
26
-
27
- # Show progress
28
- st.progress(25)
29
-
30
- # Extract Audio
31
- video_clip = AudioFileClip(download_path)
32
- audio_path = download_path.replace('.mp4', '.wav')
33
- video_clip.write_audiofile(audio_path)
34
-
35
- # Show progress
36
- st.progress(50)
37
-
38
- # Speech to Text
39
- tokenizer = Wav2Vec2CTCTokenizer.from_pretrained("facebook/wav2vec2-base-960h")
40
- model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
41
-
42
- # Load and process the audio
43
- speech, _ = librosa.load(audio_path, sr=16000)
44
- input_values = tokenizer(speech, return_tensors="pt").input_values
45
- logits = model(input_values).logits
46
- predicted_ids = torch.argmax(logits, dim=-1)
47
-
48
- # Decode the speech
49
- transcription = tokenizer.batch_decode(predicted_ids, skip_special_tokens=True)[0]
50
-
51
- # Ensure transcription is a string and not empty
52
- if isinstance(transcription, str) and transcription.strip():
53
- st.write("Transcription:", transcription) # For debugging
54
-
55
- # Show progress
56
- st.progress(75)
57
-
58
- # Initialize the summarizer
59
- summarizer = pipeline("summarization")
60
-
61
- # Summarization
62
- try:
63
- # Ensure the transcription is a string
64
- transcription_text = str(transcription)
65
- summary = summarizer(transcription_text, max_length=130, min_length=30, do_sample=False)
66
- st.success("Done!")
67
- st.write("### Summary:")
68
- st.write(summary[0]['summary_text'])
69
-
70
- # Final progress
71
- st.progress(100)
72
- except Exception as summarization_error:
73
- st.error(f"Error during summarization: {summarization_error}")
74
- else:
75
- st.error("Could not transcribe audio or transcription is empty.")
76
-
77
- except Exception as general_error:
78
- st.error(f"An error occurred: {general_error}")
 
1
  import streamlit as st
2
+ from transformers import pipeline, logging
 
 
 
 
3
 
4
  # Suppress warnings from transformers
5
  logging.set_verbosity_error()
6
 
7
  # Streamlit interface setup
8
+ st.title("Summarization Test")
9
+
10
+ # Initialize the summarizer
11
+ summarizer = pipeline("summarization")
12
+
13
+ # Test summarization with a hardcoded string
14
+ test_text = "This is a simple test sentence to verify the functionality of the summarization pipeline. The goal is to ensure that the pipeline can process input text correctly and produce a summary without encountering the input type error."
15
+
16
+ if st.button('Test Summarization'):
17
+ try:
18
+ # Attempt to summarize the hardcoded test text
19
+ summary = summarizer(test_text, max_length=130, min_length=30, do_sample=False)
20
+ st.success("Summarization succeeded!")
21
+ st.write("### Summary:")
22
+ st.write(summary[0]['summary_text'])
23
+ except Exception as e:
24
+ st.error(f"Summarization test failed: {e}")