cfc-tech committed on
Commit
ce3a3a9
·
verified ·
1 Parent(s): 27669e2
Files changed (1) hide show
  1. app.py +65 -18
app.py CHANGED
@@ -1,24 +1,71 @@
"""Streamlit smoke test for the transformers summarization pipeline."""

import streamlit as st
from transformers import pipeline, logging

# Suppress warnings from transformers
logging.set_verbosity_error()

# Streamlit interface setup
st.title("Summarization Test")

# Build the default summarization pipeline once per script run.
summarizer = pipeline("summarization")

# Fixed input so a failure points at the pipeline rather than at the data.
SAMPLE_TEXT = (
    "This is a simple test sentence to verify the functionality of the "
    "summarization pipeline. The goal is to ensure that the pipeline can "
    "process input text correctly and produce a summary without "
    "encountering the input type error."
)

# Generation settings passed to the summarizer.
GENERATION_OPTIONS = {"max_length": 130, "min_length": 30, "do_sample": False}

run_requested = st.button('Test Summarization')
if run_requested:
    try:
        # Summarize the fixed sample and report the outcome in the page.
        results = summarizer(SAMPLE_TEXT, **GENERATION_OPTIONS)
        st.success("Summarization succeeded!")
        st.write("### Summary:")
        st.write(results[0]['summary_text'])
    except Exception as error:
        st.error(f"Summarization test failed: {error}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit app: download a YouTube video's audio, transcribe it, summarize it.

Pipeline: pytube (audio download) -> moviepy (convert to WAV) ->
wav2vec2 (speech to text) -> transformers summarization pipeline.
"""

import os
import tempfile

import streamlit as st
from pytube import YouTube
from moviepy.editor import AudioFileClip
import torch
from transformers import (
    Wav2Vec2CTCTokenizer,
    Wav2Vec2ForCTC,
    Wav2Vec2Processor,
    pipeline,
    logging,
)
import librosa

# Suppress warnings from transformers
logging.set_verbosity_error()

ASR_MODEL_NAME = "facebook/wav2vec2-base-960h"
SAMPLE_RATE = 16000  # sampling rate the audio is resampled to before inference
CHUNK_SECONDS = 30  # transcribe in fixed windows to bound memory use
MIN_CHUNK_SAMPLES = SAMPLE_RATE // 10  # skip trailing slivers shorter than 0.1 s
MIN_TRANSCRIPT_CHARS = 50  # below this there is nothing worth summarizing


@st.cache_resource
def _load_asr():
    """Load the speech-recognition processor and model once per server process."""
    processor = Wav2Vec2Processor.from_pretrained(ASR_MODEL_NAME)
    model = Wav2Vec2ForCTC.from_pretrained(ASR_MODEL_NAME)
    return processor, model


@st.cache_resource
def _load_summarizer():
    """Load the default summarization pipeline once per server process."""
    return pipeline("summarization")


def _download_audio(url, work_dir):
    """Download the first audio-only stream of *url* into *work_dir*.

    Returns the path of the downloaded file.
    Raises ValueError when the video exposes no audio-only stream.
    """
    stream = YouTube(url).streams.filter(only_audio=True).first()
    if stream is None:
        raise ValueError("No audio-only stream is available for this video.")
    return stream.download(output_path=work_dir)


def _extract_wav(source_path, work_dir):
    """Convert the downloaded audio file to WAV and return the WAV path."""
    # A fixed, distinct name: the download may not be ".mp4" (e.g. ".webm"),
    # so rewriting its extension could leave the path unchanged and make the
    # conversion overwrite its own input.
    wav_path = os.path.join(work_dir, "extracted_audio.wav")
    clip = AudioFileClip(source_path)
    try:
        clip.write_audiofile(wav_path)
    finally:
        # Release the reader even when conversion fails.
        clip.close()
    return wav_path


def _transcribe(wav_path):
    """Return the transcription of the WAV file at *wav_path*.

    The audio is processed in CHUNK_SECONDS windows so long recordings do not
    go through the model in a single forward pass. Words that straddle a
    window boundary may be split.
    """
    processor, model = _load_asr()
    speech, _ = librosa.load(wav_path, sr=SAMPLE_RATE)

    chunk_len = CHUNK_SECONDS * SAMPLE_RATE
    pieces = []
    for start in range(0, len(speech), chunk_len):
        chunk = speech[start:start + chunk_len]
        if len(chunk) < MIN_CHUNK_SAMPLES:
            # NOTE(review): very short inputs are presumably rejected by the
            # model's convolutional front end; confirm the exact minimum.
            continue
        inputs = processor(chunk, sampling_rate=SAMPLE_RATE, return_tensors="pt")
        # Inference only: no gradients needed.
        with torch.no_grad():
            logits = model(inputs.input_values).logits
        predicted_ids = torch.argmax(logits, dim=-1)
        pieces.append(
            processor.batch_decode(predicted_ids, skip_special_tokens=True)[0]
        )
    return " ".join(piece for piece in pieces if piece)


# Streamlit interface setup
st.title("YouTube Video Summarizer")

youtube_link = st.text_input("Enter YouTube Video Link:")

if st.button('Summarize'):
    if not youtube_link.strip():
        st.warning("Please enter a valid YouTube link.")
    else:
        with st.spinner("Processing..."):
            try:
                # One bar, updated in place, instead of a new bar per step.
                progress_bar = st.progress(0)

                # Intermediate media files live in a temporary directory so
                # they are removed whether or not processing succeeds.
                with tempfile.TemporaryDirectory() as work_dir:
                    # Download YouTube Video
                    download_path = _download_audio(youtube_link.strip(), work_dir)
                    progress_bar.progress(25)

                    # Extract Audio
                    audio_path = _extract_wav(download_path, work_dir)
                    progress_bar.progress(50)

                    # Speech to Text
                    transcription = _transcribe(audio_path)

                # Ensure transcription is not empty and not too short
                if transcription.strip() and len(transcription) > MIN_TRANSCRIPT_CHARS:
                    progress_bar.progress(75)

                    # Summarization; truncation keeps long transcripts within
                    # the summarization model's input limit.
                    summarizer = _load_summarizer()
                    summary = summarizer(
                        transcription,
                        max_length=130,
                        min_length=30,
                        do_sample=False,
                        truncation=True,
                    )
                    st.success("Done!")
                    st.write("### Summary:")
                    st.write(summary[0]['summary_text'])

                    # Final progress
                    progress_bar.progress(100)
                else:
                    st.error("Transcription result is empty, too short, or not a string.")

            except Exception as e:
                # Top-level UI boundary: report the failure to the user.
                st.error(f"An error occurred: {e}")