cfc-tech committed on
Commit
ee8c3b8
·
verified ·
1 Parent(s): 296eb4c

first commit

Browse files
Files changed (1) hide show
  1. app.py +42 -68
app.py CHANGED
@@ -1,71 +1,45 @@
1
  import streamlit as st
2
  from pytube import YouTube
3
  from moviepy.editor import *
4
- import torch
5
- from transformers import Wav2Vec2ForCTC, Wav2Vec2CTCTokenizer, pipeline, logging
6
- import librosa
7
-
8
# Single-script version of the app: download a video's audio track, transcribe
# it with a Wav2Vec2 CTC model, then summarise the transcript.
import os

from transformers import Wav2Vec2Processor

# Suppress warnings from transformers
logging.set_verbosity_error()

# Wav2Vec2 classes must be loaded from a speech checkpoint;
# "facebook/bart-large-cnn" is a text summarization model and cannot be
# loaded as a Wav2Vec2 model.
ASR_CHECKPOINT = "facebook/wav2vec2-base-960h"
ASR_SAMPLE_RATE = 16000

# Streamlit interface setup
st.title("YouTube Video Summarizer")

youtube_link = st.text_input("Enter YouTube Video Link:")

if st.button('Summarize'):
    if not youtube_link:
        st.warning("Please enter a valid YouTube link.")
    else:
        with st.spinner("Processing..."):
            try:
                # One bar that is updated in place; calling st.progress()
                # repeatedly would stack a new bar for every step.
                progress_bar = st.progress(0)

                # Download YouTube audio
                yt = YouTube(youtube_link)
                stream = yt.streams.filter(only_audio=True).first()
                if stream is None:
                    raise ValueError("No audio-only stream is available for this video.")
                download_path = stream.download()
                progress_bar.progress(25)

                # Extract audio. Derive the target name with splitext so the
                # output never collides with the input when the download is
                # not an .mp4 file.
                audio_path = os.path.splitext(download_path)[0] + '.wav'
                audio_clip = AudioFileClip(download_path)
                try:
                    audio_clip.write_audiofile(audio_path)
                finally:
                    audio_clip.close()
                progress_bar.progress(50)

                # Speech to text. The processor bundles the feature extractor
                # (waveform -> model input) and the tokenizer (ids -> text).
                processor = Wav2Vec2Processor.from_pretrained(ASR_CHECKPOINT)
                model = Wav2Vec2ForCTC.from_pretrained(ASR_CHECKPOINT)

                # NOTE(review): the whole recording is fed to the model in one
                # pass; long videos may need chunking to fit in memory.
                speech, _ = librosa.load(audio_path, sr=ASR_SAMPLE_RATE)
                input_values = processor(
                    speech, sampling_rate=ASR_SAMPLE_RATE, return_tensors="pt"
                ).input_values
                with torch.no_grad():  # inference only, no gradients needed
                    logits = model(input_values).logits
                predicted_ids = torch.argmax(logits, dim=-1)

                # Decode the speech
                transcription = processor.batch_decode(predicted_ids)[0]

                # Ensure transcription is a string, not empty, and not too short
                if isinstance(transcription, str) and transcription.strip() and len(transcription) > 50:
                    progress_bar.progress(75)

                    # Initialize the summarizer
                    summarizer = pipeline("summarization")

                    # truncation=True keeps transcripts longer than the
                    # model's input limit from crashing the pipeline.
                    summary = summarizer(
                        transcription,
                        max_length=130,
                        min_length=30,
                        do_sample=False,
                        truncation=True,
                    )
                    st.success("Done!")
                    st.write("### Summary:")
                    st.write(summary[0]['summary_text'])

                    progress_bar.progress(100)
                else:
                    st.error("Transcription result is empty, too short, or not a string.")

            except Exception as e:
                # Top-level UI boundary: report the failure instead of a traceback.
                st.error(f"An error occurred: {e}")
 
1
  import streamlit as st
2
  from pytube import YouTube
3
  from moviepy.editor import *
4
+ import speech_recognition as sr
5
+ from transformers import pipeline
6
+
7
def download_and_extract_audio(youtube_link):
    """Download a video's audio track and convert it to a WAV file.

    The downloaded stream is transcoded with ``AudioFileClip`` rather than
    merely renamed: changing a file's extension does not change its
    container, and ``speech_recognition.AudioFile`` can only read
    WAV/AIFF/FLAC input.

    Args:
        youtube_link: URL of the YouTube video.

    Returns:
        Path of the converted ``.wav`` file in the current directory.

    Raises:
        ValueError: If the link is empty or the video has no audio-only
            stream.
    """
    # Local import: no explicit ``import os`` is visible at the top of the file.
    import os

    if not youtube_link or not youtube_link.strip():
        raise ValueError("A YouTube link is required.")

    yt = YouTube(youtube_link)
    stream = yt.streams.filter(only_audio=True).first()
    if stream is None:
        raise ValueError("No audio-only stream is available for this video.")

    downloaded = stream.download(output_path=".")
    base, _ = os.path.splitext(downloaded)
    wav_path = base + '.wav'

    try:
        clip = AudioFileClip(downloaded)
        try:
            clip.write_audiofile(wav_path)
        finally:
            clip.close()
    finally:
        # The original download is only an intermediate artefact.
        if os.path.exists(downloaded):
            os.remove(downloaded)

    return wav_path
15
+
16
def transcribe_audio(audio_path, chunk_seconds=30):
    """Transcribe an audio file with the Google Web Speech recognizer.

    The file is read and recognised in consecutive chunks instead of one
    request, because a single request carrying a whole video's audio is
    rejected by the service for anything but short clips.

    Args:
        audio_path: Path to a WAV/AIFF/FLAC file.
        chunk_seconds: Length of each chunk sent to the recognizer.

    Returns:
        The recognised text of all chunks joined with single spaces.

    Raises:
        ValueError: If ``chunk_seconds`` is not positive.
        sr.UnknownValueError: If no chunk contained intelligible speech.
        sr.RequestError: If the recognition service cannot be reached or
            rejects a request (propagated from ``recognize_google``).
    """
    if chunk_seconds <= 0:
        raise ValueError("chunk_seconds must be positive")

    recognizer = sr.Recognizer()
    pieces = []
    with sr.AudioFile(audio_path) as source:
        while True:
            audio_data = recognizer.record(source, duration=chunk_seconds)
            if not audio_data.frame_data:
                break  # end of file reached
            try:
                pieces.append(recognizer.recognize_google(audio_data))
            except sr.UnknownValueError:
                # Silence or music in one chunk should not abort the whole
                # transcription; skip it and keep going.
                continue

    if not pieces:
        # Keep the single-request behaviour for fully unintelligible audio.
        raise sr.UnknownValueError()
    return " ".join(pieces)
22
+
23
def summarize_text(text):
    """Summarise ``text`` with the ``facebook/bart-large-cnn`` pipeline.

    Args:
        text: The text to summarise.

    Returns:
        The generated summary, or an empty string when ``text`` is empty or
        contains only whitespace.
    """
    if not text or not text.strip():
        return ""

    summarizer = pipeline("summarization", model="facebook/bart-large-cnn")
    # truncation=True keeps inputs longer than the model's maximum length from
    # crashing the pipeline; text beyond that limit is not summarised.
    summary = summarizer(
        text,
        max_length=130,
        min_length=30,
        do_sample=False,
        truncation=True,
    )
    return summary[0]['summary_text']
27
+
28
def main():
    """Render the Streamlit page and run download -> transcribe -> summarise.

    An empty link produces a warning instead of a crash, any failure in the
    three steps is reported in the page, and the downloaded audio file is
    removed once it is no longer needed.
    """
    # Local import: no explicit ``import os`` is visible at the top of the file.
    import os

    st.title("YouTube Video Summary")
    youtube_link = st.text_input("Enter YouTube Video Link:")

    if not st.button("Summarize"):
        return
    if not youtube_link or not youtube_link.strip():
        st.warning("Please enter a valid YouTube link.")
        return

    audio_path = None
    try:
        with st.spinner('Downloading and extracting audio...'):
            audio_path = download_and_extract_audio(youtube_link)
        with st.spinner('Transcribing audio to text...'):
            transcription = transcribe_audio(audio_path)
        with st.spinner('Summarizing transcription...'):
            summary = summarize_text(transcription)
    except Exception as exc:
        # Top-level UI boundary: show the failure rather than a raw traceback.
        st.error(f"An error occurred: {exc}")
        return
    finally:
        # Do not let audio files accumulate in the working directory.
        if audio_path and os.path.exists(audio_path):
            os.remove(audio_path)

    st.subheader("Transcription:")
    st.write(transcription)
    st.subheader("Summary:")
    st.write(summary)


if __name__ == '__main__':
    main()