cfc-tech committed on
Commit
bdd5038
·
verified ·
1 Parent(s): 248c174
Files changed (1) hide show
  1. app.py +17 -13
app.py CHANGED
@@ -5,7 +5,7 @@ import torch
5
  from transformers import Wav2Vec2ForCTC, Wav2Vec2CTCTokenizer, pipeline, logging
6
  import librosa
7
 
8
- # Suppress warnings from transformers to clean up the output
9
  logging.set_verbosity_error()
10
 
11
  # Streamlit interface setup
@@ -21,8 +21,8 @@ if st.button('Summarize'):
21
  try:
22
  # Download YouTube Video
23
  yt = YouTube(youtube_link)
24
- video = yt.streams.filter(only_audio=True).first()
25
- download_path = video.download()
26
 
27
  # Show progress
28
  st.progress(25)
@@ -35,40 +35,44 @@ if st.button('Summarize'):
35
  # Show progress
36
  st.progress(50)
37
 
38
- # Speech to Text using the corrected tokenizer
39
  tokenizer = Wav2Vec2CTCTokenizer.from_pretrained("facebook/wav2vec2-base-960h")
40
  model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
41
-
42
  # Load and process the audio
43
  speech, _ = librosa.load(audio_path, sr=16000)
44
  input_values = tokenizer(speech, return_tensors="pt").input_values
45
  logits = model(input_values).logits
46
  predicted_ids = torch.argmax(logits, dim=-1)
47
 
48
- # Decode the speech to text
49
  transcription = tokenizer.batch_decode(predicted_ids, skip_special_tokens=True)[0]
50
 
51
  # Ensure transcription is a string and not empty
52
  if isinstance(transcription, str) and transcription.strip():
53
- st.write("Transcription:", transcription) # Debugging print
54
 
55
  # Show progress
56
  st.progress(75)
57
 
58
- # Summarization
59
  summarizer = pipeline("summarization")
 
 
60
  try:
61
- summary = summarizer(transcription, max_length=130, min_length=30, do_sample=False)
 
 
62
  st.success("Done!")
63
  st.write("### Summary:")
64
  st.write(summary[0]['summary_text'])
65
 
66
  # Final progress
67
  st.progress(100)
68
- except Exception as e:
69
- st.error(f"Error in summarization: {e}") # More specific error message
70
  else:
71
  st.error("Could not transcribe audio or transcription is empty.")
72
 
73
- except Exception as e:
74
- st.error(f"An error occurred: {e}")
 
5
  from transformers import Wav2Vec2ForCTC, Wav2Vec2CTCTokenizer, pipeline, logging
6
  import librosa
7
 
8
+ # Suppress warnings from transformers
9
  logging.set_verbosity_error()
10
 
11
  # Streamlit interface setup
 
21
  try:
22
  # Download YouTube Video
23
  yt = YouTube(youtube_link)
24
+ video = yt.streams.first()
25
+ download_path = video.download(skip_existing=True)
26
 
27
  # Show progress
28
  st.progress(25)
 
35
  # Show progress
36
  st.progress(50)
37
 
38
+ # Speech to Text
39
  tokenizer = Wav2Vec2CTCTokenizer.from_pretrained("facebook/wav2vec2-base-960h")
40
  model = Wav2Vec2ForCTC.from_pretrained("facebook/wav2vec2-base-960h")
41
+
42
  # Load and process the audio
43
  speech, _ = librosa.load(audio_path, sr=16000)
44
  input_values = tokenizer(speech, return_tensors="pt").input_values
45
  logits = model(input_values).logits
46
  predicted_ids = torch.argmax(logits, dim=-1)
47
 
48
+ # Decode the speech
49
  transcription = tokenizer.batch_decode(predicted_ids, skip_special_tokens=True)[0]
50
 
51
  # Ensure transcription is a string and not empty
52
  if isinstance(transcription, str) and transcription.strip():
53
+ st.write("Transcription:", transcription) # For debugging
54
 
55
  # Show progress
56
  st.progress(75)
57
 
58
+ # Initialize the summarizer
59
  summarizer = pipeline("summarization")
60
+
61
+ # Summarization
62
  try:
63
+ # Ensure the transcription is a string
64
+ transcription_text = str(transcription)
65
+ summary = summarizer(transcription_text, max_length=130, min_length=30, do_sample=False)
66
  st.success("Done!")
67
  st.write("### Summary:")
68
  st.write(summary[0]['summary_text'])
69
 
70
  # Final progress
71
  st.progress(100)
72
+ except Exception as summarization_error:
73
+ st.error(f"Error during summarization: {summarization_error}")
74
  else:
75
  st.error("Could not transcribe audio or transcription is empty.")
76
 
77
+ except Exception as general_error:
78
+ st.error(f"An error occurred: {general_error}")