younes21000 committed on
Commit
a5ff31f
·
verified ·
1 Parent(s): 089db12

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -4
app.py CHANGED
@@ -1,5 +1,7 @@
1
  import gradio as gr
2
  import moviepy.editor as mp
 
 
3
  from transformers import pipeline
4
 
5
  # Load Whisper model for speech-to-text
@@ -35,14 +37,20 @@ def generate_subtitles(video_file, language_name):
35
 
36
  # Extract audio from video using moviepy
37
  video = mp.VideoFileClip(video_path)
 
38
  audio = video.audio
39
- audio.write_audiofile("temp_audio.wav", codec='pcm_s16le')
40
 
41
  print("Starting speech-to-text transcription")
42
 
43
- # Convert speech to text (ASR using Whisper)
44
- with open("temp_audio.wav", "rb") as audio_file:
45
- transcription = asr(audio_file)["text"]
 
 
 
 
 
46
 
47
  print("Starting translation")
48
 
 
1
  import gradio as gr
2
  import moviepy.editor as mp
3
+ import librosa
4
+ import numpy as np
5
  from transformers import pipeline
6
 
7
  # Load Whisper model for speech-to-text
 
37
 
38
  # Extract audio from video using moviepy
39
  video = mp.VideoFileClip(video_path)
40
+ audio_path = "temp_audio.wav"
41
  audio = video.audio
42
+ audio.write_audiofile(audio_path, codec='pcm_s16le')
43
 
44
  print("Starting speech-to-text transcription")
45
 
46
+ # Load the audio file as a waveform using librosa
47
+ waveform, sr = librosa.load(audio_path, sr=16000) # sr=16000 for Whisper
48
+
49
+ # Convert the waveform (numpy array) to a list of floats (required by Whisper)
50
+ waveform_list = waveform.tolist()
51
+
52
+ # Pass the waveform to Whisper's ASR model
53
+ transcription = asr(waveform_list)["text"]
54
 
55
  print("Starting translation")
56