Spaces:

younes21000
/

DAI_Project

Sleeping

younes21000 committed on Oct 10, 2024

Commit

a5ff31f

verified ·

1 Parent(s): 089db12

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,5 +1,7 @@
 import gradio as gr
 import moviepy.editor as mp
 from transformers import pipeline
 # Load Whisper model for speech-to-text
@@ -35,14 +37,20 @@ def generate_subtitles(video_file, language_name):
         # Extract audio from video using moviepy
         video = mp.VideoFileClip(video_path)
         audio = video.audio
-        audio.write_audiofile("temp_audio.wav", codec='pcm_s16le')
         print("Starting speech-to-text transcription")
-        # Convert speech to text (ASR using Whisper)
-        with open("temp_audio.wav", "rb") as audio_file:
-            transcription = asr(audio_file)["text"]
         print("Starting translation")

 import gradio as gr
 import moviepy.editor as mp
+import librosa
+import numpy as np
 from transformers import pipeline
 # Load Whisper model for speech-to-text
         # Extract audio from video using moviepy
         video = mp.VideoFileClip(video_path)
+        audio_path = "temp_audio.wav"
         audio = video.audio
+        audio.write_audiofile(audio_path, codec='pcm_s16le')
         print("Starting speech-to-text transcription")
+        # Load the audio file as a waveform using librosa
+        waveform, sr = librosa.load(audio_path, sr=16000)  # sr=16000 for Whisper
+        # Convert the waveform (numpy array) to a list of floats (required by Whisper)
+        waveform_list = waveform.tolist()
+        # Pass the waveform to Whisper's ASR model
+        transcription = asr(waveform_list)["text"]
         print("Starting translation")