Spaces:

keelezibel
/

WhisperTT

Runtime error

LucFast committed on Jan 13, 2023

Commit

c357dd1

1 Parent(s): 3d11acf

update filepath to audio input

Files changed (1) hide show

app.py CHANGED Viewed

@@ -26,17 +26,16 @@ class GradioInference():
                     "preferredcodec": "wav",
                 }
             ],
-            "outtmpl": "tmp.wav",
         }
         with YoutubeDL(ydl_opts) as ydl:
             ydl.download(link)
-        return "tmp.wav"
     def detect_lang(self):
         # load audio and pad/trim it to fit 30 seconds
-        audio = whisper.load_audio("tmp.wav")
         audio_segment = whisper.pad_or_trim(audio)
         # make log-Mel spectrogram and move to the same device as the model
@@ -50,7 +49,7 @@ class GradioInference():
     def __call__(self, link, lang, size, subs):
         if self.yt is None:
-            ret_path = self.download_videos(link)
         if size != self.current_size:
             self.loaded_model = whisper.load_model(size)
@@ -66,7 +65,7 @@ class GradioInference():
         del options["task"]
         transcribe_options = dict(task="transcribe", **options)
         translate_options = dict(task="translate", **options)
-        results = self.loaded_model.transcribe("tmp.wav", language=lang)
         if subs == "None":
             return results["text"]

                     "preferredcodec": "wav",
                 }
             ],
+            "outtmpl": f"{os.path.curdir}/tmp.%(ext)s",
         }
         with YoutubeDL(ydl_opts) as ydl:
             ydl.download(link)
     def detect_lang(self):
         # load audio and pad/trim it to fit 30 seconds
+        audio = whisper.load_audio(f"{os.path.curdir}/tmp.wav")
         audio_segment = whisper.pad_or_trim(audio)
         # make log-Mel spectrogram and move to the same device as the model
     def __call__(self, link, lang, size, subs):
         if self.yt is None:
+            self.download_videos(link)
         if size != self.current_size:
             self.loaded_model = whisper.load_model(size)
         del options["task"]
         transcribe_options = dict(task="transcribe", **options)
         translate_options = dict(task="translate", **options)
+        results = self.loaded_model.transcribe(f"{os.path.curdir}/tmp.wav", **transcribe_options)
         if subs == "None":
             return results["text"]