Spaces:
Runtime error
Runtime error
LucFast
committed on
Commit
·
a2d7dcb
1
Parent(s):
c357dd1
update audio path
Browse files
app.py
CHANGED
@@ -10,6 +10,7 @@ class GradioInference():
|
|
10 |
self.langs = ["none"] + sorted(list(whisper.tokenizer.LANGUAGES.values()))
|
11 |
self.current_size = "base"
|
12 |
self.loaded_model = whisper.load_model(self.current_size)
|
|
|
13 |
|
14 |
def download_videos(link):
|
15 |
"""Specify the yt-dlp parameters
|
@@ -32,10 +33,11 @@ class GradioInference():
|
|
32 |
with YoutubeDL(ydl_opts) as ydl:
|
33 |
ydl.download(link)
|
34 |
|
|
|
35 |
|
36 |
-
def detect_lang(self):
|
37 |
# load audio and pad/trim it to fit 30 seconds
|
38 |
-
audio = whisper.load_audio(
|
39 |
audio_segment = whisper.pad_or_trim(audio)
|
40 |
|
41 |
# make log-Mel spectrogram and move to the same device as the model
|
@@ -49,14 +51,15 @@ class GradioInference():
|
|
49 |
|
50 |
def __call__(self, link, lang, size, subs):
|
51 |
if self.yt is None:
|
52 |
-
self.
|
|
|
53 |
|
54 |
if size != self.current_size:
|
55 |
self.loaded_model = whisper.load_model(size)
|
56 |
self.current_size = size
|
57 |
|
58 |
if lang == "none":
|
59 |
-
lang = self.detect_lang()
|
60 |
|
61 |
options = whisper.DecodingOptions().__dict__.copy()
|
62 |
options["language"] = lang
|
@@ -65,7 +68,7 @@ class GradioInference():
|
|
65 |
del options["task"]
|
66 |
transcribe_options = dict(task="transcribe", **options)
|
67 |
translate_options = dict(task="translate", **options)
|
68 |
-
results = self.loaded_model.transcribe(
|
69 |
|
70 |
if subs == "None":
|
71 |
return results["text"]
|
|
|
10 |
self.langs = ["none"] + sorted(list(whisper.tokenizer.LANGUAGES.values()))
|
11 |
self.current_size = "base"
|
12 |
self.loaded_model = whisper.load_model(self.current_size)
|
13 |
+
self.yt = None
|
14 |
|
15 |
def download_videos(link):
|
16 |
"""Specify the yt-dlp parameters
|
|
|
33 |
with YoutubeDL(ydl_opts) as ydl:
|
34 |
ydl.download(link)
|
35 |
|
36 |
+
return f"{os.path.curdir}/tmp.wav"
|
37 |
|
38 |
+
def detect_lang(self, path):
|
39 |
# load audio and pad/trim it to fit 30 seconds
|
40 |
+
audio = whisper.load_audio(path)
|
41 |
audio_segment = whisper.pad_or_trim(audio)
|
42 |
|
43 |
# make log-Mel spectrogram and move to the same device as the model
|
|
|
51 |
|
52 |
def __call__(self, link, lang, size, subs):
|
53 |
if self.yt is None:
|
54 |
+
self.yt = YouTube(link)
|
55 |
+
path = self.download_videos(link)
|
56 |
|
57 |
if size != self.current_size:
|
58 |
self.loaded_model = whisper.load_model(size)
|
59 |
self.current_size = size
|
60 |
|
61 |
if lang == "none":
|
62 |
+
lang = self.detect_lang(path)
|
63 |
|
64 |
options = whisper.DecodingOptions().__dict__.copy()
|
65 |
options["language"] = lang
|
|
|
68 |
del options["task"]
|
69 |
transcribe_options = dict(task="transcribe", **options)
|
70 |
translate_options = dict(task="translate", **options)
|
71 |
+
results = self.loaded_model.transcribe(path, **transcribe_options)
|
72 |
|
73 |
if subs == "None":
|
74 |
return results["text"]
|