Update app.py
app.py CHANGED
@@ -4,6 +4,8 @@ import gradio as gr
 import yt_dlp as youtube_dl
 from transformers import pipeline
 from transformers.pipelines.audio_utils import ffmpeg_read
+from transformers import AutoTokenizer, AutoModelForSeq2SeqLM, pipeline
+from flores200_codes import flores_codes
 
 import tempfile
 import os
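The two added imports bring in the seq2seq classes for NLLB-200 and a local flores200_codes module that is not part of this commit. Presumably it exposes a flores_codes dict mapping display names to FLORES-200 language codes; a minimal hypothetical sketch follows (the entries are assumptions, and only 'English' and 'French' are referenced by the diff's defaults):

# flores200_codes.py -- hypothetical sketch; the real module is not shown in this diff.
# Maps the human-readable names used in the Gradio dropdowns to the FLORES-200
# codes that NLLB-200 expects as src_lang / tgt_lang.
flores_codes = {
    "English": "eng_Latn",
    "French": "fra_Latn",
    "Spanish": "spa_Latn",
    "German": "deu_Latn",
    # ... NLLB-200 covers roughly 200 languages in total
}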
@@ -23,12 +25,32 @@ pipe = pipeline(
 )
 
 
+def load_translation_model():
+    model_name = 'facebook/nllb-200-distilled-1.3B'
+    model = AutoModelForSeq2SeqLM.from_pretrained(model_name)
+    tokenizer = AutoTokenizer.from_pretrained(model_name)
+    return model, tokenizer
+
+translation_model, translation_tokenizer = load_translation_model()
+
+
+def translate_text(text, source_language, target_language):
+    source_code = flores_codes[source_language]
+    target_code = flores_codes[target_language]
+
+    translator = pipeline('translation', model=translation_model, tokenizer=translation_tokenizer, src_lang=source_code, tgt_lang=target_code)
+    output = translator(text, max_length=400)
+    return output[0]['translation_text']
+
+
+
 def transcribe(inputs, task):
     if inputs is None:
         raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")
 
     text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
-    return text
+    translated_text = translate_text(text, source_language, target_language)
+    return text, translated_text
 
 
 def _return_yt_html_embed(yt_url):
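Two things in this hunk are worth flagging. transcribe still takes only (inputs, task), so the source_language and target_language it passes to translate_text are undefined unless they are added as parameters fed by the new dropdowns. And translate_text rebuilds the translation pipeline on every call, which is costly for a 1.3B-parameter model. A minimal sketch of both fixes, assuming a transformers version whose translation pipeline accepts src_lang/tgt_lang per call (all other names come from app.py above):

# Sketch only -- reuses pipe, BATCH_SIZE, gr, flores_codes, translation_model and
# translation_tokenizer defined earlier in app.py.
translator = pipeline("translation", model=translation_model, tokenizer=translation_tokenizer)

def translate_text(text, source_language, target_language):
    # Pass the FLORES-200 codes per call instead of rebuilding the pipeline each time.
    output = translator(
        text,
        src_lang=flores_codes[source_language],
        tgt_lang=flores_codes[target_language],
        max_length=400,
    )
    return output[0]["translation_text"]

def transcribe(inputs, task, source_language, target_language):
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
    translated_text = translate_text(text, source_language, target_language)
    return text, translated_text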
@@ -88,6 +110,9 @@ def yt_transcribe(yt_url, task, max_filesize=75.0):
     return html_embed_str, text
 
 
+lang_codes = list(flores_codes.keys())
+
+
 demo = gr.Blocks()
 
 mf_transcribe = gr.Interface(
@@ -95,8 +120,10 @@ mf_transcribe = gr.Interface(
     inputs=[
         gr.inputs.Audio(source="microphone", type="filepath", optional=True),
         gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
+        gr.inputs.Dropdown(lang_codes, default='English', label='Source Language'),
+        gr.inputs.Dropdown(lang_codes, default='French', label='Target Language'),
     ],
-    outputs="text",
+    outputs=["text", "text"],
     layout="horizontal",
     theme="huggingface",
     title="Whisper Large V2: Transcribe Audio",
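This interface keeps the legacy gr.inputs.* components along with optional=, layout= and the theme="huggingface" string, which newer Gradio releases no longer accept. If the Space were moved to a current Gradio, the same block would look roughly like the sketch below (an assumption about the target version, not part of this commit): layout disappears and the two outputs become labelled textboxes to match the (text, translated_text) return value.

# Hypothetical modern-Gradio equivalent of mf_transcribe -- a sketch, not the committed code.
mf_transcribe = gr.Interface(
    fn=transcribe,
    inputs=[
        gr.Audio(sources=["microphone"], type="filepath"),
        gr.Radio(["transcribe", "translate"], label="Task", value="transcribe"),
        gr.Dropdown(lang_codes, value="English", label="Source Language"),
        gr.Dropdown(lang_codes, value="French", label="Target Language"),
    ],
    outputs=[gr.Textbox(label="Transcription"), gr.Textbox(label="Translation")],
    title="Whisper Large V2: Transcribe Audio",
)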
@@ -113,6 +140,8 @@ file_transcribe = gr.Interface(
     inputs=[
         gr.inputs.Audio(source="upload", type="filepath", optional=True, label="Audio file"),
         gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
+        gr.inputs.Dropdown(lang_codes, default='English', label='Source'),
+        gr.inputs.Dropdown(lang_codes, default='French', label='Target'),
     ],
     outputs="text",
     layout="horizontal",
@@ -130,7 +159,9 @@ yt_transcribe = gr.Interface(
     fn=yt_transcribe,
     inputs=[
         gr.inputs.Textbox(lines=1, placeholder="Paste the URL to a YouTube video here", label="YouTube URL"),
-        gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe")
+        gr.inputs.Radio(["transcribe", "translate"], label="Task", default="transcribe"),
+        gr.inputs.Dropdown(lang_codes, default='English', label='Source Language'),
+        gr.inputs.Dropdown(lang_codes, default='French', label='Target Language'),
     ],
     outputs=["html", "text"],
     layout="horizontal",
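The last two hunks add the same dropdowns to file_transcribe and yt_transcribe, but the wiring is not finished: file_transcribe keeps outputs="text" even though transcribe now returns two values, and yt_transcribe(yt_url, task, max_filesize=75.0) (see the hunk header above) does not accept the extra inputs at all. A sketch of the signature change it would need, with the download/transcription body elided since this commit does not touch it:

# Sketch only: accept the dropdown selections and return the translation as well;
# the interface's outputs would then become ["html", "text", "text"].
def yt_transcribe(yt_url, task, source_language, target_language, max_filesize=75.0):
    ...  # YouTube download and Whisper transcription as in app.py (not shown in this
    ...  # diff); that body defines html_embed_str and text used below.
    translated_text = translate_text(text, source_language, target_language)
    return html_embed_str, text, translated_text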