Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -4,9 +4,15 @@ import numpy as np
|
|
4 |
import librosa
|
5 |
import gradio as gr
|
6 |
from IPython.display import Audio as IPythonAudio
|
|
|
|
|
7 |
|
8 |
asr = pipeline("automatic-speech-recognition", model="distil-whisper/distil-small.en") #sound to text model
|
9 |
|
|
|
|
|
|
|
|
|
10 |
demo = gr.Blocks()
|
11 |
def transcribe_long_form(filepath):
|
12 |
if filepath is None:
|
@@ -28,30 +34,32 @@ def transcribe_long_form(filepath):
|
|
28 |
chunk_length_s=30,
|
29 |
batch_size=12,
|
30 |
)
|
31 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
32 |
|
33 |
mic_transcribe = gr.Interface(
|
34 |
fn=transcribe_long_form,
|
35 |
inputs=gr.Audio(sources="microphone",
|
36 |
type="filepath"),
|
37 |
-
outputs=gr.Textbox(
|
38 |
-
lines=3),
|
39 |
allow_flagging="never")
|
40 |
|
41 |
file_transcribe = gr.Interface(
|
42 |
fn=transcribe_long_form,
|
43 |
inputs=gr.Audio(sources="upload",
|
44 |
type="filepath"),
|
45 |
-
outputs=gr.Textbox(
|
46 |
-
lines=3),
|
47 |
allow_flagging="never",
|
48 |
)
|
49 |
|
50 |
-
with demo:
|
51 |
-
gr.TabbedInterface(
|
52 |
-
[mic_transcribe,
|
53 |
-
file_transcribe],
|
54 |
-
["Transcribe Microphone",
|
55 |
-
"Transcribe Audio File"],
|
56 |
-
)
|
57 |
demo.launch()
|
|
|
4 |
import librosa
|
5 |
import gradio as gr
|
6 |
from IPython.display import Audio as IPythonAudio
|
7 |
+
import torch
|
8 |
+
import tempfile
|
9 |
|
10 |
asr = pipeline("automatic-speech-recognition", model="distil-whisper/distil-small.en") #sound to text model
|
11 |
|
12 |
+
tr = pipeline("translation", model="facebook/nllb-200-distilled-600M", torch_dtype=torch.bfloat16) #text translator model
|
13 |
+
|
14 |
+
narrator = pipeline("text-to-speech", model="facebook/mms-tts-spa") #text to speech spanish
|
15 |
+
|
16 |
demo = gr.Blocks()
|
17 |
def transcribe_long_form(filepath):
|
18 |
if filepath is None:
|
|
|
34 |
chunk_length_s=30,
|
35 |
batch_size=12,
|
36 |
)
|
37 |
+
|
38 |
+
text_translated = tr(output["text"],
|
39 |
+
src_lang="eng_Latn",
|
40 |
+
tgt_lang="spa_Latn")
|
41 |
+
|
42 |
+
completed_translation = text_translated[0]['translation_text']
|
43 |
+
narrated_text = narrator(completed_translation)
|
44 |
+
|
45 |
+
# Save the narrated audio to a temporary file
|
46 |
+
with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmpfile:
|
47 |
+
sf.write(tmpfile.name, narrated_text['audio'][0], narrated_text['sampling_rate'])
|
48 |
+
return tmpfile.name
|
49 |
|
50 |
mic_transcribe = gr.Interface(
|
51 |
fn=transcribe_long_form,
|
52 |
inputs=gr.Audio(sources="microphone",
|
53 |
type="filepath"),
|
54 |
+
outputs=gr.Audio(label="Translated Audio"),
|
|
|
55 |
allow_flagging="never")
|
56 |
|
57 |
file_transcribe = gr.Interface(
|
58 |
fn=transcribe_long_form,
|
59 |
inputs=gr.Audio(sources="upload",
|
60 |
type="filepath"),
|
61 |
+
outputs=gr.Audio(label="Translated Audio"),
|
|
|
62 |
allow_flagging="never",
|
63 |
)
|
64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
65 |
demo.launch()
|