tonyliu404 committed
Commit dd4fe31 · verified · 1 Parent(s): a45489b

Update app.py

Files changed (1)
  1. app.py +20 -12
app.py CHANGED
@@ -4,9 +4,15 @@ import numpy as np
 import librosa
 import gradio as gr
 from IPython.display import Audio as IPythonAudio
+import torch
+import tempfile
 
 asr = pipeline("automatic-speech-recognition", model="distil-whisper/distil-small.en") #sound to text model
 
+tr = pipeline("translation", model="facebook/nllb-200-distilled-600M", torch_dtype=torch.bfloat16) #text translator model
+
+narrator = pipeline("text-to-speech", model="facebook/mms-tts-spa") #text to speech spanish
+
 demo = gr.Blocks()
 def transcribe_long_form(filepath):
     if filepath is None:
@@ -28,30 +34,32 @@ def transcribe_long_form(filepath):
         chunk_length_s=30,
         batch_size=12,
     )
-    return output["text"]
+
+    text_translated = tr(output["text"],
+                         src_lang="eng_Latn",
+                         tgt_lang="spa_Latn")
+
+    completed_translation = text_translated[0]['translation_text']
+    narrated_text = narrator(completed_translation)
+
+    # Save the narrated audio to a temporary file
+    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmpfile:
+        sf.write(tmpfile.name, narrated_text['audio'][0], narrated_text['sampling_rate'])
+    return tmpfile.name
 
 mic_transcribe = gr.Interface(
     fn=transcribe_long_form,
     inputs=gr.Audio(sources="microphone",
                     type="filepath"),
-    outputs=gr.Textbox(label="Transcription",
-                       lines=3),
+    outputs=gr.Audio(label="Translated Audio"),
     allow_flagging="never")
 
 file_transcribe = gr.Interface(
     fn=transcribe_long_form,
     inputs=gr.Audio(sources="upload",
                     type="filepath"),
-    outputs=gr.Textbox(label="Transcription",
-                       lines=3),
+    outputs=gr.Audio(label="Translated Audio"),
     allow_flagging="never",
 )
 
-with demo:
-    gr.TabbedInterface(
-        [mic_transcribe,
-         file_transcribe],
-        ["Transcribe Microphone",
-         "Transcribe Audio File"],
-    )
 demo.launch()
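
After this commit, transcribe_long_form no longer returns the English transcript; it chains three pipelines (distil-whisper ASR -> NLLB English-to-Spanish translation -> MMS Spanish TTS) and returns the path of a temporary .wav file, which the gr.Audio output components can play. Below is a minimal standalone sketch of that chain, not the exact app.py: it assumes the unchanged top of the file (lines 1-3, not shown in the diff) provides `from transformers import pipeline` and `import soundfile as sf`, approximates the hidden middle of the function with a direct asr(filepath, ...) call, and uses a hypothetical input file name.

import tempfile

import soundfile as sf              # assumed source of the sf.write call in the diff
import torch
from transformers import pipeline   # assumed to be imported at the top of app.py

# Same model checkpoints as in the commit.
asr = pipeline("automatic-speech-recognition",
               model="distil-whisper/distil-small.en")                 # speech -> English text
tr = pipeline("translation",
              model="facebook/nllb-200-distilled-600M",
              torch_dtype=torch.bfloat16)                              # English -> Spanish text
narrator = pipeline("text-to-speech", model="facebook/mms-tts-spa")    # Spanish text -> speech


def speech_to_spanish_speech(filepath: str) -> str:
    """Transcribe an audio file, translate to Spanish, narrate, and return a wav path."""
    # Long-form transcription, chunked as in the commit.
    output = asr(filepath, chunk_length_s=30, batch_size=12)

    # NLLB expects FLORES-200 language codes such as eng_Latn / spa_Latn.
    translated = tr(output["text"], src_lang="eng_Latn", tgt_lang="spa_Latn")
    spanish_text = translated[0]["translation_text"]

    # The TTS pipeline returns a dict with an "audio" array and its "sampling_rate".
    speech = narrator(spanish_text)

    # Write the waveform to a temporary file so gr.Audio can serve it by path.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmpfile:
        sf.write(tmpfile.name, speech["audio"][0], speech["sampling_rate"])
    return tmpfile.name


if __name__ == "__main__":
    print(speech_to_spanish_speech("sample.wav"))  # hypothetical input file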