File size: 1,427 Bytes
09f23d1
 
 
 
5721224
09f23d1
 
 
 
5721224
09f23d1
 
 
 
 
 
5721224
 
09f23d1
 
 
5721224
09f23d1
 
 
 
 
 
 
 
5721224
 
09f23d1
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import gradio as gr
from googletrans import Translator
from transformers import pipeline

# Hub identifier of the fine-tuned Whisper checkpoint used for Spanish
# speech recognition.
_ASR_MODEL_ID = "femifoly/Spanish-Fine-Tune-whisper"

# Built once at module level; translate_and_transcribe() reuses this object
# for every call instead of constructing a new pipeline per request.
pipe = pipeline(model=_ASR_MODEL_ID)

def translate_and_transcribe(audio):
    """Transcribe Spanish speech, then translate the transcript to English.

    Args:
        audio: The recording, forwarded unchanged to the module-level
            ``pipe`` speech-recognition pipeline (the UI is configured to
            supply a file path). ``None`` is treated as "nothing recorded".

    Returns:
        A ``(transcription, pronunciation, translation)`` tuple of strings.
        All three are empty when ``audio`` is ``None``; the last two are
        empty when the transcription is empty or only whitespace.
    """
    # NOTE(review): Gradio appears to pass None when the user submits
    # without recording anything -- confirm for the installed version.
    # Bail out before touching the model or the translation service.
    if audio is None:
        return "", "", ""

    # Transcribe Spanish audio to text
    transcription = pipe(audio)["text"]

    # Nothing was recognised: there is nothing to translate, so skip the
    # two translation requests entirely.
    if not transcription or not transcription.strip():
        return transcription or "", "", ""

    translator = Translator()

    # Translate the transcription to English
    translation = translator.translate(transcription, dest='en').text

    # Get the pronunciation of the transcription in Spanish.
    # `pronunciation` may come back as None (presumably when the service
    # provides none -- verify against googletrans); normalise it so the
    # output always receives a string.
    pronunciation = translator.translate(transcription, dest='es').pronunciation or ""

    return transcription, pronunciation, translation

# Audio input: recorded from the microphone and passed to the callback as a
# file path.
input_audio = gr.inputs.Audio(
    source="microphone",
    type="filepath",
    label="Upload your Spanish speech here. Try to say 'Hola', 'Como estas' or perhaps 'Que tal'",
)

# One text box per value returned by translate_and_transcribe(), created in
# the same order as the returned tuple.
output_textbox1, output_textbox2, output_textbox3 = [
    gr.outputs.Textbox(label=caption)
    for caption in ("Transcription", "Pronunciation", "Translation")
]

# Static page copy shown above the controls.
_APP_TITLE = "Spanish Automatic Speech Recognition, Pronunciation and Translation"
_APP_DESCRIPTION = "Record Spanish speech to get its pronunciation and translate it to English. All done by using a fine-tuned version of the tiny Whisper model which is connected to a Google Translate API"

# Wire the callback, its single input and its three outputs together.
_interface_options = {
    "fn": translate_and_transcribe,
    "inputs": input_audio,
    "outputs": [output_textbox1, output_textbox2, output_textbox3],
    "title": _APP_TITLE,
    "description": _APP_DESCRIPTION,
}
iface = gr.Interface(**_interface_options)

# Start serving the app as soon as the script is executed.
iface.launch()