# femifoly's picture
# Update app.py
# 5721224
import gradio as gr
from googletrans import Translator
from transformers import pipeline
# Identifier of the fine-tuned Whisper model used for Spanish speech recognition.
_ASR_MODEL_ID = "femifoly/Spanish-Fine-Tune-whisper"

# Built once at module load so every request reuses the same pipeline object.
pipe = pipeline(model=_ASR_MODEL_ID)
def translate_and_transcribe(audio):
    """Transcribe Spanish speech and translate the transcript to English.

    Args:
        audio: Audio input forwarded unchanged to the module-level ``pipe``
            speech-recognition pipeline. ``None`` means nothing was recorded.

    Returns:
        A ``(transcription, pronunciation, translation)`` tuple:

        * ``transcription`` -- the ``"text"`` field of the pipeline result.
        * ``pronunciation`` -- the ``pronunciation`` attribute of the
          googletrans result for ``dest='es'``, passed through as-is
          (NOTE(review): googletrans may leave this as ``None``; confirm
          against the installed version).
        * ``translation`` -- the English translation text.

        When there is no audio, or the transcript is blank, the fields that
        cannot be computed are returned as empty strings.
    """
    # No recording was submitted: return blank fields instead of handing
    # ``None`` to the speech-recognition pipeline.
    if audio is None:
        return "", "", ""

    # Transcribe Spanish audio to text
    transcription = pipe(audio)["text"]

    # Nothing to translate: skip both translation round-trips.
    if not transcription.strip():
        return transcription, "", ""

    translator = Translator()

    # Translate the transcription to English
    translation = translator.translate(transcription, dest='en').text

    # Get the pronunciation of the transcription in Spanish
    pronunciation = translator.translate(transcription, dest='es').pronunciation

    return transcription, pronunciation, translation
# UI components are built from the top-level ``gr`` classes: the
# ``gr.inputs`` / ``gr.outputs`` namespaces are deprecated in Gradio 3.x and
# removed in 4.x. Arguments are unchanged.
# NOTE(review): ``source="microphone"`` is the Gradio 3.x spelling; confirm
# the installed Gradio version before upgrading further.
input_audio = gr.Audio(
    label="Upload your Spanish speech here. Try to say 'Hola', 'Como estas' or perhaps 'Que tal'",
    source="microphone",
    type="filepath",  # the callback receives a path to the recorded file
)

# One textbox per element of the tuple returned by translate_and_transcribe,
# in the same order.
output_textbox1 = gr.Textbox(label="Transcription")
output_textbox2 = gr.Textbox(label="Pronunciation")
output_textbox3 = gr.Textbox(label="Translation")

iface = gr.Interface(
    fn=translate_and_transcribe,
    inputs=input_audio,
    outputs=[output_textbox1, output_textbox2, output_textbox3],
    title="Spanish Automatic Speech Recognition, Pronunciation and Translation",
    description="Record Spanish speech to get its pronunciation and translate it to English. All done by using a fine-tuned version of the tiny Whisper model which is connected to a Google Translate API",
)

# Start serving the interface when the script runs.
iface.launch()