# Hugging Face Spaces page banner captured with this listing: status "Sleeping".
import logging
import tempfile

import gradio as gr
import numpy as np
import torch
import whisper
from bark import generate_audio
from scipy.io.wavfile import write
from transformers import MarianMTModel, MarianTokenizer
from transformers import pipeline
# Speech-recognition pipeline, built on first use and then shared by every
# call so the Whisper checkpoint is not reloaded for each request.
_ASR_PIPELINE = None


def _get_asr_pipeline():
    """Return the shared Whisper ASR pipeline, creating it on first use."""
    global _ASR_PIPELINE
    if _ASR_PIPELINE is None:
        _ASR_PIPELINE = pipeline(
            "automatic-speech-recognition", model="openai/whisper-small"
        )
    return _ASR_PIPELINE


def transcribir_audio(audio):
    """Transcribe an audio input to text.

    Only transcription happens here; translation is a separate step.

    Args:
        audio: Audio input forwarded unchanged to the Hugging Face
            ``automatic-speech-recognition`` pipeline.

    Returns:
        The ``"text"`` entry of the pipeline result.
    """
    result = _get_asr_pipeline()(audio)
    return result["text"]
# MarianMT checkpoint used for translation, plus a lazily-filled cache of its
# (tokenizer, model) pair so the weights are loaded once instead of per call.
_TRANSLATION_MODEL_NAME = "Helsinki-NLP/opus-mt-es-en"
_TRANSLATION_BUNDLE = None


def _get_translation_bundle():
    """Return the shared ``(tokenizer, model)`` pair, loading it on first use."""
    global _TRANSLATION_BUNDLE
    if _TRANSLATION_BUNDLE is None:
        tokenizer = MarianTokenizer.from_pretrained(_TRANSLATION_MODEL_NAME)
        model = MarianMTModel.from_pretrained(_TRANSLATION_MODEL_NAME)
        _TRANSLATION_BUNDLE = (tokenizer, model)
    return _TRANSLATION_BUNDLE


def traducir_texto(texto):
    """Translate text with the ``Helsinki-NLP/opus-mt-es-en`` MarianMT model.

    Args:
        texto: Text to translate.

    Returns:
        The first decoded translation, or ``""`` when ``texto`` is a blank
        string (the model is not invoked in that case).
    """
    # Nothing to translate: skip loading/running the model for blank input.
    if isinstance(texto, str) and not texto.strip():
        return ""

    tokenizer, model = _get_translation_bundle()
    # Tokenize the text.
    inputs = tokenizer(texto, return_tensors="pt", padding=True, truncation=True)
    # Generate the translation.
    translated = model.generate(**inputs)
    # Decode the translation, dropping special tokens.
    return tokenizer.batch_decode(translated, skip_special_tokens=True)[0]
def generar_audio(text):
    """Synthesize speech for ``text`` and save it as a 16-bit WAV file.

    Args:
        text: The text to synthesize; must be a ``str``.

    Returns:
        Path of the temporary ``.wav`` file that was written. The file is
        created with ``delete=False``, so removing it is left to the caller.

    Raises:
        ValueError: If ``text`` is not a string.
    """
    if not isinstance(text, str):
        raise ValueError("El texto debe ser una cadena")

    # Clamp to [-1, 1] before scaling so the int16 conversion cannot wrap.
    samples = np.clip(generate_audio(text), -1.0, 1.0)
    pcm = (samples * 32767).astype(np.int16)

    # Reserve a unique path and close the handle immediately: the original
    # kept the descriptor open forever (one leaked fd per request), and an
    # open handle also blocks re-opening the file by name on Windows.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".wav") as tmp:
        wav_path = tmp.name

    # 24000 Hz is the rate the WAV is tagged with; presumably Bark's output
    # sample rate -- TODO confirm against bark.SAMPLE_RATE.
    write(wav_path, 24000, pcm)
    return wav_path
def process_audio(audio_file):
    """Run the full pipeline: transcribe, translate, then synthesize speech.

    Args:
        audio_file: Audio input handed to ``transcribir_audio``.

    Returns:
        A ``(text, audio_path)`` tuple. On success ``text`` is the translated
        transcription and ``audio_path`` is the generated audio file. When no
        audio was supplied, or any step fails, ``text`` carries a message and
        ``audio_path`` is ``None``.
    """
    # Guard against clicking the button without providing any audio.
    if audio_file is None or (isinstance(audio_file, str) and not audio_file.strip()):
        return "Sube un archivo de audio antes de procesar", None

    try:
        # Step 1: transcription.
        transcripcion = transcribir_audio(audio_file)
        # Step 2: translation.
        transcripcion_traducida = traducir_texto(transcripcion)
        # Step 3: speech synthesis.
        audio_sintetizado = generar_audio(transcripcion_traducida)
    except Exception as e:
        # UI boundary: surface the message to the user instead of crashing,
        # but keep the traceback in the logs so failures can be diagnosed.
        logging.getLogger(__name__).exception("Audio processing failed")
        return str(e), None

    return transcripcion_traducida, audio_sintetizado
# Build the Gradio interface.
with gr.Blocks() as demo:
    gr.Markdown("### Transcripción y Síntesis de Voz")
    # NOTE(review): the original indentation was lost; placing the three
    # components inside the row (and the button below it) is a reconstruction
    # -- confirm against the intended layout.
    with gr.Row():
        input_audio = gr.Audio(label="Sube tu archivo de audio", type="filepath")
        transcription_output = gr.Textbox(label="Texto traducido al inglés")
        output_audio = gr.Audio(label="Audio generado")
    process_button = gr.Button("Procesar")
    # Wire the button to the pipeline: one audio input, text + audio outputs.
    process_button.click(
        process_audio,
        inputs=input_audio,
        outputs=[transcription_output, output_audio],
    )

# Launch the app.
demo.launch(share=True)