import torch
from transformers import pipeline
import gradio as gr

# Import Kokoro components
from kokoro import generate
from models import build_model

# Set device (use GPU if available)
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Load the Kokoro model and voicepack from the root directory
MODEL = build_model('kokoro-v0_19.pth', device)
VOICE_NAME = 'af'  # Default voice
VOICEPACK = torch.load(f'{VOICE_NAME}.pt', weights_only=True).to(device)

# Load pre-trained models for speech-to-text and text generation
stt_model = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
nlp_model = pipeline("text-generation", model="sshleifer/tiny-gpt2")
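
# The two pipelines can be smoke-tested on their own before wiring up the UI; a rough
# sketch (the file name 'sample.wav' is hypothetical, not part of this Space):
#   stt_model("sample.wav")            # -> {"text": "..."}
#   nlp_model("Hello", max_length=20)  # -> [{"generated_text": "Hello ..."}]
# Note that sshleifer/tiny-gpt2 is a tiny test checkpoint, so its replies are essentially random.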

# Define the conversation function
def conversation(audio):
    # Step 1: Convert speech to text
    text = stt_model(audio)["text"]

    # Step 2: Generate a response
    response = nlp_model(text, max_length=50)[0]["generated_text"]

    # Step 3: Convert the response text to speech with the Kokoro model
    # (lang is the first letter of the voice name, e.g. 'a' for American English)
    audio_response, out_ps = generate(MODEL, response, VOICEPACK, lang=VOICE_NAME[0])

    # Kokoro v0.19 generates 24 kHz audio; gr.Audio expects a (sample_rate, array) tuple
    return text, response, (24000, audio_response)
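
# Optional sanity check: a minimal sketch assuming a local recording named 'sample.wav'
# (hypothetical, not shipped with this Space); run it from a Python shell rather than
# leaving it live here, so it does not interfere with the Gradio app below:
#   transcription, reply, (sr, speech) = conversation("sample.wav")
#   print(transcription)
#   print(reply)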

# Create the Gradio interface
# Note: `source="microphone"` is the Gradio 3.x argument; Gradio 4+ renamed it to
# `sources=["microphone"]`, so adjust this to match the pinned Gradio version.
interface = gr.Interface(
    fn=conversation,
    inputs=gr.Audio(source="microphone", type="filepath"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="AI Response"),
        gr.Audio(label="Generated Speech"),
    ],
)

# Launch the app
interface.launch()
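
# Rough dependency sketch (assumed, not copied from this Space's requirements.txt):
#   pip install torch transformers gradio phonemizer scipy munch
# espeak-ng must also be installed on the system for Kokoro's phonemization, and
# kokoro-v0_19.pth plus the af.pt voicepack must sit next to this script.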