Spaces:
Runtime error
Runtime error
File size: 1,423 Bytes
95daf41 8b34f22 95daf41 8b34f22 95daf41 8b34f22 1b48e7b 8b34f22 95daf41 8b34f22 95daf41 8b34f22 080b398 8b34f22 9a81fc4 8b34f22 95daf41 8b34f22 9a81fc4 8b34f22 9a81fc4 8b34f22 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
import torch
from transformers import pipeline
import gradio as gr
# Import Kokoro components
from kokoro import generate
from models import build_model
# Set device (use GPU if available)
device = 'cuda' if torch.cuda.is_available() else 'cpu'
# Load Kokoro model and voicepack from the root directory
# NOTE: both 'kokoro-v0_19.pth' and the voicepack '.pt' file must sit next to
# this script — build_model/torch.load raise FileNotFoundError otherwise.
MODEL = build_model('kokoro-v0_19.pth', device)
VOICE_NAME = 'af' # Default voice
# weights_only=True restricts torch.load to tensor data (safe-load, no pickle code exec).
VOICEPACK = torch.load(f'{VOICE_NAME}.pt', weights_only=True).to(device)
# Load pre-trained models for speech-to-text and text generation
# whisper-tiny / tiny-gpt2 are deliberately small checkpoints so the app
# starts quickly on CPU-only Spaces hardware.
stt_model = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
nlp_model = pipeline("text-generation", model="sshleifer/tiny-gpt2")
# Define the conversation function
def conversation(audio):
    """Run one voice-chat turn: transcribe, generate a reply, synthesize speech.

    Args:
        audio: Filepath to the recorded input clip (Gradio ``type="filepath"``).

    Returns:
        tuple: ``(transcription, response_text, (sample_rate, waveform))`` —
        the last element is shaped for a ``gr.Audio`` output component.
    """
    # Step 1: Convert speech to text with Whisper.
    text = stt_model(audio)["text"]
    # Step 2: Generate a response (max_length caps total tokens, prompt included).
    response = nlp_model(text, max_length=50)[0]["generated_text"]
    # Step 3: Convert response text to speech using the Kokoro model.
    # FIX: generate() expects a one-letter language code — the first character
    # of the voice name ('a' = American, 'b' = British English). Passing the
    # full voice name ('af') fails at synthesis time.
    audio_response, out_ps = generate(MODEL, response, VOICEPACK, lang=VOICE_NAME[0])
    # FIX: Kokoro returns a raw waveform at 24 kHz; gr.Audio cannot render a
    # bare array — it needs a (sample_rate, data) tuple.
    return text, response, (24000, audio_response)
# Create Gradio Interface
# Create the Gradio interface: microphone input; transcription, reply text,
# and synthesized speech as outputs (matching conversation()'s return tuple).
interface = gr.Interface(
    fn=conversation,
    # FIX: Gradio 4.x renamed `source` to `sources` (now a list of allowed
    # input sources); the old keyword raises a TypeError at startup.
    inputs=gr.Audio(sources=["microphone"], type="filepath"),
    outputs=[
        gr.Textbox(label="Transcription"),
        gr.Textbox(label="AI Response"),
        gr.Audio(label="Generated Speech"),
    ],
)

# Launch the app (blocks until the server is stopped).
interface.launch()
|