Spaces:

Mendoza33
/

test-do-call

Runtime error

App Files Files Community

Mendoza33 committed on Jan 16

Commit

95daf41

verified ·

1 Parent(s): ac1d4d3

Update app.py

Browse files

Files changed (1) hide show

app.py +31 -16

app.py CHANGED Viewed

@@ -1,35 +1,50 @@
-from transformers import pipeline
 import gradio as gr
 # Load pre-trained models
 stt_model = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
 nlp_model = pipeline("text-generation", model="sshleifer/tiny-gpt2")
-# Attempt to load Kokoro-82M for TTS (this might require adjustments depending on model support)
-tts_model = pipeline("text-to-speech", model="hexgrad/Kokoro-82M")
-# Define a function to handle the workflow
 def conversation(audio):
     # Step 1: Convert speech to text
-    text = stt_model(audio)["text"]
-    # Step 2: Generate a response (contextual supermarket-related training)
-    if "supermarket" in text.lower():
-        # Simple supermarket-based response; this can be expanded with more specific data
-        response = "Are you looking for something in particular at the supermarket?"
-    else:
-        # Default response generation (using GPT-2 model)
-        response = nlp_model(text, max_length=50)[0]["generated_text"]
     # Step 3: Convert response text to speech using Kokoro-82M
-    tts_audio = tts_model(response)
-    return text, response, tts_audio
 # Create Gradio Interface
 interface = gr.Interface(
     fn=conversation,
-    inputs=gr.Audio(source="microphone", type="filepath"),
     outputs=[
         gr.Textbox(label="Transcription"),
         gr.Textbox(label="AI Response"),
@@ -38,4 +53,4 @@ interface = gr.Interface(
 )
 # Launch the app
-interface.launch()

 import gradio as gr
+from transformers import pipeline
+import torch
+import librosa
+import os
+# Custom imports for Kokoro-82M
+from models import build_model
+from kokoro import generate
+from IPython.display import Audio
 # Load pre-trained models
 stt_model = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
 nlp_model = pipeline("text-generation", model="sshleifer/tiny-gpt2")
+# Device setup (use GPU if available, otherwise fallback to CPU)
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+# Load Kokoro-82M model and voicepack
+def load_kokoro_model():
+    model = build_model('kokoro-v0_19.pth', device)  # Adjust with the model path
+    voice_name = 'af'  # Default voice is a 50-50 mix of Bella & Sarah
+    voicepack = torch.load(f'voices/{voice_name}.pt', weights_only=True).to(device)
+    return model, voicepack
+# Load the Kokoro model once when the app starts
+kokoro_model, kokoro_voicepack = load_kokoro_model()
+# Define the function to handle the full workflow
 def conversation(audio):
     # Step 1: Convert speech to text
+    audio_input, _ = librosa.load(audio, sr=16000)  # Ensure correct audio sample rate
+    text = stt_model(audio_input)["text"]
+    # Step 2: Generate a response using GPT-2
+    response = nlp_model(text, max_length=50)[0]["generated_text"]
     # Step 3: Convert response text to speech using Kokoro-82M
+    audio_response, _ = generate(kokoro_model, response, kokoro_voicepack, lang='af')  # Using 'af' as language (adjust if needed)
+    # Return transcription, AI response, and generated audio
+    return text, response, Audio(data=audio_response, rate=24000, autoplay=True)
 # Create Gradio Interface
 interface = gr.Interface(
     fn=conversation,
+    inputs=gr.Audio(source="microphone", type="filepath"),  # Microphone input for live audio
     outputs=[
         gr.Textbox(label="Transcription"),
         gr.Textbox(label="AI Response"),
 )
 # Launch the app
+interface.launch(share=True)  # Set `share=True` if you want to share the app via a link