Spaces:

Mendoza33
/

test-do-call

Runtime error

App Files Community

Mendoza33 committed on Jan 16

Commit

8b34f22

verified ·

1 Parent(s): 62cf18e

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -33

app.py CHANGED Viewed

@@ -1,50 +1,37 @@
-import gradio as gr
-from transformers import pipeline
 import torch
-import librosa
-import os
-# Custom imports for Kokoro-82M
-from models import build_model
 from kokoro import generate
-from IPython.display import Audio
-# Load pre-trained models
-stt_model = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
-nlp_model = pipeline("text-generation", model="sshleifer/tiny-gpt2")
-# Device setup (use GPU if available, otherwise fallback to CPU)
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
-# Load Kokoro-82M model and voicepack
-def load_kokoro_model():
-    model = build_model('kokoro-v0_19.pth', device)  # Adjust with the model path
-    voice_name = 'af'  # Default voice is a 50-50 mix of Bella & Sarah
-    voicepack = torch.load(f'voices/{voice_name}.pt', weights_only=True).to(device)
-    return model, voicepack
-# Load the Kokoro model once when the app starts
-kokoro_model, kokoro_voicepack = load_kokoro_model()
-# Define the function to handle the full workflow
 def conversation(audio):
     # Step 1: Convert speech to text
-    audio_input, _ = librosa.load(audio, sr=16000)  # Ensure correct audio sample rate
-    text = stt_model(audio_input)["text"]
-    # Step 2: Generate a response using GPT-2
     response = nlp_model(text, max_length=50)[0]["generated_text"]
-    # Step 3: Convert response text to speech using Kokoro-82M
-    audio_response, _ = generate(kokoro_model, response, kokoro_voicepack, lang='af')  # Using 'af' as language (adjust if needed)
-    # Return transcription, AI response, and generated audio
-    return text, response, Audio(data=audio_response, rate=24000, autoplay=True)
 # Create Gradio Interface
 interface = gr.Interface(
     fn=conversation,
-    inputs=gr.Audio(source="microphone", type="filepath"),  # Microphone input for live audio
     outputs=[
         gr.Textbox(label="Transcription"),
         gr.Textbox(label="AI Response"),
@@ -53,4 +40,4 @@ interface = gr.Interface(
 )
 # Launch the app
-interface.launch(share=True)  # Set `share=True` if you want to share the app via a link

 import torch
+from transformers import pipeline
+import gradio as gr
+# Import Kokoro components
 from kokoro import generate
+from models import build_model
+# Set device (use GPU if available)
 device = 'cuda' if torch.cuda.is_available() else 'cpu'
+# Load Kokoro model and voicepack from the root directory
+MODEL = build_model('kokoro-v0_19.pth', device)
+VOICE_NAME = 'af'  # Default voice
+VOICEPACK = torch.load(f'{VOICE_NAME}.pt', weights_only=True).to(device)
+# Load pre-trained models for speech-to-text and text generation
+stt_model = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
+nlp_model = pipeline("text-generation", model="sshleifer/tiny-gpt2")
+# Define the conversation function
 def conversation(audio):
     # Step 1: Convert speech to text
+    text = stt_model(audio)["text"]
+    # Step 2: Generate a response
     response = nlp_model(text, max_length=50)[0]["generated_text"]
+    # Step 3: Convert response text to speech using Kokoro model
+    audio_response, out_ps = generate(MODEL, response, VOICEPACK, lang=VOICE_NAME)
+    return text, response, audio_response
 # Create Gradio Interface
 interface = gr.Interface(
     fn=conversation,
+    inputs=gr.Audio(source="microphone", type="filepath"),
     outputs=[
         gr.Textbox(label="Transcription"),
         gr.Textbox(label="AI Response"),
 )
 # Launch the app
+interface.launch()