Mendoza33 committed on
Commit
8b34f22
·
verified ·
1 Parent(s): 62cf18e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -33
app.py CHANGED
@@ -1,50 +1,37 @@
1
- import gradio as gr
2
- from transformers import pipeline
3
  import torch
4
- import librosa
5
- import os
6
 
7
- # Custom imports for Kokoro-82M
8
- from models import build_model
9
  from kokoro import generate
10
- from IPython.display import Audio
11
-
12
- # Load pre-trained models
13
- stt_model = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
14
- nlp_model = pipeline("text-generation", model="sshleifer/tiny-gpt2")
15
 
16
- # Device setup (use GPU if available, otherwise fallback to CPU)
17
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
18
 
19
- # Load Kokoro-82M model and voicepack
20
- def load_kokoro_model():
21
- model = build_model('kokoro-v0_19.pth', device) # Adjust with the model path
22
- voice_name = 'af' # Default voice is a 50-50 mix of Bella & Sarah
23
- voicepack = torch.load(f'voices/{voice_name}.pt', weights_only=True).to(device)
24
- return model, voicepack
25
 
26
- # Load the Kokoro model once when the app starts
27
- kokoro_model, kokoro_voicepack = load_kokoro_model()
 
28
 
29
- # Define the function to handle the full workflow
30
  def conversation(audio):
31
  # Step 1: Convert speech to text
32
- audio_input, _ = librosa.load(audio, sr=16000) # Ensure correct audio sample rate
33
- text = stt_model(audio_input)["text"]
34
-
35
- # Step 2: Generate a response using GPT-2
36
  response = nlp_model(text, max_length=50)[0]["generated_text"]
37
-
38
- # Step 3: Convert response text to speech using Kokoro-82M
39
- audio_response, _ = generate(kokoro_model, response, kokoro_voicepack, lang='af') # Using 'af' as language (adjust if needed)
40
-
41
- # Return transcription, AI response, and generated audio
42
- return text, response, Audio(data=audio_response, rate=24000, autoplay=True)
43
 
44
  # Create Gradio Interface
45
  interface = gr.Interface(
46
  fn=conversation,
47
- inputs=gr.Audio(source="microphone", type="filepath"), # Microphone input for live audio
48
  outputs=[
49
  gr.Textbox(label="Transcription"),
50
  gr.Textbox(label="AI Response"),
@@ -53,4 +40,4 @@ interface = gr.Interface(
53
  )
54
 
55
  # Launch the app
56
- interface.launch(share=True) # Set `share=True` if you want to share the app via a link
 
 
 
1
  import torch
2
+ from transformers import pipeline
3
+ import gradio as gr
4
 
5
+ # Import Kokoro components
 
6
  from kokoro import generate
7
+ from models import build_model
 
 
 
 
8
 
9
+ # Set device (use GPU if available)
10
  device = 'cuda' if torch.cuda.is_available() else 'cpu'
11
 
12
+ # Load Kokoro model and voicepack from the root directory
13
+ MODEL = build_model('kokoro-v0_19.pth', device)
14
+ VOICE_NAME = 'af' # Default voice
15
+ VOICEPACK = torch.load(f'{VOICE_NAME}.pt', weights_only=True).to(device)
 
 
16
 
17
+ # Load pre-trained models for speech-to-text and text generation
18
+ stt_model = pipeline("automatic-speech-recognition", model="openai/whisper-tiny")
19
+ nlp_model = pipeline("text-generation", model="sshleifer/tiny-gpt2")
20
 
21
+ # Define the conversation function
22
  def conversation(audio):
23
  # Step 1: Convert speech to text
24
+ text = stt_model(audio)["text"]
25
+ # Step 2: Generate a response
 
 
26
  response = nlp_model(text, max_length=50)[0]["generated_text"]
27
+ # Step 3: Convert response text to speech using Kokoro model
28
+ audio_response, out_ps = generate(MODEL, response, VOICEPACK, lang=VOICE_NAME)
29
+ return text, response, audio_response
 
 
 
30
 
31
  # Create Gradio Interface
32
  interface = gr.Interface(
33
  fn=conversation,
34
+ inputs=gr.Audio(source="microphone", type="filepath"),
35
  outputs=[
36
  gr.Textbox(label="Transcription"),
37
  gr.Textbox(label="AI Response"),
 
40
  )
41
 
42
  # Launch the app
43
+ interface.launch()