# Hugging Face Space: hamzaherry/voice-chat (status: Sleeping; file size: 2,026 bytes)

import os
import whisper
from groq import Groq
from gtts import gTTS
import tempfile
import gradio as gr

# Step 1: Set up Whisper for transcription.
# The "base" checkpoint is loaded once, at import time, and the resulting
# object is shared by every call to transcribe_audio() via this module-level
# name.
# NOTE(review): `whisper.load_model` presumably comes from the
# `openai-whisper` distribution rather than another package that also
# installs a `whisper` module — confirm against the project's requirements.
model = whisper.load_model("base")  # Ensure correct version of whisper is installed

# Speech-to-text helper built on the module-level Whisper model
def transcribe_audio(audio_file):
    """Transcribe ``audio_file`` with the shared Whisper model.

    Args:
        audio_file: Audio input handed unchanged to ``model.transcribe``.
            The Gradio input in this file uses ``type="filepath"``, so
            callers here pass a path to an audio file.

    Returns:
        The value stored under the ``"text"`` key of the transcription
        result.
    """
    return model.transcribe(audio_file)["text"]

# Step 2: Set up Groq API for interacting with the LLM (e.g., Llama 3)
# The key is read from the GROQ_API_KEY environment variable (for example a
# Space secret) so that a real credential never has to be committed to the
# source file. The original placeholder is kept as a fallback only so that
# importing this module behaves as before when the variable is unset or
# empty; requests made with the placeholder will presumably be rejected by
# the API.
api_key = os.environ.get("GROQ_API_KEY") or "your_groq_api_key"
client = Groq(api_key=api_key)

# Function to get a response from the Groq LLM (Llama 3 by default)
def get_groq_response(text, model_name="llama3-8b-8192"):
    """Send ``text`` to Groq as a single user message and return the reply.

    Args:
        text: Prompt forwarded as the content of one ``"user"`` message.
        model_name: Identifier of the Groq-hosted model to query. Defaults
            to ``"llama3-8b-8192"``, the model this function always used
            before the parameter existed, so existing callers are
            unaffected.

    Returns:
        The ``message.content`` of the first choice in the completion.
    """
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": text}],
        model=model_name,
        stream=False,  # Request the whole reply in one response object.
    )
    return chat_completion.choices[0].message.content

# Step 3: Convert text response from LLM to speech using GTTS
def text_to_speech(text):
    """Synthesize ``text`` with gTTS and return the path of the audio file.

    Args:
        text: The text to speak. It is passed straight to ``gTTS``.
            NOTE(review): gTTS presumably rejects empty text — callers
            should not pass an empty string; confirm against gTTS docs.

    Returns:
        Path of a newly created temporary ``.mp3`` file containing the
        speech. The file is created with ``delete=False`` and is not removed
        by this function, so it outlives the call and can be served by
        Gradio.
    """
    tts = gTTS(text)
    # Reserve a unique path, then close our handle before gTTS writes to it.
    # Leaving the handle open leaks a file descriptor on every call and
    # prevents a second open of the same file on platforms that lock open
    # files. The ".mp3" suffix lets consumers recognise the audio format
    # from the file name.
    with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as temp_file:
        audio_path = temp_file.name
    tts.save(audio_path)
    # Return the file path for Gradio to play the audio
    return audio_path

# Step 4: Integrate everything into a Gradio interface
def voice_chatbot(audio_input):
    """Run the full voice pipeline: speech -> text -> LLM reply -> speech.

    Args:
        audio_input: Value delivered by the Gradio audio input. The
            interface in this file uses ``type="filepath"``, so this is a
            path to the recording, or ``None``/empty when there is no
            recording to process.

    Returns:
        Path to an audio file containing the spoken reply, or ``None`` when
        there is nothing to say (no recording, nothing transcribed, or an
        empty reply from the LLM), which leaves the audio output empty.
    """
    # The interface runs with live=True, so this callback can fire when no
    # recording is present (e.g. after the input is cleared). Bail out early
    # instead of passing a missing path to Whisper.
    if audio_input is None or audio_input == "":
        return None

    # Step 1: Transcribe the audio using Whisper
    transcription = transcribe_audio(audio_input)
    if not transcription or not transcription.strip():
        # Silence or unintelligible audio: skip the LLM round-trip.
        return None

    # Step 2: Get response from Groq API using the transcription
    response = get_groq_response(transcription)
    if not response or not response.strip():
        # There is no text to synthesize, so do not call the TTS step.
        return None

    # Step 3: Convert the response text to speech (Gradio will play it)
    return text_to_speech(response)

# Step 5: Build the Gradio UI — one audio input wired to one audio output.
# The input hands voice_chatbot a file path (type="filepath"); no
# source="microphone" argument is passed.
_audio_in = gr.Audio(type="filepath")
_audio_out = gr.Audio()
iface = gr.Interface(
    fn=voice_chatbot,
    inputs=_audio_in,
    outputs=_audio_out,
    live=True,
)

# Start serving the interface.
iface.launch()