import os

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS

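# Load the Whisper speech-to-text model once at startup. "large" is the most
# accurate checkpoint but is slow to load and needs several GB of memory; a
# smaller model such as "base" is a reasonable swap while experimenting.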
whisper_model = whisper.load_model("large")

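# Read the Groq API key from the environment rather than hardcoding it, so a
# real key never lands in source control. Set GROQ_API_KEY before launching.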
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
client = Groq(api_key=GROQ_API_KEY)

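# gTTS synthesizes speech through Google's web TTS endpoint, so this call
# needs network access and writes its result to a local MP3 file.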
def text_to_speech(text):
    """Convert the chatbot's text reply to speech and return the MP3 path."""
    tts = gTTS(text=text, lang="en")
    response_audio_path = "response.mp3"
    tts.save(response_audio_path)
    return response_audio_path

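# Whisper accepts a path to an audio file directly, which is what Gradio
# hands us because the Audio input below uses type="filepath".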
def transcribe_audio(audio):
    """Transcribe the recorded audio file to text with Whisper."""
    print("Transcribing audio...")
    result = whisper_model.transcribe(audio)
    return result["text"]

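# Each call sends only the latest utterance, so the bot is stateless; to give
# it memory, accumulate the messages list across turns instead.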
def get_response_from_groq(input_text):
    """Get a reply to input_text from Groq's llama3-8b-8192 model."""
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": input_text}],
        model="llama3-8b-8192",
    )
    return chat_completion.choices[0].message.content

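# End-to-end pipeline: speech -> text (Whisper) -> reply (Groq) -> speech (gTTS).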
def chatbot(audio):
    # With live=True, Gradio can fire before any audio exists; guard against
    # passing None to Whisper.
    if audio is None:
        return "No audio received. Please record something first.", None

    user_input = transcribe_audio(audio)
    print(f"User said: {user_input}")

    response = get_response_from_groq(user_input)
    print(f"Chatbot response: {response}")

    response_audio_path = text_to_speech(response)

    # Fall back to text-only output if the MP3 was not written.
    if os.path.exists(response_audio_path):
        return response, response_audio_path
    return response, None

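# live=True triggers the function as soon as a recording finishes, with no
# Submit button; remove it if you prefer an explicit submit step.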
interface = gr.Interface(
    fn=chatbot,
    inputs=gr.Audio(type="filepath"),
    outputs=["text", gr.Audio(type="filepath")],
    live=True,
    title="Voice-Enabled Chatbot",
    description="Speak into your microphone, and the chatbot will respond with both text and audio.",
)

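# share=True tunnels the app through a temporary public gradio.live URL;
# drop it to keep the demo local.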
interface.launch(share=True)