import os

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS

# Load the Whisper speech-to-text model (the "base" checkpoint).
model = whisper.load_model("base")

# Groq client; reads the API key from the GROQ_API_KEY environment variable.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))


def get_llm_response(user_input):
    """Send the user's text to the Groq-hosted LLM and return its reply."""
    chat_completion = client.chat.completions.create(
        messages=[{"role": "user", "content": user_input}],
        model="llama3-8b-8192",
    )
    return chat_completion.choices[0].message.content


def text_to_speech(text, output_audio="output_audio.mp3"):
    """Convert text to speech with gTTS and save it as an MP3 file."""
    tts = gTTS(text)
    tts.save(output_audio)
    return output_audio


def chatbot(audio):
    # Gradio may call this before any audio has been recorded or uploaded.
    if audio is None:
        return "", None

    # 1. Transcribe the recorded audio file with Whisper.
    result = model.transcribe(audio)
    user_text = result["text"]

    # 2. Ask the LLM for a reply to the transcription.
    response_text = get_llm_response(user_text)

    # 3. Convert the reply to speech.
    output_audio = text_to_speech(response_text)

    return response_text, output_audio


# Gradio UI: audio in (recorded or uploaded), reply out as text and audio.
iface = gr.Interface(
    fn=chatbot,
    inputs=gr.Audio(type="filepath"),
    outputs=[gr.Textbox(), gr.Audio(type="filepath")],
    live=True,
)

iface.launch()