import os
import tempfile

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS
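
# Requires: pip install gradio openai-whisper groq gTTS
# ("whisper" above is the openai-whisper package; it also needs ffmpeg on PATH.)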

# The Groq API key is read from the environment; set GROQ_API_KEY before
# running (e.g. export GROQ_API_KEY="your-key-here") instead of hardcoding it.
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Load the Whisper speech-to-text model once at startup.
whisper_model = whisper.load_model("base")
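# Larger checkpoints ("small", "medium", "large") transcribe more accurately
# but are slower to load and run; "base" is a reasonable CPU default.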


def process_audio(audio_file):
    try:
        # Gradio passes the recording as a file path (type="filepath").
        if not audio_file or not os.path.exists(audio_file):
            return "Error: Invalid audio input.", None

        # Step 1: transcribe the recording locally with Whisper.
        print("Transcribing audio...")
        transcription_result = whisper_model.transcribe(audio_file)
        transcription = transcription_result.get("text", "").strip()
        print(f"Transcription: {transcription}")

        if not transcription:
            return "Error: Failed to transcribe audio.", None

        # Step 2: send the transcript to a Groq-hosted LLM for a reply.
        print("Interacting with Groq API...")
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": transcription}],
            model="llama3-8b-8192",
        )
        response_text = response.choices[0].message.content.strip()
        print(f"Groq Response: {response_text}")

        if not response_text:
            return "Error: Groq API returned an empty response.", None

        # Step 3: synthesize the reply to speech with gTTS.
        print("Converting response to audio...")
        tts = gTTS(response_text)
        temp_audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
        # Close the handle before gTTS writes to the path; on Windows the file
        # cannot be reopened while the NamedTemporaryFile handle is still open.
        temp_audio_file.close()
        tts.save(temp_audio_file.name)

        return response_text, temp_audio_file.name
    except Exception as e:
        print(f"Error: {e}")
        return "Error occurred during processing.", None
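
# Note: with delete=False the generated .mp3 files persist in the system temp
# directory; remove them periodically (or after playback) if space matters.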


def chatbot(audio_file):
    # Thin wrapper so the Gradio click handler has a single entry point.
    return process_audio(audio_file)


# Build the interface: microphone/file input, text reply, and spoken reply.
with gr.Blocks() as demo:
    gr.Markdown("## Real-Time Voice-to-Voice Chatbot")
    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="Speak Now")
        text_output = gr.Textbox(label="Chatbot Response")
        audio_output = gr.Audio(label="Audio Response")

    submit_button = gr.Button("Submit")
    submit_button.click(chatbot, inputs=audio_input, outputs=[text_output, audio_output])

demo.launch()
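# Pass share=True (demo.launch(share=True)) to expose a temporary public URL.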