File size: 2,426 Bytes
c448695 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 |
import os
import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS
import tempfile
# Set up Groq API
# The key is read from the GROQ_API_KEY environment variable (e.g. a deployment
# secret). Credentials must never be committed to source control; any key that
# was previously embedded in this file should be revoked and rotated.
_groq_api_key = os.environ.get("GROQ_API_KEY")
if not _groq_api_key:
    # Fail at startup with a clear message instead of on the first request.
    raise RuntimeError(
        "GROQ_API_KEY environment variable is not set; "
        "configure it before starting the app."
    )
# Initialize Groq client
client = Groq(api_key=_groq_api_key)
# Load Whisper model
whisper_model = whisper.load_model("base")
# Function to process audio input
def process_audio(audio_file):
    """Transcribe a recording, fetch a chat reply for it, and voice the reply.

    Args:
        audio_file: Filesystem path of the recorded audio to transcribe.

    Returns:
        A ``(text, audio_path)`` tuple. On success ``text`` is the chat
        model's reply and ``audio_path`` is the path of a ``.mp3`` temporary
        file holding the synthesized reply; the file is created with
        ``delete=False`` and is not removed by this function. On any failure
        ``text`` is an error message and ``audio_path`` is ``None``.
    """
    try:
        # Step 1: Transcribe audio to text using Whisper
        if not audio_file or not os.path.exists(audio_file):
            return "Error: Invalid audio input.", None
        print("Transcribing audio...")
        transcription_result = whisper_model.transcribe(audio_file)
        transcription = transcription_result.get("text", "").strip()
        print(f"Transcription: {transcription}")
        if not transcription:
            return "Error: Failed to transcribe audio.", None

        # Step 2: Interact with Groq API
        print("Interacting with Groq API...")
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": transcription}],
            model="llama3-8b-8192",
        )
        # The message content may be None; normalise it to "" so the
        # empty-response check below is reached instead of an AttributeError.
        response_text = (response.choices[0].message.content or "").strip()
        print(f"Groq Response: {response_text}")
        if not response_text:
            return "Error: Groq API returned an empty response.", None

        # Step 3: Convert response text to speech using gTTS
        print("Converting response to audio...")
        tts = gTTS(response_text)
        # Only a unique path is needed here. Close the handle right away so no
        # file descriptor is leaked and gTTS can reopen the path by name
        # (an open NamedTemporaryFile cannot be reopened on Windows).
        with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as temp_audio_file:
            audio_path = temp_audio_file.name
        try:
            tts.save(audio_path)
        except Exception:
            # Do not leave an orphaned temp file behind when synthesis fails.
            if os.path.exists(audio_path):
                os.remove(audio_path)
            raise
        return response_text, audio_path
    except Exception as e:
        # UI boundary: report a generic message to the user rather than crash
        # the Gradio callback; the detail goes to the console.
        print(f"Error: {e}")
        return "Error occurred during processing.", None
# Define Gradio interface
def chatbot(audio_file):
    """Gradio callback: run the audio pipeline and return its two outputs.

    Args:
        audio_file: Path of the recorded audio, passed through to
            ``process_audio``.

    Returns:
        The ``(text, audio_path)`` pair produced by ``process_audio``.
    """
    reply_text, reply_audio = process_audio(audio_file)
    return reply_text, reply_audio
# Build the UI: one audio input, a text box and an audio player for the reply,
# and a button that runs the chatbot callback.
# NOTE(review): the original indentation was lost; the three components are
# assumed to sit inside the Row and the button below it -- confirm the layout.
with gr.Blocks() as demo:
    gr.Markdown("## Real-Time Voice-to-Voice Chatbot")
    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="Speak Now")
        text_output = gr.Textbox(label="Chatbot Response")
        audio_output = gr.Audio(label="Audio Response")
    submit_button = gr.Button("Submit")
    submit_button.click(chatbot, inputs=audio_input, outputs=[text_output, audio_output])
# Launch Gradio app
demo.launch()