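"""Real-time voice-to-voice chatbot demo.

Pipeline: recorded audio -> Whisper transcription -> Groq chat completion
(llama3-8b-8192) -> gTTS speech synthesis, served through a Gradio Blocks UI.
"""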
import os
import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS
import tempfile

# Set up Groq API key (provide it via the GROQ_API_KEY environment variable;
# never hard-code secrets in source)
if not os.environ.get("GROQ_API_KEY"):
    raise RuntimeError("GROQ_API_KEY environment variable is not set.")

# Initialize Groq client
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))

# Load Whisper model
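# ("base" balances speed and accuracy; Whisper's tiny/small/medium/large
# checkpoints are drop-in alternatives)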
whisper_model = whisper.load_model("base")

# Function to process audio input
def process_audio(audio_file):
    try:
        # Step 1: Transcribe audio to text using Whisper
        if not audio_file or not os.path.exists(audio_file):
            return "Error: Invalid audio input.", None

        print("Transcribing audio...")
        transcription_result = whisper_model.transcribe(audio_file)
        transcription = transcription_result.get("text", "").strip()
        print(f"Transcription: {transcription}")

        if not transcription:
            return "Error: Failed to transcribe audio.", None

        # Step 2: Interact with Groq API
        print("Interacting with Groq API...")
        response = client.chat.completions.create(
            messages=[{"role": "user", "content": transcription}],
            model="llama3-8b-8192",
        )
        response_text = response.choices[0].message.content.strip()
        print(f"Groq Response: {response_text}")

        if not response_text:
            return "Error: Groq API returned an empty response.", None

        # Step 3: Convert response text to speech using gTTS
        print("Converting response to audio...")
        tts = gTTS(response_text)
        # Create a temporary mp3 path and close the handle first so gTTS can
        # reopen it for writing (required on Windows, harmless elsewhere).
        temp_audio_file = tempfile.NamedTemporaryFile(delete=False, suffix=".mp3")
        temp_audio_file.close()
        tts.save(temp_audio_file.name)

        return response_text, temp_audio_file.name
    except Exception as e:
        print(f"Error: {e}")
        return f"Error occurred during processing: {e}", None

# Gradio callback: run the full pipeline and return the text and audio outputs
def chatbot(audio_file):
    response_text, audio_path = process_audio(audio_file)
    return response_text, audio_path

# Define Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("## Real-Time Voice-to-Voice Chatbot")
    with gr.Row():
        audio_input = gr.Audio(type="filepath", label="Speak Now")
        text_output = gr.Textbox(label="Chatbot Response")
        audio_output = gr.Audio(label="Audio Response")
    
    submit_button = gr.Button("Submit")
    submit_button.click(chatbot, inputs=audio_input, outputs=[text_output, audio_output])

# Launch Gradio app
if __name__ == "__main__":
    demo.launch()
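
# Note: demo.launch(share=True) can be used to expose a temporary public URL
# when the app needs to be reached from outside the local machine.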