File size: 2,792 Bytes
2954152
 
 
 
 
bf9bd79
a7d008d
 
 
 
2954152
 
 
bf9bd79
2954152
 
 
 
 
e1a0aa9
2954152
 
 
e1a0aa9
 
 
 
51f7222
 
 
2954152
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bf9bd79
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
import os
import tempfile

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS

# Read the Groq credential from the process environment; refuse to start
# when it is missing or empty.
api_key = os.environ.get("GROQ_API_KEY")
if not api_key:
    raise ValueError("No API key found. Please set the GROQ_API_KEY environment variable.")

# Module-level handles shared by every request: the Whisper "base"
# speech-to-text model and the Groq API client.
whisper_model = whisper.load_model("base")
client = Groq(api_key=api_key)

def chatbot(audio_input):
    """Run one voice-chat turn: transcribe speech, ask the LLM, speak the reply.

    Args:
        audio_input: Path to the recorded audio file, or ``None`` when no
            audio was supplied.

    Returns:
        A 3-tuple ``(audio_path, transcription, text)``.

        * On success, ``audio_path`` is the path of a freshly created MP3
          file containing the spoken reply, ``transcription`` is the
          recognised user speech (surrounding whitespace removed) and
          ``text`` is the model's reply.
        * On failure, ``audio_path`` is ``None``, ``transcription`` is
          whatever had been recognised before the error (or ``None``) and
          ``text`` is ``"Error: <message>"``.

    No exception propagates to the caller; every failure is reported through
    the returned text so the UI can display it.
    """
    # Bound before the ``try`` so the error path can always report it.
    transcription = None
    try:
        # Debug: Check the type and content of audio_input
        print(f"Audio input type: {type(audio_input)}")

        if audio_input is None:
            raise ValueError("Audio input is None. Please provide a valid audio file.")

        if not os.path.exists(audio_input):
            raise FileNotFoundError(f"Audio file {audio_input} not found.")

        # Step 1: Load and transcribe audio input using Whisper
        audio = whisper.load_audio(audio_input)
        transcription_result = whisper_model.transcribe(audio)
        recognised = (transcription_result or {}).get("text")
        # Reject whitespace-only results too, so blank prompts never reach the LLM.
        if not recognised or not recognised.strip():
            raise ValueError("Whisper transcription failed or returned empty text.")
        transcription = recognised.strip()

        # Step 2: Generate response using LLaMA 8B model via Groq API
        chat_completion = client.chat.completions.create(
            messages=[
                {
                    "role": "user",
                    "content": transcription,
                }
            ],
            model="llama3-8b-8192",
        )

        if not (chat_completion and chat_completion.choices):
            raise ValueError("Invalid response from Groq API")
        response_text = chat_completion.choices[0].message.content

        # The content may be None as well as blank; guard both before calling
        # ``.strip()`` so the user sees the intended message.
        if not response_text or not response_text.strip():
            raise ValueError("Response text is empty or invalid")

        # Step 3: Convert text response to speech using GTTS.
        # Each reply gets its own file: a single shared "response.mp3" would be
        # overwritten when two requests are processed at the same time.
        # The handle is closed before gTTS writes to the path so the file can
        # be reopened on every platform.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as tmp:
            response_path = tmp.name
        gTTS(response_text).save(response_path)

        # Step 4: Return the response audio and text transcription
        return response_path, transcription, response_text

    except Exception as e:
        # UI boundary: convert any failure into a message shown to the user
        # instead of letting the request crash.
        return None, transcription, f"Error: {e}"

# Assemble the Gradio UI around `chatbot`: one audio input handed over as a
# file path, and three outputs (an audio file path plus two text fields).
interface = gr.Interface(
    title="Voice-to-Voice Chatbot",
    description="Speak to the chatbot and get a real-time response.",
    fn=chatbot,
    inputs=gr.Audio(type="filepath"),
    outputs=[gr.Audio(type="filepath"), "text", "text"],
    live=True,  # Automatically processes input without requiring a button click
)

# Start the app only when this file is executed as a script.
if __name__ == "__main__":
    interface.launch()