# Install the required libraries (run these once in your environment before executing the script).
# The unrelated "whisper" package on PyPI conflicts with openai-whisper, so remove it first.
# pip uninstall -y whisper
# pip install git+https://github.com/openai/whisper.git
# pip install gradio gtts groq ffmpeg-python

# Import required libraries
import os
import gradio as gr
import whisper
from gtts import gTTS
import io
from groq import Groq

# Set your GROQ_API_KEY (replace the placeholder with your own key, or export it in the
# environment before running; never commit a real key to source control)
os.environ["GROQ_API_KEY"] = "your_groq_api_key_here"

# Initialize Groq client and Whisper model
client = Groq(api_key=os.environ.get("GROQ_API_KEY"))
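# "base" is a small, CPU-friendly Whisper checkpoint; larger models ("small", "medium", "large")
# improve transcription accuracy at the cost of speed and memory.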
model = whisper.load_model("base", device="cpu")

# Function to process audio
def process_audio(file_path):
    try:
        # Ensure the file exists
        if not os.path.isfile(file_path):
            raise FileNotFoundError(f"The file {file_path} does not exist.")

        print(f"Processing file: {file_path}")

        # Load and process the audio with Whisper
        audio = whisper.load_audio(file_path)
        print("Audio loaded successfully.")

        # Transcribe the audio
        result = model.transcribe(audio)
        text = result["text"]
        print("Transcription:", text)

        # Generate a response using Groq API
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": text}],
            model="llama3-8b-8192",
        )
        response_message = chat_completion.choices[0].message.content.strip()
        print("Chatbot response:", response_message)

        # Convert the response to audio
        tts = gTTS(response_message)
        response_audio_io = io.BytesIO()
        tts.write_to_fp(response_audio_io)
        response_audio_io.seek(0)

        # Save the response audio to a file
        response_audio_path = "response.mp3"
        with open(response_audio_path, "wb") as audio_file:
            audio_file.write(response_audio_io.getvalue())

        return response_message, response_audio_path

    except FileNotFoundError as e:
        return f"File not found: {e}", None
    except UnicodeDecodeError as e:
        return f"Invalid audio file encoding: {e}", None
    except Exception as e:
        return f"An unexpected error occurred: {e}", None

# Define Gradio interface
title = "Voice-to-Voice Chatbot Application"
description = "Run a voice-to-voice chatbot with transcription and audio response."
article = "### Instructions\n1. Upload an audio file.\n2. Wait for transcription and chatbot's response.\n3. Listen to the response audio."

iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath", label="Upload an Audio File"),
    outputs=[
        gr.Textbox(label="Response Text"),
        gr.Audio(label="Response Audio")
    ],
    live=True,
    title=title,
    description=description,
    article=article
)

# Launch Gradio interface
iface.launch(share=True)
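
# Optional quick check (a minimal sketch, not wired into the Gradio app): call process_audio
# directly on a local recording to verify the Whisper -> Groq -> gTTS pipeline before launching
# the UI. "sample.wav" is a hypothetical test file, not one shipped with this script.
# reply_text, reply_audio_path = process_audio("sample.wav")
# print(reply_text, reply_audio_path)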