File size: 2,843 Bytes
b5bfb01
fc53194
b5bfb01
 
 
225463a
b5bfb01
225463a
 
f8294c1
225463a
f8294c1
225463a
b5bfb01
 
 
 
225463a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b5bfb01
 
 
 
 
225463a
b5bfb01
225463a
 
fc53194
b5bfb01
 
 
 
 
 
 
 
 
 
 
 
 
 
225463a
b5bfb01
 
 
225463a
b5bfb01
 
 
 
 
 
 
 
 
 
 
 
 
 
fc53194
b5bfb01
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import os
import gradio as gr
import whisper
from gtts import gTTS
import io
from openai import OpenAI  # Import OpenAI for AI/ML API calls

# Base URL for the OpenAI-compatible AI/ML API endpoint.
base_url = "https://api.aimlapi.com/v1"

# SECURITY: never commit API keys to source control. Read the key from the
# environment instead; export AIML_API_KEY before launching the app.
api_key = os.environ.get("AIML_API_KEY", "")

# OpenAI client pointed at the custom AI/ML API base URL.
api = OpenAI(api_key=api_key, base_url=base_url)

# Whisper "base" speech-to-text model, loaded once at startup and reused
# for every request.
model = whisper.load_model("base")

# Function to make a chat completion call to the AI/ML API
def call_aiml_api(user_prompt, system_prompt="You are a helpful assistant."):
    """Send a chat-completion request to the AI/ML API and return the reply.

    Args:
        user_prompt: The user's message text.
        system_prompt: System instruction that frames the assistant's role.

    Returns:
        The assistant's reply text with surrounding whitespace stripped.

    Raises:
        RuntimeError: If the API request fails; chained to the original
            exception so the full traceback is preserved.
    """
    try:
        # Keep the try body to just the network call that can fail.
        completion = api.chat.completions.create(
            model="mistralai/Mistral-7B-Instruct-v0.2",  # AI/ML API model id
            messages=[
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": user_prompt},
            ],
            temperature=0.7,
            max_tokens=256,
        )
    except Exception as e:
        # Chain the cause (``from e``) instead of raising a bare Exception,
        # so callers can still catch it while debugging stays possible.
        raise RuntimeError(f"API request failed with error: {e}") from e

    return completion.choices[0].message.content.strip()

# Function to process audio and interact with the AI/ML API
def process_audio(file_path):
    """Transcribe an audio file, ask the AI for a reply, and voice the reply.

    Args:
        file_path: Path to the uploaded audio file (from the Gradio widget).

    Returns:
        A ``(response_text, audio_path)`` tuple. On failure, the first
        element is an error message and the second is ``None`` so the UI
        can still render.
    """
    try:
        # Transcribe the uploaded audio with the module-level Whisper model.
        audio = whisper.load_audio(file_path)
        result = model.transcribe(audio)
        user_prompt = result["text"]

        # Get the chatbot's text reply for the transcript.
        response_message = call_aiml_api(user_prompt)

        # Synthesize the reply to speech and save it straight to disk.
        # (The original routed through an io.BytesIO buffer and then wrote
        # the same bytes to the file — the intermediate buffer is redundant.)
        tts = gTTS(response_message)
        tts.save("response.mp3")

        # Return the reply text and the path to the synthesized audio.
        return response_message, "response.mp3"

    except Exception as e:
        # Surface any failure to the UI instead of crashing the app.
        return f"An error occurred: {e}", None

# Text shown in the Gradio UI header and footer.
title = "Voice-to-Voice AI Chatbot with AI/ML API"
description = "Developed by [Adnan Tariq](https://www.linkedin.com/in/adnaantariq/) with ❤️"
article = "### Instructions\n1. Upload an audio file.\n2. Wait for the transcription.\n3. Listen to the chatbot's response."

# Wire the audio-in → (text, audio)-out pipeline into a Gradio interface.
iface = gr.Interface(
    fn=process_audio,
    inputs=gr.Audio(type="filepath"),  # user supplies audio as a file path
    outputs=[
        gr.Textbox(label="Response Text"),
        gr.Audio(label="Response Audio"),
    ],
    title=title,
    description=description,
    article=article,
    theme="dark",
    live=True,
)

# Start the web application.
iface.launch()