Voice-To-Voice_test

Sleeping

File size: 3,441 Bytes

e4c39da
 
 
 
2bcdf1f
e4c39da
 
2bcdf1f
 
e4c39da
2bcdf1f
 
e4c39da
2bcdf1f
e4c39da
2bcdf1f
 
e4c39da
2bcdf1f
 
 
e4c39da
2bcdf1f
 
 
 
e4c39da
 
2bcdf1f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e4c39da
2bcdf1f
e4c39da
2bcdf1f
e4c39da
 
 
 
 
 
2bcdf1f
e4c39da
 
 
2bcdf1f
e4c39da
 
 
 
 
 
 
2bcdf1f
e4c39da
 
 
2bcdf1f
e4c39da
2bcdf1f
e4c39da
 
2bcdf1f
e4c39da
 
2bcdf1f
e4c39da
 
2bcdf1f
e4c39da
 
2bcdf1f
e4c39da
2bcdf1f
e4c39da
 
 
 
2bcdf1f
 
e4c39da
2bcdf1f
e4c39da
 
2bcdf1f
 
e4c39da
 
2bcdf1f
 
e4c39da
 
2bcdf1f

import io
import logging
import os
import tempfile

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS

# Groq API client; the key is read from the GROQ_API_KEY environment variable
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# Whisper speech-to-text model ("base" checkpoint), loaded once at import time
model = whisper.load_model("base")

def process_audio(file_path):
    """Transcribe an audio file, get a chat reply, and synthesize it as speech.

    Args:
        file_path: Path to the input audio file. ``None`` or an empty string
            (nothing was uploaded) is reported as an error message.

    Returns:
        A ``(text, audio_path)`` tuple. On success ``text`` is the model's
        reply and ``audio_path`` is the path of a newly created MP3 file
        holding the spoken reply (the file is not deleted by this function).
        On any failure ``text`` is a human-readable error message and
        ``audio_path`` is ``None``; no exception is propagated to the caller.
    """
    # Guard first so a missing upload yields a clear message instead of a
    # library error raised from deep inside the audio loader.
    if not file_path:
        return "No audio file was provided.", None

    try:
        # Load the audio file and transcribe it using Whisper
        audio = whisper.load_audio(file_path)
        result = model.transcribe(audio)
        text = result["text"].strip()
        if not text:
            # Nothing to send to the chat model; an empty prompt is useless.
            return "No speech could be recognized in the audio.", None

        # Generate a response using Groq
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": text}],
            model="llama3-8b-8192",  # Replace with the correct model if necessary
        )

        # The message content may be missing; treat that like an empty reply
        # rather than failing on ``None.strip()``.
        response_message = (chat_completion.choices[0].message.content or "").strip()
        if not response_message:
            return "The model returned an empty response.", None

        # Synthesize into memory first so a failed synthesis never leaves a
        # partially written audio file on disk.
        response_audio_io = io.BytesIO()
        gTTS(response_message).write_to_fp(response_audio_io)

        # Use a unique file per call: a fixed name such as "response.mp3"
        # would let overlapping requests overwrite each other's audio.
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_file:
            audio_file.write(response_audio_io.getvalue())

        # Return the response text and the path to the saved audio file
        return response_message, audio_file.name

    except Exception as e:
        # UI boundary: report the failure to the user, but keep the full
        # traceback in the server log for debugging.
        logging.getLogger(__name__).exception("process_audio failed")
        return f"An error occurred: {e}", None

# Create the Gradio interface with customized UI
with gr.Blocks() as demo:
    # NOTE(review): the styling below is injected as a <style> tag through a
    # Markdown component; whether it is applied may depend on how the
    # installed Gradio version sanitizes Markdown HTML - confirm.
    gr.Markdown(
        """
        <style>
        .gradio-container {
            font-family: Arial, sans-serif;
            background-color: #f0f4c3;  /* Light green background color */
            border-radius: 10px;
            padding: 20px;
            box-shadow: 0 4px 12px rgba(0,0,0,0.2);
            text-align: center;
        }
        .gradio-input, .gradio-output {
            border-radius: 6px;
            border: 1px solid #ddd;
            padding: 10px;
        }
        .gradio-button {
            background-color: #ff7043;
            color: white;
            border-radius: 6px;
            border: none;
            padding: 10px 20px;  /* Adjusted padding */
            font-size: 16px;  /* Adjusted font size */
            cursor: pointer;
        }
        .gradio-button:hover {
            background-color: #e64a19;
        }
        .gradio-title {
            font-size: 28px;
            font-weight: bold;
            margin-bottom: 20px;
            color: #37474f;
        }
        .gradio-description {
            font-size: 16px;
            margin-bottom: 20px;
            color: #616161;
        }
        </style>
        """
    )

    gr.Markdown("# Voice-to-Voice Chatbot\nDeveloped by Salman Maqbool ❤️")
    gr.Markdown("Upload an audio file to interact with the voice-to-voice chatbot. The chatbot will transcribe the audio, generate a response, and provide a spoken reply.")

    with gr.Row():
        # Left column: user input and the trigger button
        with gr.Column():
            audio_input = gr.Audio(type="filepath", label="Upload Audio File")
            submit_button = gr.Button("Submit")

        # Right column: the reply as text and as audio
        with gr.Column():
            response_text = gr.Textbox(label="Response Text", placeholder="The AI-generated response will appear here", lines=5)
            response_audio = gr.Audio(label="Response Audio", type="filepath")

    # Wire the button to the pipeline. Without this handler the components
    # are only displayed and process_audio is never invoked.
    submit_button.click(
        fn=process_audio,
        inputs=audio_input,
        outputs=[response_text, response_audio],
    )

# Launch the Gradio app
demo.launch()