# Page header captured along with the file (not part of the program):
#   sal-maq's picture
#   Update app.py
#   5251d1b verified
import io
import os
import tempfile

import gradio as gr
import whisper
from groq import Groq
from gtts import gTTS
# Groq API client; the key is read from the GROQ_API_KEY environment variable.
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

# Whisper speech-to-text model ("base"), loaded once at import time and
# reused by every request.
model = whisper.load_model("base")
def process_audio(file_path):
    """Transcribe an audio file, get a chat reply, and synthesize it as speech.

    The recording is transcribed with the module-level Whisper ``model``, the
    transcript is sent as a single user message to the Groq chat-completions
    API through ``client``, and the reply text is rendered to an MP3 file
    with gTTS.

    Args:
        file_path: Path of the audio file to process. May be ``None`` or
            empty when the handler is triggered without any audio.

    Returns:
        A ``(text, audio_path)`` tuple. On success ``text`` is the model's
        reply and ``audio_path`` is the path of a newly created MP3 file.
        When there is nothing to process, or any step fails, ``text`` is a
        human-readable message and ``audio_path`` is ``None``.
    """
    # No upload: answer directly instead of surfacing the audio loader's raw
    # exception text to the user.
    if not file_path:
        return "No audio provided. Please upload an audio file.", None

    response_audio_path = None
    try:
        # Speech -> text.
        audio = whisper.load_audio(file_path)
        result = model.transcribe(audio)
        text = result["text"].strip()
        if not text:
            # Nothing intelligible was transcribed; skip the LLM round-trip.
            return "No speech could be recognized in the audio.", None

        # Text -> reply.
        chat_completion = client.chat.completions.create(
            messages=[{"role": "user", "content": text}],
            model="llama3-8b-8192",  # Replace with the correct model if necessary
        )
        # ``content`` may be None; normalise before stripping.
        response_message = (chat_completion.choices[0].message.content or "").strip()
        if not response_message:
            # gTTS rejects empty text, so stop here with a clear message.
            return "The model returned an empty response.", None

        # Reply -> speech. Each request gets its own file so that concurrent
        # requests cannot overwrite one another's audio. The file is left on
        # disk (delete=False) because the caller needs the path after return.
        tts = gTTS(response_message)
        with tempfile.NamedTemporaryFile(suffix=".mp3", delete=False) as audio_file:
            response_audio_path = audio_file.name
            tts.write_to_fp(audio_file)

        return response_message, response_audio_path
    except Exception as e:
        # Handler boundary: report the failure in the text output rather than
        # raising. Best-effort removal of a partially written audio file.
        if response_audio_path is not None:
            try:
                os.remove(response_audio_path)
            except OSError:
                pass
        return f"An error occurred: {e}", None
# Assemble the Gradio Blocks UI: inline styling, headings, and a two-column
# layout with the upload controls on one side and the responses on the other.
with gr.Blocks() as demo:
    # Stylesheet emitted through a Markdown component.
    gr.Markdown(
        value="""
<style>
.gradio-container {
font-family: Arial, sans-serif;
background-color: #f0f4c3; /* Light green background color */
border-radius: 10px;
padding: 20px;
box-shadow: 0 4px 12px rgba(0,0,0,0.2);
text-align: center;
}
.gradio-input, .gradio-output {
border-radius: 6px;
border: 1px solid #ddd;
padding: 10px;
}
.gradio-button {
background-color: #ff7043;
color: white;
border-radius: 6px;
border: none;
padding: 10px 20px; /* Adjusted padding */
font-size: 16px; /* Adjusted font size */
cursor: pointer;
}
.gradio-button:hover {
background-color: #e64a19;
}
.gradio-title {
font-size: 28px;
font-weight: bold;
margin-bottom: 20px;
color: #37474f;
}
.gradio-description {
font-size: 16px;
margin-bottom: 20px;
color: #616161;
}
</style>
"""
    )

    # Title and usage instructions.
    gr.Markdown(value="# Voice-to-Voice Chatbot\nDeveloped by Salman Maqbool ❤️")
    gr.Markdown(
        value="Upload an audio file to interact with the voice-to-voice chatbot. The chatbot will transcribe the audio, generate a response, and provide a spoken reply."
    )

    with gr.Row():
        # Input side: the audio upload and the trigger button.
        with gr.Column():
            audio_input = gr.Audio(label="Upload Audio File", type="filepath")
            submit_button = gr.Button(value="Submit")

        # Output side: the reply as text and as audio.
        with gr.Column():
            response_text = gr.Textbox(
                lines=5,
                label="Response Text",
                placeholder="The AI-generated response will appear here",
            )
            response_audio = gr.Audio(type="filepath", label="Response Audio")

    # Clicking Submit runs process_audio on the uploaded file and fills both
    # output components.
    submit_button.click(
        process_audio,
        inputs=audio_input,
        outputs=[response_text, response_audio],
    )

# Start the Gradio server.
demo.launch()