# ok / app.py
# Pragnakal's picture
# Update app.py
# 524b29a verified
# raw
# history blame
# 1.8 kB
import gradio as gr
import speech_recognition as sr
from pydub import AudioSegment
# Function to transcribe audio to text
def transcribe_audio(audio_input):
    """Transcribe recorded speech to text using ``recognize_google``.

    Args:
        audio_input: Audio source handed to ``sr.AudioFile`` (a file path or
            file-like object). ``None`` or an empty value means nothing was
            recorded and is handled without touching the recognizer.

    Returns:
        The recognized text, or an apology string when no audio was supplied,
        the speech could not be understood, or the recognition request failed.
    """
    # The UI can submit without a recording; sr.AudioFile would fail on that,
    # so answer with a message in the same style as the other error paths.
    if not audio_input:
        return "Sorry, I didn't receive any audio."

    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_input) as source:
        audio_data = recognizer.record(source)

    # Keep the try body limited to the call that raises the handled errors.
    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Sorry, I couldn't understand the audio."
    except sr.RequestError:
        return "Sorry, there was a problem with the request."
# Function to generate a response (you'll need to implement this)
def generate_response(user_input, output_path="response.wav"):
    """Build the chatbot's text reply and the path for its spoken version.

    This is still a placeholder: the reply simply echoes ``user_input`` and no
    audio file is written yet, so callers must not assume ``output_path``
    exists on disk.

    Args:
        user_input: The user's transcribed message.
        output_path: Where the synthesized speech should be saved once TTS is
            implemented. Defaults to ``"response.wav"``; pass a different path
            to keep separate requests from sharing one file.

    Returns:
        A ``(text_response, output_path)`` tuple.
    """
    # Placeholder for the real text-generation step.
    text_response = f"Responding as Tommy Vercetti: {user_input}"

    # TODO: synthesize `text_response` to speech and save it to `output_path`.
    return text_response, output_path
# Function to process the audio input and return both text and audio response
def respond(audio_input):
    """Turn a recorded message into the chatbot's text and audio reply.

    Args:
        audio_input: The recording passed in by the UI, forwarded to
            ``transcribe_audio``. ``None`` means nothing was recorded.

    Returns:
        A ``(text, audio_path)`` tuple for the text and audio outputs.
        ``audio_path`` is ``None`` when there is no audio file to play,
        either because nothing was recorded or because the response file was
        never produced.
    """
    import os  # local import keeps this block self-contained

    if audio_input is None:
        return "Sorry, I didn't receive any audio.", None

    user_input = transcribe_audio(audio_input)
    text_response, output_path = generate_response(user_input)

    # The TTS step may not have written the file (it is currently a
    # placeholder); return no audio instead of a path that cannot be loaded.
    if not os.path.isfile(output_path):
        return text_response, None

    return text_response, output_path
# Microphone input. type="filepath" makes Gradio pass `respond` a path to the
# recorded file; the default ("numpy") passes a (sample_rate, ndarray) tuple,
# which sr.AudioFile cannot open.
input_audio = gr.Audio(
    sources=["microphone"],
    type="filepath",
    waveform_options=gr.WaveformOptions(
        waveform_color="#01C6FF",
        waveform_progress_color="#0066B4",
        skip_length=2,
        show_controls=False,
    ),
)

# Wire the UI: one audio input, a text reply and an audio reply.
demo = gr.Interface(
    fn=respond,
    inputs=input_audio,
    outputs=["text", "audio"],
    title="Tommy Vercetti Chatbot",
    description="Chat with Tommy Vercetti from GTA Vice City. Get responses in both text and voice!",
)
demo.launch(debug=True)