|
import os

import gradio as gr
import speech_recognition as sr
from pydub import AudioSegment
|
|
|
|
|
def transcribe_audio(audio_input):
    """Transcribe a recorded audio file to text.

    Args:
        audio_input: Audio source handed to ``sr.AudioFile`` (a path or
            file-like object). May be ``None`` or empty when the user
            submitted the form without recording anything.

    Returns:
        The text returned by ``recognizer.recognize_google``, or a
        human-readable apology string when no audio was supplied, the speech
        could not be understood, or the recognition request failed.
    """
    # Gradio passes None when nothing was recorded; sr.AudioFile would raise
    # on that, so answer with a friendly message instead of crashing.
    if not audio_input:
        return "Sorry, I didn't receive any audio."

    recognizer = sr.Recognizer()
    with sr.AudioFile(audio_input) as source:
        audio_data = recognizer.record(source)

    # Only the recognition call is expected to raise these errors, so keep
    # the try body limited to it.
    try:
        return recognizer.recognize_google(audio_data)
    except sr.UnknownValueError:
        return "Sorry, I couldn't understand the audio."
    except sr.RequestError:
        return "Sorry, there was a problem with the request."
|
|
|
|
|
def generate_response(user_input):
    """Build the chatbot's reply for the transcribed user input.

    Args:
        user_input: Text the reply should respond to.

    Returns:
        A ``(text_response, output_path)`` tuple: the reply text, which
        embeds ``user_input``, and the fixed path ``"response.wav"``.
        This function does not itself create a file at that path.
    """
    return (
        f"Responding as Tommy Vercetti: {user_input}",
        "response.wav",
    )
|
|
|
|
|
def respond(audio_input):
    """Handle one chat turn: transcribe the recording and build the reply.

    Args:
        audio_input: Recording supplied by the audio input component; it is
            passed unchanged to ``transcribe_audio``.

    Returns:
        A ``(text_response, audio_path)`` tuple for the "text" and "audio"
        outputs. ``audio_path`` is ``None`` when the reply audio file does
        not exist, so the text reply is still returned instead of the whole
        request failing.
    """
    user_input = transcribe_audio(audio_input)
    text_response, output_path = generate_response(user_input)

    # The audio does not need to be decoded here; only its path is returned.
    # generate_response may hand back a path that was never written, so
    # check for the file rather than letting the request blow up.
    if not output_path or not os.path.isfile(output_path):
        return text_response, None

    return text_response, output_path
|
|
|
# Microphone input for the chatbot. ``type="filepath"`` makes Gradio pass the
# recording to the callback as a path on disk; the default ("numpy") passes a
# ``(sample_rate, ndarray)`` tuple, which ``sr.AudioFile`` in
# ``transcribe_audio`` cannot open.
input_audio = gr.Audio(
    sources=["microphone"],
    type="filepath",
    waveform_options=gr.WaveformOptions(
        waveform_color="#01C6FF",
        waveform_progress_color="#0066B4",
        skip_length=2,
        show_controls=False,
    ),
)
|
|
|
# Wire ``respond`` into a one-input, two-output (text + audio) Gradio UI and
# start serving it in debug mode.
demo = gr.Interface(
    fn=respond,
    inputs=input_audio,
    outputs=["text", "audio"],
    title="Tommy Vercetti Chatbot",
    description="Chat with Tommy Vercetti from GTA Vice City. Get responses in both text and voice!",
)
demo.launch(debug=True)
|
|