|
import os |
|
import subprocess |
|
import openai |
|
import gradio as gr |
|
|
|
openai.api_key = "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" |
|
|
|
def transcribe(audio):
    """Transcribe a recorded audio file to text via OpenAI's Whisper API.

    Args:
        audio: Filesystem path to the audio recording.

    Returns:
        The transcription as a plain string.
    """
    with open(audio, "rb") as fh:
        result = openai.Audio.transcribe("whisper-1", fh)
    return result["text"]
|
|
|
def generate_response(transcribed_text):
    """Ask gpt-3.5-turbo to answer the user's transcribed question.

    Args:
        transcribed_text: The question as plain text.

    Returns:
        The assistant's reply text.
    """
    chat_messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": transcribed_text},
    ]
    completion = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=chat_messages,
    )
    return completion["choices"][0]["message"]["content"]
|
|
|
|
|
def inference(text):
    """Synthesize speech for *text* with the Coqui ``tts`` CLI.

    Each call writes to a fresh temporary file: the original hard-coded
    ``tts_output.wav`` meant concurrent requests (the app runs with
    ``live=True``) would overwrite each other's output.

    Args:
        text: The text to speak.

    Returns:
        Path to the generated WAV file.

    Raises:
        subprocess.CalledProcessError: If the ``tts`` command exits non-zero.
    """
    fd, output_file = tempfile.mkstemp(prefix="tts_output_", suffix=".wav")
    os.close(fd)  # the tts CLI (re)writes the file itself; we only need the path
    # List form with shell=False: `text` can never be interpreted by a shell.
    subprocess.run(['tts', '--text', text, '--out_path', output_file], check=True)
    return output_file
|
|
|
def process_audio_and_respond(audio):
    """Full pipeline: spoken question -> text -> LLM answer -> spoken answer.

    Args:
        audio: Path to the user's recorded question.

    Returns:
        Path to a WAV file containing the spoken answer.
    """
    question = transcribe(audio)
    answer = generate_response(question)
    return inference(answer)
|
|
|
# Wire the pipeline into a Gradio UI: microphone in, audio answer out.
# NOTE(review): gr.inputs / gr.outputs and the theme="compact" /
# layout="vertical" / allow_flagging=False string options are the legacy
# (pre-3.x) Gradio API — confirm the pinned gradio version still supports them.
demo = gr.Interface(

    process_audio_and_respond,

    # Record from the microphone; hand the recording to the handler as a file path.
    gr.inputs.Audio(source="microphone", type="filepath", label="Speak your question"),

    # The handler returns a WAV path, which Gradio plays back to the user.
    gr.outputs.Audio(type="filepath", label="Answer"),

    title="AI Question Answering",

    description="Ask any question and get an AI-generated answer as audio output.",

    theme="compact",

    layout="vertical",

    allow_flagging=False,

    # Re-run the pipeline automatically on new input (no submit button).
    live=True,

)



# Start the local web server (blocks until interrupted).
demo.launch()