import os
import subprocess

import openai
import gradio as gr

# Read the API key from the OPENAI_API_KEY environment variable rather than
# hard-coding a secret in the source file.
openai.api_key = os.environ["OPENAI_API_KEY"]


def transcribe(audio):
    # Transcribe the recorded audio file with OpenAI Whisper (openai<1.0 API).
    with open(audio, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    return transcript["text"]


def generate_response(transcribed_text):
    # Generate an answer to the transcribed question with a Completion model
    # (openai<1.0 API).
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=transcribed_text,
        max_tokens=1024,
        n=1,
        stop=None,
        temperature=0.5,
    )
    return response.choices[0].text


def inference(text):
    # Synthesize speech for the response text with the Coqui TTS command-line tool.
    output_file = "tts_output.wav"
    cmd = ["tts", "--text", text, "--out_path", output_file]
    subprocess.run(cmd, check=True)
    return output_file


def process_audio_and_respond(audio):
    # Full pipeline: recorded speech -> transcript -> generated answer -> synthesized speech.
    text = transcribe(audio)
    response_text = generate_response(text)
    output_file = inference(response_text)
    return output_file


# Note: this uses the legacy Gradio 3.x interface API (gr.inputs / gr.outputs);
# newer Gradio versions expose components such as gr.Audio(...) directly.
demo = gr.Interface(
    process_audio_and_respond,
    gr.inputs.Audio(source="microphone", type="filepath", label="Speak your question"),
    gr.outputs.Audio(type="filepath", label="Answer"),
    title="AI Question Answering",
    description="Ask any question and get an AI-generated answer as audio output.",
    theme="compact",
    layout="vertical",
    allow_flagging=False,
    live=True,
)

demo.launch()
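
# Assumed dependencies (not pinned in the original script): openai<1.0 and
# gradio 3.x for the APIs used above, plus the Coqui "TTS" package, which
# provides the `tts` command-line tool invoked in inference().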