"""Voice question-answering demo.

Pipeline: microphone recording -> OpenAI transcription ("whisper-1") ->
OpenAI text completion ("text-davinci-003") -> speech synthesis via the
external ``tts`` command-line tool -> audio file returned to the Gradio UI.
"""

import os
import subprocess
from typing import Optional

import gradio as gr
import openai

# SECURITY: the API key must never be committed to source control. It is read
# from the environment instead; any key that was previously hard-coded in this
# file should be treated as leaked and revoked.
openai.api_key = os.environ.get("OPENAI_API_KEY")


def transcribe(audio: str) -> str:
    """Transcribe the audio file at path *audio* and return the text.

    Args:
        audio: Filesystem path of the recorded audio file.

    Returns:
        The ``"text"`` field of the transcription response.
    """
    with open(audio, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    return transcript["text"]


def generate_response(transcribed_text: str) -> str:
    """Return a text completion for *transcribed_text*.

    Args:
        transcribed_text: The prompt sent verbatim to the completion endpoint.

    Returns:
        The text of the first (and only requested) completion choice.
    """
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=transcribed_text,
        max_tokens=1024,
        n=1,
        stop=None,
        temperature=0.5,
    )
    return response.choices[0].text


def inference(text: str) -> str:
    """Synthesize *text* to speech with the ``tts`` CLI and return the WAV path.

    Args:
        text: The text to speak.

    Returns:
        Path of the generated audio file.

    Raises:
        subprocess.CalledProcessError: If ``tts`` exits with a non-zero status.
        FileNotFoundError: If the ``tts`` executable is not on ``PATH``.
    """
    # NOTE(review): the output path is fixed, so overlapping requests would
    # overwrite each other's audio — confirm whether concurrent use matters.
    output_file = "tts_output.wav"
    # The text is model-generated, so it may begin with "-". Passing it as
    # "--text=<value>" keeps option parsers from mistaking it for a flag
    # (assumes the CLI accepts the standard "--opt=value" form).
    cmd = ["tts", f"--text={text}", "--out_path", output_file]
    subprocess.run(cmd, check=True)
    return output_file


def process_audio_and_respond(audio: Optional[str]) -> Optional[str]:
    """Answer a spoken question with synthesized speech.

    Args:
        audio: Path of the recorded question, or ``None`` when the UI has no
            recording (e.g. the input was cleared while in live mode).

    Returns:
        Path of the spoken answer, or ``None`` when there is nothing to
        answer (no recording, empty transcript, or empty completion).
    """
    if not audio:
        # Live mode can invoke the handler without a recording; opening a
        # ``None`` path would raise, so just produce no output.
        return None

    text = transcribe(audio)
    if not text or not text.strip():
        # Nothing intelligible was said; an empty prompt would only yield an
        # arbitrary completion.
        return None

    response_text = generate_response(text)
    if not response_text or not response_text.strip():
        # Nothing to synthesize.
        return None

    return inference(response_text)


demo = gr.Interface(
    process_audio_and_respond,
    gr.inputs.Audio(
        source="microphone",
        type="filepath",
        label="Speak your question",
    ),
    gr.outputs.Audio(type="filepath", label="Answer"),
    title="AI Question Answering",
    description="Ask any question and get an AI-generated answer as audio output.",
    theme="compact",
    layout="vertical",
    allow_flagging=False,
    live=True,
)

demo.launch()