import os
import sys
from subprocess import call

import openai
import gradio as gr

# Read the API key from the environment rather than hardcoding a secret in source.
openai.api_key = os.environ["OPENAI_API_KEY"]


def transcribe(audio):
    """Transcribe the recorded audio file with OpenAI's Whisper API."""
    with open(audio, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    return transcript["text"]


def generate_response(transcribed_text):
    """Generate a text completion for the transcribed question."""
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=transcribed_text,
        max_tokens=1024,
        n=1,
        stop=None,
        temperature=0.5,
    )
    return response.choices[0].text


def run_cmd(command):
    """Run an external command, exiting cleanly on Ctrl-C."""
    try:
        print(command)
        call(command)
    except KeyboardInterrupt:
        print("Process interrupted")
        sys.exit(1)


def inference(text):
    """Synthesize speech for the response text via the Coqui `tts` CLI,
    which writes its output to tts_output.wav by default."""
    cmd = ["tts", "--text", text]
    run_cmd(cmd)
    return "tts_output.wav"


def process_audio_and_respond(audio):
    """Full pipeline: speech -> text -> LLM response -> speech."""
    text = transcribe(audio)
    response_text = generate_response(text)
    output_file = inference(response_text)
    return output_file


demo = gr.Blocks()

with demo:
    # Gradio 3.x component API; "Uliza Swali" is Swahili for "Ask a Question".
    audio_file = gr.Audio(source="microphone", type="filepath")
    button = gr.Button("Uliza Swali")
    outputs = gr.Audio(type="filepath", label="Output Audio")
    button.click(fn=process_audio_and_respond, inputs=audio_file, outputs=outputs)

demo.launch()
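
# A quick usage sketch, assuming the pre-1.0 OpenAI Python SDK, Gradio 3.x,
# and Coqui TTS (which provides the `tts` CLI used above); the filename
# app.py is an assumption for illustration:
#
#   pip install "openai<1.0" "gradio<4.0" TTS
#   export OPENAI_API_KEY="sk-..."
#   python app.py
#
# Gradio then prints a local URL; open it, record a question with the
# microphone widget, and press "Uliza Swali" to hear the spoken answer.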