File size: 1,347 Bytes
a40ce12
 
e6c4009
c827f71
e6c4009
 
 
c827f71
 
 
e6c4009
 
c827f71
 
 
 
 
 
 
 
 
 
 
 
a40ce12
 
 
 
c827f71
 
 
 
 
 
 
a40ce12
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import os
import subprocess
import openai
import gradio as gr

# SECURITY: credentials must never be committed to source control. The key that
# used to be hard-coded on this line has been exposed and should be revoked.
# The key is now taken from the OPENAI_API_KEY environment variable (configure
# it as a secret on the hosting platform); if it is unset the value is None and
# API calls will fail with an authentication error instead of using a leaked key.
openai.api_key = os.environ.get("OPENAI_API_KEY")

def transcribe(audio):
    """Transcribe a recorded audio file to text.

    Args:
        audio: Path of the audio file to transcribe; it is opened in
            binary mode and passed to ``openai.Audio.transcribe`` with the
            ``"whisper-1"`` model.

    Returns:
        The value stored under the ``"text"`` key of the transcription
        response.
    """
    with open(audio, "rb") as recording:
        result = openai.Audio.transcribe("whisper-1", recording)
    return result["text"]

def generate_response(transcribed_text, model="gpt-3.5-turbo-instruct"):
    """Generate a text answer for the transcribed question.

    Args:
        transcribed_text: Prompt sent verbatim to the completion endpoint.
        model: Name of the completion model to query. The default replaces
            ``text-davinci-003``, which has been retired by OpenAI, with its
            documented successor for the completions endpoint.

    Returns:
        The ``text`` of the first (and only requested) completion choice.
    """
    response = openai.Completion.create(
        # ``model=`` supersedes the deprecated ``engine=`` keyword.
        model=model,
        prompt=transcribed_text,
        max_tokens=1024,
        n=1,
        stop=None,
        temperature=0.5,
    )
    return response.choices[0].text

def inference(text):
    """Synthesize ``text`` to speech by invoking the ``tts`` command.

    Args:
        text: Text passed to the command's ``--text`` option.

    Returns:
        The output path handed to ``--out_path`` (always ``tts_output.wav``).

    Raises:
        subprocess.CalledProcessError: If the command exits with a non-zero
            status (``check=True``).
    """
    wav_path = "tts_output.wav"
    # Argument list (no shell), so the text is passed as a single argument.
    subprocess.run(["tts", "--text", text, "--out_path", wav_path], check=True)
    return wav_path

def process_audio_and_respond(audio):
    """Answer a spoken question with synthesized speech.

    Runs the full pipeline: transcribe the recording, generate a text answer
    from the transcript, then synthesize that answer to an audio file.

    Args:
        audio: Path of the recorded audio file, or ``None``/empty when no
            recording is available.

    Returns:
        Path of the generated answer audio file, or ``None`` when there is
        no recording or the transcript is empty.
    """
    # The interface is configured with live=True, so this callback may be
    # invoked without a recording; opening a missing path would raise.
    if not audio:
        return None

    text = transcribe(audio)
    # Nothing intelligible was said: skip the completion and TTS calls
    # rather than sending an empty prompt.
    if not text or not text.strip():
        return None

    response_text = generate_response(text)
    return inference(response_text)

# Web UI: a microphone recording goes in (as a file path), the path of the
# synthesized answer audio comes out.
demo = gr.Interface(
    fn=process_audio_and_respond,
    inputs=gr.inputs.Audio(
        source="microphone",
        type="filepath",
        label="Speak your question",
    ),
    outputs=gr.outputs.Audio(type="filepath", label="Answer"),
    title="AI Question Answering",
    description="Ask any question and get an AI-generated answer as audio output.",
    live=True,
    allow_flagging=False,
    layout="vertical",
    theme="compact",
)

# Start serving the interface.
demo.launch()