File size: 1,300 Bytes
e6c4009
c827f71
e6c4009
 
 
c827f71
 
 
e6c4009
 
c827f71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e6c4009
c827f71
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import os
import subprocess
import sys

import gradio as gr
import openai

# Read the API key from the environment so that no secret lives in the source
# file. Set OPENAI_API_KEY in the deployment's secret/variable settings.
# NOTE(review): a literal key was previously committed on this line; treat it
# as compromised and revoke it.
openai.api_key = os.environ.get("OPENAI_API_KEY")

def transcribe(audio):
    """Transcribe an audio file to text using the ``whisper-1`` model.

    Args:
        audio: Path of the audio file to send for transcription.

    Returns:
        The value stored under the ``"text"`` key of the API response.
    """
    # Opened in binary mode; the open file handle itself is handed to the API.
    with open(audio, "rb") as recording:
        result = openai.Audio.transcribe("whisper-1", recording)
        return result["text"]

def generate_response(transcribed_text):
    """Ask the ``text-davinci-003`` completion engine to answer a prompt.

    Args:
        transcribed_text: Text used verbatim as the completion prompt.

    Returns:
        The ``text`` attribute of the first choice in the API response.
    """
    request_options = {
        "engine": "text-davinci-003",
        "prompt": transcribed_text,
        "max_tokens": 1024,
        "n": 1,  # a single completion is requested, so choices[0] is the only one
        "stop": None,
        "temperature": 0.5,
    }
    completion = openai.Completion.create(**request_options)
    first_choice = completion.choices[0]
    return first_choice.text

def run_cmd(command):
    """Print *command*, run it as a child process, and wait for it to finish.

    Args:
        command: Argument list for the child process, program name first.
            It is executed directly, without going through a shell.

    Returns:
        None. The child's exit status is not inspected.

    Raises:
        SystemExit: With status 1 if a ``KeyboardInterrupt`` arrives while
            the command is being printed or run.
    """
    try:
        print(command)
        # The original referenced bare `call` and `sys` without importing
        # them, so every invocation failed with NameError.
        subprocess.call(command)
    except KeyboardInterrupt:
        print("Process interrupted")
        sys.exit(1)

def inference(text):
    """Synthesize speech for *text* by invoking the ``tts`` command.

    Args:
        text: Text passed to the command via its ``--text`` option.

    Returns:
        The fixed filename ``'tts_output.wav'``.
    """
    # NOTE(review): presumably the `tts` CLI writes to tts_output.wav in the
    # working directory by default; confirm against the installed tool.
    run_cmd(["tts", "--text", text])
    return "tts_output.wav"

def process_audio_and_respond(audio):
    """Run the whole pipeline: recording -> text -> completion -> speech.

    Args:
        audio: Path of the recorded audio file to process.

    Returns:
        The path returned by ``inference`` for the synthesized reply.
    """
    question = transcribe(audio)
    answer = generate_response(question)
    return inference(answer)

# Build the UI: a microphone recorder, a trigger button, and an audio player
# for the synthesized reply.
# NOTE(review): gr.inputs / gr.outputs look like legacy Gradio namespaces;
# confirm they exist in the installed Gradio version.
with gr.Blocks() as demo:
    audio_file = gr.inputs.Audio(type="filepath", source="microphone")
    # The button label appears to be Swahili for "Ask a question".
    button = gr.Button("Uliza Swali")
    outputs = gr.outputs.Audio(label="Output Audio", type="filepath")

    # A click sends the recording through the pipeline and plays the result.
    button.click(
        fn=process_audio_and_respond,
        inputs=audio_file,
        outputs=outputs,
    )

# Start the web app.
demo.launch()