File size: 3,730 Bytes
b36a86c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
import os
import time

import gradio as gr
import openai
from dotenv import load_dotenv, find_dotenv
from simpleaichat import AIChat

from main import weather, search, MODEL
from utils.tts import TTS, voices

# Load variables from the nearest .env file, then hand the OpenAI key
# (None when the variable is unset) to the openai client module.
load_dotenv(find_dotenv())
openai.api_key = os.environ.get("OPENAI_API_KEY")


def transcribe(audio_file, state=""):
    """Transcribe an audio file with Whisper and append the text to ``state``.

    Args:
        audio_file: Path of the audio file to transcribe, or ``None`` when
            there is nothing to transcribe.
        state: Text accumulated so far; the new transcription is appended
            to it.

    Returns:
        ``None`` when ``audio_file`` is ``None``; otherwise a
        ``(state, state)`` tuple containing the accumulated text twice.
    """
    # Nothing to transcribe: return immediately instead of sleeping first.
    if audio_file is None:
        return None

    # NOTE(review): fixed 5-second pause kept from the original code;
    # presumably it lets the recorder finish writing the file -- confirm.
    time.sleep(5)

    # Sent to the transcription call as its ``prompt`` argument.
    prompt = (
        "The author of this tool is Somto Muotoe. "
        "Friends: Ire Ireoluwa Adedugbe, Biola Aderiye, Jelson, Raj, Akshay."
        "Umm, let me think like, hmm... Okay, here's what I'm, like, "
        "thinking. "
    )
    with open(audio_file, "rb") as f:
        response = openai.Audio.transcribe("whisper-1", f, prompt=prompt)
    text = response["text"]

    state += text
    return state, state


def chat_with_gpt(prompt, ai_state):
    """Send ``prompt`` to the chat model and return its reply.

    Args:
        prompt: The user message to send.
        ai_state: An existing ``AIChat`` session to continue, or ``None`` to
            create a new one with the system prompt defined below.

    Returns:
        A ``(text_response, ai)`` tuple: the reply text and the session that
        produced it, so the caller can pass the session back in next time.
    """
    session = ai_state
    if session is None:
        # First turn: build a fresh session with the persona instructions.
        persona = (
            "You are a confidante whose response is curt and concise."
            "You can use tools to give real-time updates on weather and search the internet.  "
            "Answer all questions empathetically, and ALWAYS ask follow-up questions."
            "Do NOT say Confidante in any response."
            "You must TRUST the provided context to inform your response."
            # "If a question does not make any sense, or is not factually coherent, explain why "
            # "instead of answering something not correct. If you don't know the answer to a question, "
            # "please don't share false information."
        )
        session = AIChat(
            params={"temperature": 0.0, "max_tokens": 200},
            model=MODEL,
            system=persona,
            save_messages=True,
        )

    # Both tools are offered to the model on every call.
    result = session(prompt, tools=[weather, search])
    reply = result["response"]
    print(reply)
    return reply, session


def tts(text, voice_id):
    """Generate speech audio for ``text`` using the voice ``voice_id``.

    Args:
        text: The text to speak.
        voice_id: Identifier passed to the ``TTS`` constructor.

    Returns:
        Whatever ``TTS.generate`` returns for the given text.
    """
    return TTS(voice_id).generate(text=text)


def transcribe_and_chat(audio_file, voice, history, ai_state):
    """Run one voice turn: transcribe, query the chat model, synthesize audio.

    Args:
        audio_file: Path of the recorded audio; must not be ``None``.
        voice: Key into ``voices`` selecting the voice id for the reply.
        history: List of ``(user_text, reply)`` tuples; appended to in place.
        ai_state: Chat session from the previous turn, or ``None``.

    Returns:
        ``(history, audio_data, history, ai_state)`` -- the updated history
        (twice), the synthesized reply audio, and the chat session.

    Raises:
        gr.Error: If ``audio_file`` is ``None``.
    """
    if audio_file is None:
        raise gr.Error("Empty audio file.")

    chosen_voice_id = voices[voice]

    # Only the first element of transcribe()'s result is needed here.
    transcript, _ = transcribe(audio_file)
    reply, ai_state = chat_with_gpt(transcript, ai_state)
    speech = tts(reply, chosen_voice_id)

    # Record this exchange as a (user, assistant) pair.
    history.append((transcript, reply))

    return history, speech, history, ai_state


def clear_chat(history):
    """Empty the chat history in place and return it.

    The previous version also called ``chat_with_gpt("", ai_state=None)``.
    That call built a brand-new chat session, sent it an empty prompt and
    discarded the result, so it reset nothing and only cost a model request.
    It has been removed.

    Args:
        history: The list of chat exchanges to clear.

    Returns:
        The same ``history`` list, now empty.
    """
    history.clear()
    return history


# Build the Gradio UI. Components are created in the order written below.
with gr.Blocks(title="JARVIS") as demo:
    # NOTE(review): the heading says GPT-4, but the model actually used is
    # whatever MODEL (imported from main) is set to -- confirm they match.
    gr.Markdown(
        "# Talk with GPT-4! You can get real-time weather updates, and can search Google."
    )
    # Microphone input; type="filepath" means the handler receives a path.
    audio_input = gr.Audio(source="microphone", type="filepath", visible=True)
    gr.ClearButton(audio_input)

    # Voice choices are the keys of the imported `voices` mapping.
    voice_select = gr.Radio(choices=list(voices.keys()), label="Voice", value="Bella")
    # State values passed into and returned from transcribe_and_chat:
    # the chat history list and the AIChat session (None until first use).
    history = gr.State(label="History", value=[])
    ai_state = gr.State(label="AIChat", value=None)
    # transcription = gr.Textbox(lines=2, label="Transcription")
    chat_box = gr.Chatbot(label="Response")
    response_audio = gr.Audio(label="Response Audio", autoplay=True)
    # NOTE(review): this button is only given chat_box; the `history` and
    # `ai_state` values are not passed to it -- confirm that is intended.
    gr.ClearButton(chat_box, value="Clear Chat")
    # clear_chat_btn.click(clear_chat, inputs=history, outputs=history)

    # When recording stops, run the full transcribe -> chat -> speech turn.
    audio_input.stop_recording(
        transcribe_and_chat,
        inputs=[audio_input, voice_select, history, ai_state],
        outputs=[chat_box, response_audio, history, ai_state],
    )
    # NOTE(review): called with no handler or components; what this does
    # depends on the installed Gradio version -- confirm it is needed.
    audio_input.clear()

# Start the app on port 8080 with share=True.
demo.launch(server_port=8080, share=True)