# Hugging Face Space: megamined/jarvis (status: Paused)
# File size: 3,799 bytes

import os
import time

import gradio as gr
import openai
from dotenv import load_dotenv, find_dotenv
from simpleaichat import AIChat

from main import weather, search
from utils.tts import TTS, voices

# Load variables from the .env file located by find_dotenv(), then hand the
# OPENAI_API_KEY value (None when unset) to the openai client.
load_dotenv(dotenv_path=find_dotenv())
openai.api_key = os.environ.get("OPENAI_API_KEY")


def transcribe(audio_file, state=""):
    """Transcribe an audio file with Whisper and append the text to ``state``.

    Args:
        audio_file: Path of the audio file to transcribe, or ``None`` when
            there is nothing to transcribe.
        state: Transcript accumulated so far; the new text is appended to it.

    Returns:
        A ``(state, state)`` pair containing the updated transcript twice.
        When ``audio_file`` is ``None`` the transcript is returned unchanged
        in the same two-element shape, so callers can always unpack it.
    """
    # Check for missing audio before the delay: there is nothing to wait for,
    # and returning a pair keeps both exit paths the same shape.
    if audio_file is None:
        return state, state

    # NOTE(review): the reason for this fixed delay is not visible in this
    # file -- presumably it lets the recording finish being written; confirm.
    time.sleep(5)

    # Passed as the ``prompt`` argument of the transcription request.
    # NOTE(review): presumably meant to bias name spellings and keep filler
    # words in the transcript -- confirm against the Whisper documentation.
    prompt = (
        "The author of this tool is Somto Muotoe. "
        "Friends: Ire Ireoluwa Adedugbe, Biola Aderiye, Jelson, Raj, Akshay."
        "Umm, let me think like, hmm... Okay, here's what I'm, like, "
        "thinking. "
    )
    with open(audio_file, "rb") as f:
        response = openai.Audio.transcribe("whisper-1", f, prompt=prompt)

    state += response["text"]
    return state, state


def chat_with_gpt(prompt, ai_state, model):
    """Send ``prompt`` to the chat session and return the reply.

    Args:
        prompt: Text to send to the chat session.
        ai_state: An existing ``AIChat`` session to reuse, or ``None`` to
            create a new one.
        model: Model name used only when a new session is created; it is not
            consulted when ``ai_state`` is supplied.

    Returns:
        A ``(reply_text, session)`` pair, where ``session`` is the ``AIChat``
        object that handled the prompt.
    """
    session = ai_state
    if session is None:
        # First turn: build a session with the fixed persona and parameters.
        persona = (
            "You are a confidante whose response is curt and concise."
            "You can use tools to give real-time updates on weather and search the internet.  "
            "Answer all questions empathetically, and ALWAYS ask follow-up questions."
            "Do NOT say Confidante in any response."
            "You must TRUST the provided context to inform your response."
        )
        session = AIChat(
            params={"temperature": 0.0, "max_tokens": 200},
            model=model,
            system=persona,
            save_messages=True,
        )

    # The weather and search tools are offered on every call.
    result = session(prompt, tools=[weather, search])
    reply = result["response"]
    print(reply)
    return reply, session


def tts(text, voice_id):
    """Generate audio for ``text`` using the voice identified by ``voice_id``.

    Returns whatever ``TTS.generate`` produces for the given text.
    """
    return TTS(voice_id).generate(text=text)


def transcribe_and_chat(audio_file, voice, history, ai_state, model):
    """Run one voice turn: transcribe the audio, get a reply, and voice it.

    Args:
        audio_file: Path of the recorded audio; must not be ``None``.
        voice: Key into ``voices`` selecting the voice id for the reply.
        history: List of ``(user_text, reply)`` pairs; appended to in place.
        ai_state: Existing chat session, or ``None`` to start a new one.
        model: Model name forwarded to ``chat_with_gpt``.

    Returns:
        ``(history, audio_data, history, ai_state)`` -- the history appears
        twice, followed by the reply audio in between and the chat session.

    Raises:
        gr.Error: If ``audio_file`` is ``None``.
    """
    if audio_file is None:
        raise gr.Error("Empty audio file.")
    voice_id = voices[voice]

    # Only the first element of the transcription result is used here.
    user_text, _ = transcribe(audio_file)
    reply, session = chat_with_gpt(user_text, ai_state, model)
    speech = tts(reply, voice_id)

    # Record this exchange as one (user, assistant) pair.
    history.append((user_text, reply))
    return history, speech, history, session


def clear_chat(history):
    """Empty the chat history in place and return it.

    Args:
        history: List of chat messages; cleared in place.

    Returns:
        The same list object, now empty.

    Note:
        This function used to call ``chat_with_gpt("", ai_state=None)`` to
        "reset" the chat session. That call omitted the required ``model``
        argument, so it raised ``TypeError``; had it succeeded, it would have
        sent an empty prompt to the model and thrown the new session away.
        The session cannot be reset from here because it is not passed in or
        returned -- reset it where it is stored instead.
    """
    history.clear()
    return history


# Build the JARVIS voice-chat UI: record audio, pick a model and a voice, and
# show/play the assistant's reply.
with gr.Blocks(title="JARVIS") as demo:
    gr.Markdown(
        "# Talk with GPT-4! You can get real-time weather updates, and can search Google."
    )
    audio_input = gr.Audio(source="microphone", type="filepath", visible=True)
    gr.ClearButton(audio_input)
    gr.Markdown(
        "Choose the AI model to use for generating responses. "
        "GPT-4 is slower but more accurate, while GPT-3.5-turbo-16k is faster but less accurate."
    )
    model_select = gr.Radio(
        choices=["gpt-4", "gpt-3.5-turbo-16k"],
        label="Model",
        value="gpt-3.5-turbo-16k",
    )

    voice_select = gr.Radio(choices=list(voices.keys()), label="Voice", value="Bella")
    # Per-session state: the list of (user, assistant) pairs and the chat
    # session object (None until the first turn creates it).
    history = gr.State(label="History", value=[])
    ai_state = gr.State(label="AIChat", value=None)
    # transcription = gr.Textbox(lines=2, label="Transcription")
    chat_box = gr.Chatbot(label="Response")
    response_audio = gr.Audio(label="Response Audio", autoplay=True)
    clear_chat_btn = gr.ClearButton(chat_box, value="Clear Chat")

    def _reset_conversation():
        """Return fresh values for the ``history`` and ``ai_state`` states."""
        return [], None

    # The ClearButton itself only empties the Chatbot widget. Without also
    # resetting the stored history and chat session, the next reply would
    # re-render the whole previous conversation and the model would still
    # remember it.
    clear_chat_btn.click(
        _reset_conversation,
        inputs=None,
        outputs=[history, ai_state],
    )

    # Run the full transcribe -> chat -> speech pipeline when recording stops.
    audio_input.stop_recording(
        transcribe_and_chat,
        inputs=[audio_input, voice_select, history, ai_state, model_select],
        outputs=[chat_box, response_audio, history, ai_state],
    )
    # The former bare ``audio_input.clear()`` call was removed: ``clear`` only
    # registers an event listener, and with no handler it registers nothing
    # (or fails on Gradio versions that require a handler argument).

demo.launch()