# Source: Hugging Face Space 080-ai/cutlass_v1 (file size: 2,202 bytes).
# Space status when this page was captured: Runtime error.
# Commit hashes listed in the page's blame gutter:
#   aad58a2, 192371d, dabe6f0, 4723a07, ba1ca3f, 23c61db, 62193ce, 03e0ef4

import gradio as gr
import os
from openai import OpenAI

# The API key is read from the RUNPOD_API_KEY environment variable (None when
# it is unset) and handed to an OpenAI client pointed at the RunPod endpoint.
api_key = os.getenv("RUNPOD_API_KEY")
client = OpenAI(
    base_url="https://api.runpod.ai/v2/vllm-k0g4c60zor9xuu/openai/v1",
    api_key=api_key,
)

def predict(message, history=None):
    """Stream the model's answer to ``message`` and track the conversation.

    Args:
        message: The user's latest question.
        history: List of earlier ``(user_text, assistant_text)`` pairs held in
            the session state, or ``None`` when there is no history yet.

    Yields:
        ``(answer_so_far, history)`` tuples, one per streamed chunk that
        carries text, so each yield supplies both outputs declared on the
        Gradio interface (the answer textbox and the state). A final tuple is
        yielded after the stream ends, with the finished exchange appended to
        ``history``.
    """
    if history is None:
        history = []

    # The system prompt is sent with the "system" role on every request and is
    # kept out of ``history``; storing it as a (user, assistant) pair would
    # replay it to the model as a fake user/assistant exchange.
    system_prompt = "You are a history assistant, that provides the best possible answers to any historical questions asked about American History. Be helpful and specific, providing any detailed nuance needed to have a full understanding of the question."

    # Prepare messages in the format required by the chat completions API.
    history_openai_format = [{"role": "system", "content": system_prompt}]
    for human, assistant in history:
        history_openai_format.append({"role": "user", "content": human})
        history_openai_format.append({"role": "assistant", "content": assistant})
    history_openai_format.append({"role": "user", "content": message})

    # Make the API call
    response_stream = client.chat.completions.create(
        model="ambrosfitz/llama-3-history",
        messages=history_openai_format,
        temperature=0,
        max_tokens=150,
        stream=True,
    )

    # Accumulate response chunks and surface the partial answer as it grows.
    full_message = ""
    for chunk in response_stream:
        # Skip chunks that carry no choices instead of failing on the index.
        if not chunk.choices:
            continue
        delta_text = chunk.choices[0].delta.content
        if delta_text is not None:
            full_message += delta_text
            yield full_message, history

    # Record the finished exchange and emit it, so the state output receives
    # the updated history for the next call.
    history.append((message, full_message))
    yield full_message, history

# Wire predict into a Gradio Interface: a question textbox and a gr.State
# value go in, an answer textbox and a gr.State value come out.
iface = gr.Interface(
    predict,
    [gr.Textbox(label="Type your question here..."), gr.State()],
    [gr.Textbox(), gr.State()],
    title="HistoryBot Chat",
    description="Interact with HistoryBot, a specialized assistant for American History. Ask any historical questions to get detailed and nuanced answers.",
    allow_flagging="never",
)

# Launch the app.
iface.launch()