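# Gradio chat UI that streams replies from a Llama 3.1 model served by a
# remote Ollama instance; the server address is read from the PUBLIC_IP and
# PORT environment variables (e.g. a machine running `ollama serve`).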
import os

import gradio as gr
from ollama import Client

public_ip = os.environ['PUBLIC_IP']
port = os.environ['PORT']
model = 'llama3.1'

client = Client(host=f'http://{public_ip}:{port}')
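
# Convert Gradio's [user, assistant] history pairs into the role/content
# message list that Ollama's chat API expects, prepending the system prompt.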
def format_history(msg: str, history: list[list[str]], system_prompt: str) -> list[dict]:
    chat_history = [{'role': 'system', 'content': system_prompt}]
    for query, response in history:
        chat_history.append({'role': 'user', 'content': query})
        chat_history.append({'role': 'assistant', 'content': response})
    chat_history.append({'role': 'user', 'content': msg})
    return chat_history

def generate_response(msg: str, history: list[list[str]], system_prompt: str,
                      top_k: int, top_p: float, temperature: float):
    chat_history = format_history(msg, history, system_prompt)
    response = client.chat(model=model,
                           stream=True,
                           messages=chat_history,
                           options={'top_k': top_k, 'top_p': top_p, 'temperature': temperature})
    # Stream partial tokens back to the UI, yielding the accumulated text each time.
    message = ""
    for partial_resp in response:
        token = partial_resp['message']['content']
        message += token
        yield message
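
# Build the chat UI: the sliders expose Ollama's sampling options and the
# system prompt is editable as an additional input.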
chatbot = gr.ChatInterface(
    generate_response,
    chatbot=gr.Chatbot(
        avatar_images=["user.png", "chatbot.png"],
        height="64vh"
    ),
    additional_inputs=[
        gr.Textbox("You are a helpful assistant and always try to answer user queries to the best of your ability.", label="System Prompt"),
        gr.Slider(0.0, 100.0, label="top_k", value=40, info="Reduces the probability of generating nonsense. A higher value (e.g. 100) will give more diverse answers, while a lower value (e.g. 10) will be more conservative. (Default: 40)"),
        gr.Slider(0.0, 1.0, label="top_p", value=0.9, info="Works together with top_k. A higher value (e.g. 0.95) will lead to more diverse text, while a lower value (e.g. 0.5) will generate more focused and conservative text. (Default: 0.9)"),
        gr.Slider(0.0, 2.0, label="temperature", value=0.4, info="The temperature of the model. Increasing the temperature will make the model answer more creatively. (Default: 0.8)"),
    ],
    title="Trashcan AI",
    description="Llama 3.1 hosted on a 2013 \"Trashcan\" Mac Pro with Ollama",
    theme="finlaymacklon/smooth_slate",
    submit_btn="Send",
    retry_btn="🔄 Regenerate Response",
    undo_btn="↩️ Delete Previous",
    clear_btn="🗑️ Clear Chat"
)

# Enable the request queue (needed for streaming generator outputs) and launch.
chatbot.queue().launch()