# NeonLLM / app.py
import os
import json
from typing import List, Tuple
from collections import OrderedDict
import gradio as gr
from shared import Client

# CONFIG is a JSON object (read from the environment) keyed by model alias.
config = json.loads(os.environ['CONFIG'])
model_names = list(config.keys())
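
# A hypothetical sketch of the expected CONFIG shape (illustrative values only;
# the real schema lives in the deployment's environment, not in this file).
# Note that "api_url" / "api_key" name *other env vars*, since the loop below
# does os.environ[config[name]['api_url']]:
#
#   CONFIG='{
#     "stable": {
#       "name": "neon-llm-stable",
#       "api_url": "STABLE_API_URL",
#       "api_key": "STABLE_API_KEY",
#       "personas": {"default": null}
#     }
#   }'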

# Build one Client per configured model; the config stores the names of the
# env vars holding each endpoint's URL and key, not the values themselves.
clients = {}
for name in config:
    model_personas = config[name].get("personas", {})
    client = Client(
        api_url=os.environ[config[name]['api_url']],
        api_key=os.environ[config[name]['api_key']],
        personas=model_personas,
    )
    clients[name] = client

# Persona names de-duplicated across all models, preserving first-seen order.
personas = list(OrderedDict.fromkeys(persona for name in model_names for persona in clients[name].personas))
# Human-readable summary of which personas each model supports.
info = "\n".join([f"{model} ({config[model]['name']}): {list(clients[model].personas.keys())}" for model in model_names])
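# With the hypothetical config sketched above, `info` would render as, e.g.:
#   stable (neon-llm-stable): ['default']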

def respond(
    message,
    history: List[Tuple[str, str]],
    persona,
    model,
    info,
    conversational,
    max_tokens,
):
    client = clients[model]

    messages = []
    # Resolve the persona's system prompt; an unknown persona surfaces as a UI error.
    try:
        system_prompt = client.personas[persona]
    except KeyError:
        supported_personas = list(client.personas.keys())
        raise gr.Error(f"Model '{model}' does not support persona '{persona}'; supported personas: {supported_personas}")
    if system_prompt is not None:
        messages.append({"role": "system", "content": system_prompt})

    # In conversational mode, replay the last two (user, assistant) exchanges for context.
    if conversational:
        for user_msg, assistant_msg in history[-2:]:
            if user_msg:
                messages.append({"role": "user", "content": user_msg})
            if assistant_msg:
                messages.append({"role": "assistant", "content": assistant_msg})

    messages.append({"role": "user", "content": message})

    # Deterministic decoding (temperature=0); extra_body forwards vLLM-specific
    # sampling options through the OpenAI-compatible client.
    completion = client.client.chat.completions.create(
        model=client.vllm_model_name,
        messages=messages,
        max_tokens=max_tokens,
        temperature=0,
        extra_body={
            "repetition_penalty": 1.05,
            "use_beam_search": True,
            "best_of": 5,
        },
    )
    response = completion.choices[0].message.content
    return response
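
# For illustration, with conversational=True and one prior exchange, the payload
# sent to the endpoint looks roughly like this (contents hypothetical):
#
#   [{"role": "system", "content": "<persona system prompt>"},
#    {"role": "user", "content": "Hi"},
#    {"role": "assistant", "content": "Hello!"},
#    {"role": "user", "content": "What can you do?"}]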

# Chat UI; the additional inputs below are passed to respond() in order, and
# concurrency_limit caps the number of simultaneous generations.
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Radio(choices=personas, value="default", label="persona"),
        gr.Radio(choices=model_names, value="stable", label="model"),
        gr.Textbox(value=info, interactive=False, label="info"),
        gr.Checkbox(value=True, label="conversational"),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
    ],
    additional_inputs_accordion=gr.Accordion(label="Config", open=True),
    title="NeonLLM (v2024-06-17)",
    concurrency_limit=5,
)

if __name__ == "__main__":
    demo.launch()