# NeonLLM / app.py
# NeonBohdan — "Added multi model config" (commit 9513cae, 1.99 kB)
import os
import json
from typing import List, Tuple
from collections import OrderedDict
import gradio as gr
from openai import OpenAI
# Runtime configuration is injected as a JSON object via the CONFIG env var:
# {model_name: {"api_url": <env var name>, "api_key": <env var name>,
#               "personas": {persona_name: system_prompt_or_null, ...}}, ...}
config = json.loads(os.environ['CONFIG'])
model_names = list(config)

# Collect every persona name across all models, deduplicated but keeping
# first-seen order (drives the persona radio choices in the UI).
_seen_personas = OrderedDict()
for _model_cfg in config.values():
    for _persona in _model_cfg["personas"]:
        _seen_personas[_persona] = None
personas = list(_seen_personas)

# One OpenAI-compatible client per configured model. The config stores the
# NAMES of env vars holding the base URL and API key, not the values themselves.
clients = {
    _name: OpenAI(
        base_url=f"{os.environ[_cfg['api_url']]}/v1",
        api_key=os.environ[_cfg['api_key']],
    )
    for _name, _cfg in config.items()
}
def respond(
    message,
    history: List[Tuple[str, str]],
    model,
    persona,
    conversational,
    max_tokens,
):
    """Produce one assistant reply for the Gradio chat interface.

    Args:
        message: The current user message.
        history: Prior (user, assistant) exchanges from Gradio.
        model: Key into ``config``/``clients`` selecting the backend.
        persona: Key into the model's persona map; its value (possibly
            ``None``) is used as the system prompt.
        conversational: When true, replay the last two exchanges so the
            backend sees recent context.
        max_tokens: Completion token budget.

    Returns:
        The assistant's reply text.
    """
    system_prompt = config[model]["personas"][persona]

    # Seed with the system prompt unless this persona explicitly has none.
    messages = (
        [] if system_prompt is None
        else [{"role": "system", "content": system_prompt}]
    )

    if conversational:
        # Only the two most recent exchanges are replayed, keeping the
        # prompt short; empty halves of a turn are skipped.
        for user_turn, assistant_turn in history[-2:]:
            if user_turn:
                messages.append({"role": "user", "content": user_turn})
            if assistant_turn:
                messages.append({"role": "assistant", "content": assistant_turn})

    messages.append({"role": "user", "content": message})

    # NOTE(review): the served model name is fixed here even though multiple
    # config entries exist — presumably every backend serves this one model;
    # confirm if per-entry model names are ever needed.
    completion = clients[model].chat.completions.create(
        model="neongeckocom/NeonLLM",
        messages=messages,
        max_tokens=max_tokens,
        temperature=0,  # deterministic decoding; beam search picks best of 5
        extra_body={
            "repetition_penalty": 1.05,
            "use_beam_search": True,
            "best_of": 5,
        },
    )
    return completion.choices[0].message.content
# Extra controls shown under the chat box; their values are passed to
# respond() positionally after (message, history).
_extra_controls = [
    gr.Radio(choices=model_names, value="stable", label="model"),
    gr.Radio(choices=personas, value="default", label="persona"),
    gr.Checkbox(value=True, label="conversational"),
    gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
]

demo = gr.ChatInterface(
    respond,
    additional_inputs=_extra_controls,
    title="NeonLLM (v2024-06-06)",
    concurrency_limit=5,  # cap simultaneous generations across all users
)

if __name__ == "__main__":
    demo.launch()