api_for_chat / app.py
ldhldh's picture
Update app.py
a2f0814
raw
history blame
2.85 kB
from threading import Thread
import gradio as gr
import inspect
from gradio import routes
from typing import List, Type
from petals import AutoDistributedModelForCausalLM
from transformers import AutoTokenizer
import npc_data
import requests, os, re, asyncio, json
# NOTE(review): `loop` is never used anywhere visible in this file, and
# asyncio.get_event_loop() is deprecated outside a running event loop —
# candidate for removal; confirm nothing external relies on this side effect.
loop = asyncio.get_event_loop()
# init code
def get_types(cls_set: List[Type], component: str):
    """Extract (description, type-name) pairs from gradio component docstrings.

    Replacement for ``gradio.routes.get_types`` (legacy-gradio workaround,
    installed via monkeypatch below). Input components document themselves on
    the second docstring line, output components on the last line; the type
    name is taken from the ``(...)`` group on that line.

    Args:
        cls_set: gradio component classes to inspect.
        component: ``"input"`` selects docstring line 1, anything else the
            last line (original behavior preserved).

    Returns:
        Tuple ``(docset, types)`` of equal-length lists of strings.
    """
    docset = []
    types = []
    # BUGFIX: inspect.getdoc() returns None for undocumented classes, and a
    # one-line docstring has no line index 1 — both crashed the original.
    line_index = 1 if component == "input" else -1
    for cls in cls_set:
        doc = inspect.getdoc(cls) or ""
        doc_lines = doc.split("\n")
        try:
            line = doc_lines[line_index]
        except IndexError:
            line = ""
        docset.append(line.split(":")[-1])
        types.append(line.split(")")[0].split("(")[-1])
    return docset, types
# Monkeypatch gradio's route type introspection with the local implementation
# above (workaround for the legacy gradio API used by this Space).
routes.get_types = get_types

# App code
model_name = "daekeun-ml/Llama-2-ko-instruct-13B"
# Alternatives tried previously, kept for reference:
#daekeun-ml/Llama-2-ko-instruct-13B
#quantumaikr/llama-2-70b-fb16-korean

# Tokenizer is downloaded eagerly at import time; the distributed model
# itself is loaded lazily by init() on the first chat() call.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = None
def check(model_name):
    """Return True iff the Petals health API reports *model_name* as healthy.

    Queries https://health.petals.dev and scans every model report for one
    whose name matches and whose state is exactly "healthy".
    """
    state = requests.get("https://health.petals.dev/api/v1/state").json()
    return any(
        report["name"] == model_name and report["state"] == "healthy"
        for report in state["model_reports"]
    )
def init():
    """Load the distributed model into the module-global `model`.

    Does nothing (leaves `model` as-is) when the Petals swarm does not
    currently report the model as healthy.
    """
    global model
    if not check(model_name):
        return
    model = AutoDistributedModelForCausalLM.from_pretrained(model_name)
def chat(id, npc, prompt):
    """Run one model inference for user `id` and log it as a transaction.

    Args:
        id: user identifier, forwarded to the coin/transaction endpoints.
        npc: NOTE(review) — currently unused; presumably reserved for
            npc_data-driven prompting. Confirm before removing.
        prompt: raw text prompt fed to the model.

    Returns:
        The generated text, or the sentinel strings "no model" (model not
        loaded / unhealthy) or "no coin" (user has no credit).
    """
    # Lazy warm-up: the first call triggers init() and still returns
    # "no model" so the slow remote model load doesn't block this request.
    # NOTE(review): looks deliberate — confirm clients retry after this.
    if model is None:
        init()
        return "no model"

    # get_coin endpoint: check the caller's remaining credit.
    response = requests.post("https://ldhldh-api-for-unity.hf.space/run/predict_6", json={
        "data": [
            id,
        ]}).json()
    coin = response["data"][0]
    if int(coin) == 0:
        return "no coin"

    # model inference
    if check(model_name):
        inputs = tokenizer(prompt, return_tensors="pt")["input_ids"]
        outputs = model.generate(inputs, max_new_tokens=100)
        decoded = tokenizer.decode(outputs[0])
        print(decoded)
        # BUGFIX: the original did `outputs[len(prom):-1]`, slicing the token
        # *tensor* by the prompt's character length — that yields a tensor
        # (usually empty), which crashed the string concatenation below.
        # Slice the decoded text instead to strip the echoed prompt.
        output = decoded[len(prompt):]
        print(output)
    else:
        output = "no model"

    # add_transaction endpoint: record the exchange for history/billing.
    requests.post("https://ldhldh-api-for-unity.hf.space/run/predict_5", json={
        "data": [
            id,
            "inference",
            "### input:\n" + prompt + "\n\n### output:\n" + output
        ]}).json()
    return output
# UI / API wiring: expose chat() as a single gradio Interface at /run/predict.
with gr.Blocks() as demo:
    # NOTE(review): `count` is never read anywhere visible — candidate for removal.
    count = 0
    aa = gr.Interface(
        fn=chat,
        inputs=["text","text","text"],
        outputs="text",
        description="chat, ai 응닡을 λ°˜ν™˜ν•©λ‹ˆλ‹€. λ‚΄λΆ€μ μœΌλ‘œ νŠΈλžœμž­μ…˜ 생성. \n /run/predict",
    )
# NOTE(review): enable_queue= is deprecated in newer gradio; .queue() alone
# already enables queuing — confirm the pinned gradio version still accepts it.
demo.queue(max_size=32).launch(enable_queue=True)