# Page header captured with this file (not part of the program): Hugging Face Spaces - status "Runtime error".
from threading import Thread | |
import gradio as gr | |
import inspect | |
from gradio import routes | |
from typing import List, Type | |
from petals import AutoDistributedModelForCausalLM | |
from transformers import AutoTokenizer | |
import npc_data | |
import requests, os, re, asyncio, json | |
# Module-level event loop handle.
# asyncio.get_event_loop() is deprecated when no loop has been set and raises
# RuntimeError on newer Python versions, so fall back to creating and
# registering a fresh loop in that case.
try:
    loop = asyncio.get_event_loop()
except RuntimeError:
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)

# init code
def get_types(cls_set: List[Type], component: str):
    """Extract a description and a type name from each class docstring.

    For ``component == "input"`` the second docstring line is parsed; for any
    other value the last docstring line is parsed. From that line, the text
    after the final ``:`` is the description and the text inside the first
    parenthesised group is the type name.

    Args:
        cls_set: Classes whose docstrings are inspected.
        component: ``"input"`` selects the second docstring line; anything
            else selects the last line.

    Returns:
        A ``(docset, types)`` tuple of two lists, each with one entry per
        class in ``cls_set``. A class with no docstring, or with too few
        docstring lines, contributes empty strings instead of raising.
    """
    # Only the line index differs between the "input" and "output" cases.
    line_index = 1 if component == "input" else -1

    docset = []
    types = []
    for cls in cls_set:
        # inspect.getdoc() returns None when there is no docstring at all.
        doc_lines = (inspect.getdoc(cls) or "").split("\n")
        try:
            line = doc_lines[line_index]
        except IndexError:
            # One-line docstring while the second line was requested.
            line = ""
        docset.append(line.split(":")[-1])
        types.append(line.split(")")[0].split("(")[-1])
    return docset, types
# Swap gradio's docstring-introspection helper for the local implementation.
routes.get_types = get_types

# App code
# Checkpoints referenced so far:
#   daekeun-ml/Llama-2-ko-instruct-13B
#   quantumaikr/llama-2-70b-fb16-korean
model_name = "daekeun-ml/Llama-2-ko-instruct-13B"
model = None  # distributed model handle; stays None until init() assigns it
tokenizer = AutoTokenizer.from_pretrained(model_name)
def check(model_name):
    """Return True when the Petals health monitor reports the model as healthy.

    Queries ``https://health.petals.dev/api/v1/state`` and looks through its
    ``model_reports`` list for an entry whose ``name`` equals ``model_name``
    and whose ``state`` is ``"healthy"``.

    Args:
        model_name: Model identifier to look up in the health report.

    Returns:
        True if a matching healthy entry exists. False otherwise, including
        when the health endpoint cannot be reached or returns an unexpected
        payload (availability cannot be confirmed in that case).
    """
    try:
        # A timeout keeps a stalled health endpoint from blocking the caller
        # indefinitely.
        reply = requests.get("https://health.petals.dev/api/v1/state", timeout=10)
        reports = reply.json()["model_reports"]
    except (requests.RequestException, ValueError, KeyError) as err:
        print(f"health check failed: {err}")
        return False

    return any(
        report.get("name") == model_name and report.get("state") == "healthy"
        for report in reports
    )
def init():
    """Load the distributed model into the module-level ``model``.

    The model is only loaded when ``check(model_name)`` succeeds; otherwise
    ``model`` is left untouched.
    """
    global model
    if not check(model_name):
        return
    model = AutoDistributedModelForCausalLM.from_pretrained(model_name)
def chat(id, npc, text): | |
if model == None: | |
init() | |
return "no model" | |
# get_coin endpoint | |
response = requests.post("https://ldhldh-api-for-unity.hf.space/run/predict_6", json={ | |
"data": [ | |
id, | |
]}).json() | |
coin = response["data"][0] | |
if int(coin) == 0: | |
return "no coin" | |
# model inference | |
if check(model_name): | |
prom = f"""<s><<SYS>> | |
{npc_data.system_message}<</SYS>> | |
[INST] | |
{npc_data} | |
### ๋ช ๋ น์ด: | |
๋ํ๊ธฐ๋ก์ ์ฐธ๊ณ ํ์ฌ Rabbit์ด ํ ๋ง์ ์์ฐ์ค๋ฝ๊ฒ ์์ฑํด์ฃผ์ธ์. ํ ๋ฌธ์ฅ๋ง ์์ฑํ์ธ์. | |
์ด์ ๋ํ๊ธฐ๋ก: | |
''' | |
{history} | |
''' | |
### User: | |
{user_message} | |
### Rabbit:[/INST] | |
""" | |
inputs = tokenizer(prom, return_tensors="pt")["input_ids"] | |
outputs = model.generate(inputs, max_new_tokens=100) | |
print(tokenizer.decode(outputs[0])) | |
output = tokenizer.decode(outputs[0])[len(prom):-1] | |
print(output) | |
else: | |
output = "no model" | |
# add_transaction endpoint | |
response = requests.post("https://ldhldh-api-for-unity.hf.space/run/predict_5", json={ | |
"data": [ | |
id, | |
"inference", | |
"### input:\n" + prompt + "\n\n### output:\n" + output | |
]}).json() | |
d = response["data"][0] | |
return output | |
# Gradio UI: exposes chat() with three text inputs and one text output.
with gr.Blocks() as demo:
    count = 0  # not referenced elsewhere in the visible code
    aa = gr.Interface(
        fn=chat,
        inputs=["text", "text", "text"],
        outputs="text",
        description="chat, ai 응답을 반환합니다. 내부적으로 트랜잭션 생성. \n /run/predict",
    )

# queue() already enables request queuing, so the deprecated ``enable_queue``
# launch keyword is redundant; newer Gradio releases reject it.
demo.queue(max_size=32).launch()