import asyncio
import inspect
from typing import List, Type

import gradio as gr
from gradio import routes

from huggingface_hub import hf_hub_download
from llama_cpp import Llama

loop = asyncio.get_event_loop()
# Monkey patch: override Gradio's route type introspection so the
# auto-generated API docs can be built from the component docstrings.
def get_types(cls_set: List[Type], component: str):
    docset = []
    types = []
    # Input components document their value on the second docstring line,
    # output components on the last one.
    line_idx = 1 if component == "input" else -1
    for cls in cls_set:
        doc = inspect.getdoc(cls)
        if not doc:
            # Guard: inspect.getdoc may return None for undocumented classes.
            continue
        doc_lines = doc.split("\n")
        docset.append(doc_lines[line_idx].split(":")[-1])
        types.append(doc_lines[line_idx].split(")")[0].split("(")[-1])
    return docset, types
routes.get_types = get_types

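# Download the 4-bit (q4_0) GGUF weights for Llama-2-ko-7B-chat into the working directory.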
hf_hub_download(repo_id='StarFox7/Llama-2-ko-7B-chat-gguf', filename='Llama-2-ko-7B-chat-gguf-q4_0.bin', local_dir='./')

llm = Llama(
    model_path='Llama-2-ko-7B-chat-gguf-q4_0.bin',
    n_ctx=512,
)
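
# A quick smoke test you can uncomment to confirm the weights load and generate.
# This is a minimal sketch; the prompt text is an illustrative assumption, not
# part of the original app:
#   out = llm("### A:\n์•ˆ๋…•ํ•˜์„ธ์š”\n\n### B:\n", max_tokens=16, stop=["###"])
#   print(out['choices'][0]['text'])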
# App code
def chat(x):
    # The real model call is disabled for now; re-enable the three lines below
    # to answer with the LLM. The Korean prompt reads: "The following is a
    # role-play between A and B. You are B, talking with A. Answer your friend
    # in a friendly and concise way."
    #prom = f"๋‹ค์Œ์€ A์™€ B์˜ ์—ญํ• ๊ทน์ด์•ผ. ๋„ˆ๋Š” B์•ผ. A์™€ ๋Œ€ํ™”ํ•˜๊ณ  ์žˆ์–ด. ์นœ๊ตฌ์—๊ฒŒ ์นœ๊ทผํ•˜๊ณ  ๊ฐ„๊ฒฐํ•˜๊ฒŒ ์ž˜ ๋Œ€๋‹ตํ•ด์ค˜.\n\n### A:\n{x}\n\n### B:\n"
    #output = llm(prom, max_tokens=20, stop=["###"], echo=True)
    #return output['choices'][0]['text'][len(prom):-1]  # echo=True includes the prompt, so slice it off
    return "AI ์‘๋‹ต์ž…๋‹ˆ๋‹ค."  # placeholder reply: "This is an AI response."

with gr.Blocks() as demo:
    gr.Interface(
        fn=chat,
        inputs=["text"],
        outputs="text",
        description="call",
    )

# .queue() already enables queuing; the old launch(enable_queue=True) flag is
# redundant and was removed in later Gradio releases.
demo.queue(max_size=32).launch()
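
# Client-side usage sketch (assumes the `gradio_client` package and the default
# local address http://127.0.0.1:7860; adjust for your Space URL):
#   from gradio_client import Client
#   client = Client("http://127.0.0.1:7860/")
#   print(client.predict("์•ˆ๋…•ํ•˜์„ธ์š”", api_name="/predict"))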