Spaces:
Paused
Paused
Update olapp.py
Browse files
olapp.py
CHANGED
@@ -1,34 +1,72 @@
|
|
1 |
from http.server import HTTPServer, BaseHTTPRequestHandler
|
2 |
from urllib.parse import urlparse, parse_qs
|
3 |
-
import urllib.parse
|
4 |
-
import json
|
5 |
from llama_cpp import Llama
|
6 |
|
7 |
-
|
8 |
|
9 |
-
|
|
|
|
|
|
|
|
|
10 |
|
11 |
-
|
|
|
|
|
|
|
|
|
|
|
12 |
|
|
|
|
|
|
|
13 |
|
14 |
class OlHandler(BaseHTTPRequestHandler):
|
15 |
|
16 |
def do_GET(self):
|
17 |
query_components = parse_qs(urlparse(self.path).query)
|
18 |
-
|
19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
#output = llm.create_completion(
|
22 |
# q,
|
23 |
# max_tokens=32,
|
24 |
# echo=False
|
25 |
-
#)["choices"][0]["text"]
|
26 |
|
27 |
self.send_response(200)
|
28 |
self.send_header('Content-type','text/plain; charset=utf-8')
|
29 |
self.end_headers()
|
30 |
#self.wfile.write(output.encode('utf-8'))
|
31 |
-
|
32 |
return
|
33 |
|
34 |
|
|
|
1 |
from http.server import HTTPServer, BaseHTTPRequestHandler
|
2 |
from urllib.parse import urlparse, parse_qs
|
|
|
|
|
3 |
from llama_cpp import Llama
|
4 |
|
5 |
+
# System prompt (Russian). Translation: "You are a Russian-language automatic
# assistant and professional editor. You follow the user's instructions
# exactly, observing every detail of the task. You are good at summarizing
# text, extracting only the essential meaning."
# NOTE: this is a runtime string fed to the model — do not translate it.
SYSTEM_PROMPT = "Ты — русскоязычный автоматический ассистент и профессиональный редактор. Ты выполняешь указания пользователя в точности, соблюдая все детали задания. Ты хорошо умеешь обобщать текст, выделяя только основной смысл."
|
6 |
|
7 |
+
def get_message_tokens(llm, role, content):
    """Tokenize one chat message in the "<role>\\n<content>\\n</s>" prompt format.

    Args:
        llm: a llama_cpp.Llama instance (only its ``tokenize`` method is used).
        role: message role string, e.g. "system", "user" or "bot".
        content: message text.

    Returns:
        list[int]: token ids for the formatted message, with special tokens
        enabled so ``</s>`` is tokenized as the end-of-sequence marker.
    """
    # Original read `ef get_message_tokens(...)` — the truncated `def` was a
    # syntax error that prevented the module from importing at all.
    content = f"{role}\n{content}\n</s>"
    content = content.encode("utf-8")
    message_tokens = llm.tokenize(content, special=True)
    return message_tokens
|
12 |
|
13 |
+
def get_system_tokens(llm):
    """Return the token ids for the fixed system message (SYSTEM_PROMPT)."""
    # Delegate straight to the generic message tokenizer with the
    # module-level system prompt; no intermediate dict is needed.
    return get_message_tokens(llm, role="system", content=SYSTEM_PROMPT)
|
19 |
|
20 |
+
# Load the GGUF model once at module import time; every request handler
# reuses this single instance. n_ctx=2048 caps the prompt+completion length.
# NOTE(review): `n_parts` looks like a legacy llama-cpp-python parameter —
# confirm it is still accepted by the installed library version.
llm = Llama(model_path="/home/oluser/olapp/model-q4_K.gguf", n_ctx=2048, n_parts=1)

# Tokenized system prompt, computed once and shared across requests.
system_tokens = get_system_tokens(llm)
|
23 |
|
24 |
class OlHandler(BaseHTTPRequestHandler):
    """HTTP handler: GET /?q=<text> returns a one-sentence Russian summary of <text>."""

    def do_GET(self):
        """Handle a GET request by generating a summary with the shared llm."""
        query_components = parse_qs(urlparse(self.path).query)

        # Robustness fix: a missing "q" previously raised KeyError and the
        # client got no response; answer 400 explicitly instead.
        if "q" not in query_components:
            self.send_response(400)
            self.send_header('Content-type', 'text/plain; charset=utf-8')
            self.end_headers()
            self.wfile.write('missing required query parameter "q"'.encode('utf-8'))
            return
        q = query_components["q"][0]

        # Bug fix: the original did `tokens = system_tokens`, aliasing the
        # module-level cache; the `+=` and `.append` below mutate the list in
        # place, so the cached system prompt grew on every request and
        # corrupted all subsequent prompts. Copy it instead.
        tokens = list(system_tokens)
        llm.eval(tokens)

        # TODO: add few shot

        # Prompt (Russian): "Write a one-sentence summary of the text below.
        # The sentence must be concise and reflect the main point of the
        # event or news." — typo fixed: "лаконичным о отражать" -> "и отражать".
        message_tokens = get_message_tokens(
            llm=llm,
            role="user",
            content="Напиши краткое изложение текста, представленного ниже, в одном предложении.\nПредложение должно быть лаконичным и отражать основной смысл события или новости.\n\n" + q,
        )
        role_tokens = llm.tokenize("bot\n".encode("utf-8"), special=True)
        tokens += message_tokens + role_tokens

        generator = llm.generate(
            tokens,
            top_k=30,
            top_p=.9,
            temp=.2,
            repeat_penalty=1.21
        )

        # Accumulate detokenized text until the model emits end-of-sequence.
        answ = ""
        for tok in generator:
            token_str = llm.detokenize([tok]).decode("utf-8", errors="ignore")
            tokens.append(tok)
            if tok == llm.token_eos():
                break
            answ += token_str

        self.send_response(200)
        self.send_header('Content-type', 'text/plain; charset=utf-8')
        self.end_headers()
        # Bug fix: original wrote `swlf.wfile.write(...)` — a NameError on
        # every request, so the generated answer was never sent.
        self.wfile.write(answ.encode('utf-8'))
        return
|
71 |
|
72 |
|