edersonmelo committed on
Commit d857963 · verified · 1 Parent(s): 0e18200

Update main.py

Files changed (1): main.py (+32 -32)
main.py CHANGED
@@ -1,33 +1,33 @@
-from fastapi import FastAPI
-import requests
-from llama_cpp import Llama
-import threading
-
-app = FastAPI()
-
-llm = None
-
-def start_llm():
-    llm = Llama(model_path="./tinyllama-1.1b-chat.gguf")
-
-
-@app.post("/health")
-return {"status": "ok"}
-
-@app.post("/llm")
-async def stream(item: dict):
-
-    if llm is None:
-        raise ValueError("modelo carregando, por favor tente mais tarde")
-
-    if 'prompt' not in item.keys():
-        raise ValueError("prompt é obrigatório")
-
-    prompt = item['prompt']
-    temperatura = item['temperatura'] if 'temperatura' in item.keys() else 0.2
-    max_tokens = item['max_tokens'] if 'max_tokens' in item.keys() else 512
-
-    return llm(prompt, max_tokens=max_tokens, temperature=temperatura)
-
-
+from fastapi import FastAPI
+import requests
+from llama_cpp import Llama
+import threading
+
+app = FastAPI()
+
+llm = None
+
+def start_llm():
+    llm = Llama(model_path="./tinyllama-1.1b-chat.gguf")
+
+
+@app.post("/health")
+return {"status": "ok"}
+
+@app.post("/deployllm")
+async def stream(item: dict):
+
+    if llm is None:
+        raise ValueError("modelo carregando, por favor tente mais tarde")
+
+    if 'prompt' not in item.keys():
+        raise ValueError("prompt é obrigatório")
+
+    prompt = item['prompt']
+    temperatura = item['temperatura'] if 'temperatura' in item.keys() else 0.2
+    max_tokens = item['max_tokens'] if 'max_tokens' in item.keys() else 512
+
+    return llm(prompt, max_tokens=max_tokens, temperature=temperatura)
+
+
 threading.Thread(target=start_llm).start()
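
The route rename (/llm to /deployllm) is the only functional change in this commit. Two pre-existing issues remain in both versions of the file: start_llm assigns to a local llm rather than the module-level variable (so the endpoints always see llm is None), and the /health decorator is followed by a bare return with no function definition, which is a syntax error. The sketch below is a minimal corrected version, not part of this commit; the HTTPException responses and English error messages are substitutions for the original ValueError calls and Portuguese strings.

```python
import threading

from fastapi import FastAPI, HTTPException
from llama_cpp import Llama

app = FastAPI()
llm = None


def start_llm():
    # Bind the loaded model to the module-level variable; without `global`
    # the assignment below would create a throwaway local and the endpoints
    # would never see the model.
    global llm
    llm = Llama(model_path="./tinyllama-1.1b-chat.gguf")


@app.post("/health")
async def health():
    return {"status": "ok"}


@app.post("/deployllm")
async def stream(item: dict):
    if llm is None:
        # The model is still loading in the background thread.
        raise HTTPException(status_code=503, detail="model is loading, please try again later")

    if "prompt" not in item:
        raise HTTPException(status_code=422, detail="prompt is required")

    prompt = item["prompt"]
    temperatura = item.get("temperatura", 0.2)
    max_tokens = item.get("max_tokens", 512)

    return llm(prompt, max_tokens=max_tokens, temperature=temperatura)


# Load the model in a background thread so the server can start answering
# /health immediately instead of blocking on model load.
threading.Thread(target=start_llm).start()
```

For reference, a client call against the renamed endpoint could look like the following; the host and port are assumptions matching a default local `uvicorn main:app` run.

```python
import requests

r = requests.post(
    "http://127.0.0.1:8000/deployllm",
    json={"prompt": "Olá!", "max_tokens": 64},
)
print(r.json())
```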