edersonmelo committed (verified)
Commit eb8ac8a · Parent: 18f4287

Update main.py
Files changed (1):
  1. main.py (+17, -3)
main.py CHANGED
@@ -1,13 +1,25 @@
 from fastapi import FastAPI
 import requests
 from llama_cpp import Llama
+import threading
 
 app = FastAPI()
 
-llm = Llama(model_path="./tinyllama-1.1b-chat.gguf")
+llm = None
 
-@app.post("/llm")
+def start_llm():
+    global llm  # add this line so the assignment updates the global variable
+    llm = Llama(model_path="./tinyllama-1.1b-chat.gguf")
+
+@app.post("/health")
+def health_check():
+    return {"status": "ok"}
+
+@app.post("/deployllm")
 async def stream(item: dict):
+
+    if llm is None:
+        raise ValueError("modelo carregando, por favor tente mais tarde")
 
     if 'prompt' not in item.keys():
         raise ValueError("prompt é obrigatório")
@@ -16,4 +28,6 @@ async def stream(item: dict):
     temperatura = item['temperatura'] if 'temperatura' in item.keys() else 0.2
     max_tokens = item['max_tokens'] if 'max_tokens' in item.keys() else 512
 
-    return llm(prompt, max_tokens=max_tokens, temperature=temperatura)
+    return llm(prompt, max_tokens=max_tokens, temperature=temperatura)
+
+threading.Thread(target=start_llm).start()
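
For reference, a minimal client-side sketch of how the two endpoints behave after this change. It assumes the app is served locally with uvicorn on the default port 8000; the base URL, port, and example payload values are illustrative assumptions, not part of the commit:

    import requests

    BASE_URL = "http://localhost:8000"  # assumed local uvicorn default; adjust as needed

    # /health responds immediately, even while the model is still loading
    # in the background thread started at import time.
    print(requests.post(f"{BASE_URL}/health").json())  # {"status": "ok"}

    # /deployllm returns the llama_cpp completion dict once start_llm() has
    # finished; before that, the handler raises because llm is still None.
    payload = {"prompt": "Hello", "temperatura": 0.2, "max_tokens": 64}
    print(requests.post(f"{BASE_URL}/deployllm", json=payload).json())

Deferring the Llama(...) call to a background thread lets the server start accepting requests (and pass health checks) while the GGUF model is still being loaded, at the cost of /deployllm erroring until loading completes.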