edersonmelo committed
Commit 18f4287 · verified · 1 Parent(s): 2e5f69a

Update main.py

Files changed (1)
  1. main.py +3 -17
main.py CHANGED
@@ -1,25 +1,13 @@
 from fastapi import FastAPI
 import requests
 from llama_cpp import Llama
-import threading
 
 app = FastAPI()
 
-llm = None
+llm = Llama(model_path="./tinyllama-1.1b-chat.gguf")
 
-def start_llm():
-    global llm # Adicione esta linha para modificar a variável global
-    llm = Llama(model_path="./tinyllama-1.1b-chat.gguf")
-
-@app.post("/health")
-def health_check():
-    return {"status": "ok"}
-
-@app.post("/deployllm")
+@app.post("/llm")
 async def stream(item: dict):
-
-    if llm is None:
-        raise ValueError("modelo carregando, por favor tente mais tarde")
 
     if 'prompt' not in item.keys():
         raise ValueError("prompt é obrigatório")
@@ -28,6 +16,4 @@ async def stream(item: dict):
     temperatura = item['temperatura'] if 'temperatura' in item.keys() else 0.2
     max_tokens = item['max_tokens'] if 'max_tokens' in item.keys() else 512
 
-    return llm(prompt, max_tokens=max_tokens, temperature=temperatura)
-
-    threading.Thread(target=start_llm).start()
+    return llm(prompt, max_tokens=max_tokens, temperature=temperatura)
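
For reference, a minimal client-side sketch for calling the updated endpoint. This is not part of the commit: the base URL http://localhost:8000 and the example prompt are assumptions, while the route /llm and the JSON keys prompt, temperatura and max_tokens come from the handler in main.py. The response shape follows llama-cpp-python's completion dict, where the generated text usually sits under choices[0]["text"].

import requests

# Hypothetical local deployment, e.g. started with: uvicorn main:app --port 8000
payload = {
    "prompt": "Write one sentence about FastAPI.",
    "temperatura": 0.2,   # optional; the handler defaults to 0.2
    "max_tokens": 128,    # optional; the handler defaults to 512
}

resp = requests.post("http://localhost:8000/llm", json=payload)
resp.raise_for_status()

# llama_cpp.Llama.__call__ returns a completion dict; the generated text
# is usually found under choices[0]["text"].
print(resp.json()["choices"][0]["text"])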