File size: 525 Bytes
f41e5fe 036f518 f41e5fe bc985a0 036f518 f41e5fe 036f518 461052c f41e5fe 036f518 f41e5fe 036f518 3fa3baf 036f518 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 |
from fastapi import FastAPI
from llama_cpp import Llama
from pydantic import BaseModel
from chat import Chat
# Path to the local GGUF quantized model file (Zephyr 7B beta, Q4_K_S quant).
model_path = "zephyr-7b-beta.Q4_K_S.gguf"
# Load the model with a 512-token context window.
# NOTE(review): `max_answer_len` is not a documented llama_cpp.Llama parameter
# (generation length is normally capped with `max_tokens` at call time) — it is
# likely absorbed by **kwargs and silently ignored; verify against the installed
# llama_cpp version. The comment mentions chat_format, but none is passed here.
llm = Llama(model_path=model_path, n_ctx=512, max_answer_len=100) # Set chat_format according to the model you are using
class validation(BaseModel):
    """Request body schema for the /llm_on_cpu endpoint.

    NOTE(review): class name should conventionally be PascalCase
    (e.g. `PromptRequest`); kept as-is to avoid breaking references.
    """
    # The user's input text to send to the chat model.
    prompt: str
# FastAPI application instance serving the inference endpoint.
app = FastAPI()
# Project-local Chat wrapper holding conversation state around the loaded model.
# NOTE(review): a single module-level Chat is shared by all requests, so
# concurrent callers share one conversation history — confirm this is intended.
chat = Chat(model=llm)
@app.post("/llm_on_cpu")
async def stream(item: validation):
    """Generate a chat reply for the submitted prompt.

    Args:
        item: Validated request body carrying the user's ``prompt``.

    Returns:
        The chat model's reply text for the prompt.
    """
    # Append the user's prompt to the shared conversation, then have the
    # Chat wrapper run the model to produce the assistant's reply.
    chat.send_message(item.prompt)
    response = chat.generate_reply()
    # BUG FIX: the original `return llm(response)` fed the already-generated
    # reply back into the raw model as a brand-new prompt, producing a
    # completion of the completion (and returning llama.cpp's raw completion
    # dict instead of the reply). Return the generated reply directly.
    return response
|