File size: 525 Bytes
f41e5fe
036f518
f41e5fe
bc985a0
036f518
 
 
 
 
 
 
f41e5fe
 
036f518
461052c
f41e5fe
036f518
 
f41e5fe
 
 
036f518
 
3fa3baf
036f518
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
from fastapi import FastAPI
from llama_cpp import Llama
from pydantic import BaseModel

from chat import Chat

# Path to the local GGUF model weights (4-bit K_S quantized Zephyr 7B beta).
model_path = "zephyr-7b-beta.Q4_K_S.gguf"

# Load the model once at import time so every request reuses the same instance.
# NOTE(review): `max_answer_len` is not a documented `Llama.__init__` parameter
# (llama-cpp-python silently accepts/ignores unknown kwargs) — this was likely
# meant to be `max_tokens=100` on the generation call; confirm and move it.
llm = Llama(model_path=model_path, n_ctx=512, max_answer_len=100)  # Set chat_format according to the model you are using


class validation(BaseModel):
    """Request body for the /llm_on_cpu endpoint: a single user prompt string."""
    # NOTE(review): PEP 8 would name this `Validation` (or better, `PromptRequest`);
    # kept as-is because the endpoint signature below references this exact name.
    prompt: str


# Application and conversation state are module-level singletons: one FastAPI
# app and one Chat session shared by all requests (so conversation history —
# if Chat keeps any — is global, not per-client; verify that is intended).
app = FastAPI()
chat = Chat(model=llm)


@app.post("/llm_on_cpu")
async def stream(item: validation):
    """Run one chat turn on the CPU model and return the generated reply.

    Parameters:
        item: request body carrying the user's prompt text.

    Returns:
        The reply produced by the shared ``chat`` session for this prompt.
    """
    chat.send_message(item.prompt)
    response = chat.generate_reply()

    # Bug fix: the original ended with `return llm(response)`, which fed the
    # finished reply back into the raw model as a brand-new prompt — running a
    # second completion that merely continues the answer text (and doubling
    # inference latency). The chat session's reply is the result; return it.
    return response