from fastapi import FastAPI
from llama_cpp import Llama
from pydantic import BaseModel
from chat import Chat

model_path = "zephyr-7b-beta.Q4_K_S.gguf"
# Set chat_format according to the model you are using. Note that the answer-length
# cap is not a Llama constructor argument; pass max_tokens at generation time instead.
llm = Llama(model_path=model_path, n_ctx=512)
class validation(BaseModel):
    prompt: str

app = FastAPI()
chat = Chat(model=llm)
@app.post("/stream")  # route path is a placeholder; adjust it to your API layout
async def stream(item: validation):
    # Chat received llm above, so generate_reply() is assumed to run the model itself;
    # its output is returned directly rather than being fed back into llm().
    chat.send_message(item.prompt)
    response = chat.generate_reply()
    return {"answer": response}
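
# --- Example client (not part of the original listing) ---
# A minimal sketch of exercising the endpoint above, run as a separate script
# while the app is being served locally, e.g. with `uvicorn main:app --port 8000`.
# The module name, port, and /stream path are assumptions based on the code above.
import requests

resp = requests.post(
    "http://127.0.0.1:8000/stream",
    json={"prompt": "What is the capital of France?"},
)
resp.raise_for_status()
print(resp.json()["answer"])  # prints the model's reply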