demo-llm / main.py
from fastapi import FastAPI
from llama_cpp import Llama
from pydantic import BaseModel
from chat import Chat
model_path = "zephyr-7b-beta.Q4_K_S.gguf"
llm = Llama(model_path=model_path, n_ctx=512)  # Set chat_format according to the model you are using; cap answer length with max_tokens at generation time
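# Hedged example: recent llama-cpp-python releases register a "zephyr" chat
# format, so for this Zephyr GGUF the constructor could plausibly be the line
# below (an assumption to verify against your installed version and model card):
#   llm = Llama(model_path=model_path, n_ctx=512, chat_format="zephyr")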
class Validation(BaseModel):
    prompt: str
app = FastAPI()
chat = Chat(model=llm)
@app.post("/llm_on_cpu")
async def stream(item: Validation):
    # Pass the user's prompt to the chat session and return the generated reply.
    chat.send_message(item.prompt)
    response = chat.generate_reply()
    return {"response": response}
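# Usage (assuming this file is saved as main.py and uvicorn is installed):
#   uvicorn main:app --host 0.0.0.0 --port 8000
#   curl -X POST http://localhost:8000/llm_on_cpu \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Hello"}'
#
# The local chat module imported above is not part of this file. A minimal
# sketch of a compatible chat.py, assuming it wraps llama-cpp-python's
# create_chat_completion API (the names and parameters below are illustrative
# assumptions, not the actual implementation):
#
#   class Chat:
#       def __init__(self, model):
#           self.model = model      # llama_cpp.Llama instance
#           self.messages = []      # running conversation history
#
#       def send_message(self, prompt: str) -> None:
#           self.messages.append({"role": "user", "content": prompt})
#
#       def generate_reply(self, max_tokens: int = 100) -> str:
#           out = self.model.create_chat_completion(
#               messages=self.messages, max_tokens=max_tokens)
#           reply = out["choices"][0]["message"]["content"]
#           self.messages.append({"role": "assistant", "content": reply})
#           return reply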