from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama
app = FastAPI()

# Load the model
llm = Llama.from_pretrained(
    repo_id="unsloth/phi-4-GGUF",
    filename="phi-4-Q4_K_M.gguf",
)

# Define request model
class ChatRequest(BaseModel):
    system_prompt: str
    query: str

@app.post("/chat-p4q4")
async def chat(request: ChatRequest):
    # Run a chat completion with the caller-supplied system prompt and query
    response = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": request.system_prompt},
            {"role": "user", "content": request.query},
        ]
    )
    # Return the full completion payload; the generated text lives in
    # response["choices"][0]["message"]["content"]
    return {"response": response}
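
With the server running (for example via `uvicorn main:app`, assuming the file above is saved as main.py), the endpoint can be exercised with a small client. Here is a minimal sketch using the requests library; the host, port, and prompt text are placeholder assumptions:

# client.py - minimal sketch; assumes the server is listening on the
# default uvicorn address, http://127.0.0.1:8000
import requests

payload = {
    "system_prompt": "You are a concise assistant.",
    "query": "Explain what a GGUF file is in one sentence.",
}

resp = requests.post("http://127.0.0.1:8000/chat-p4q4", json=payload)
resp.raise_for_status()

# The endpoint wraps the llama.cpp completion dict under the "response" key
completion = resp.json()["response"]
print(completion["choices"][0]["message"]["content"])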