from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama

app = FastAPI()

# Load the 4-bit quantized Phi-4 model; from_pretrained downloads the GGUF
# file from the Hugging Face Hub on first run and caches it locally.
llm = Llama.from_pretrained(
    repo_id="unsloth/phi-4-GGUF",
    filename="phi-4-Q4_K_M.gguf",
)

# Define request model
class ChatRequest(BaseModel):
    system_prompt: str
    query: str

@app.post("/chat-p4q4")
def chat(request: ChatRequest):
    # Declared sync (not async) so FastAPI runs it in a worker thread:
    # create_chat_completion blocks, and inside an async handler it would
    # stall the event loop for the duration of generation.
    response = llm.create_chat_completion(
        messages=[
            {"role": "system", "content": request.system_prompt},
            {"role": "user", "content": request.query},
        ]
    )
    # Return just the assistant's reply; the full OpenAI-style response dict
    # also carries token-usage metadata the client likely doesn't need.
    return {"response": response["choices"][0]["message"]["content"]}
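
# Minimal entry point for local testing -- a sketch, assuming uvicorn is
# installed alongside fastapi and llama-cpp-python:
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="127.0.0.1", port=8000)

# Example request against the running server (illustrative values):
#   curl -X POST http://127.0.0.1:8000/chat-p4q4 \
#     -H "Content-Type: application/json" \
#     -d '{"system_prompt": "You are a helpful assistant.", "query": "Hi"}'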