File size: 784 Bytes
21839e1
2b25d5e
 
 
 
 
 
151b91e
2b25d5e
 
 
 
 
 
 
 
 
 
 
 
 
 
6801ccb
2b25d5e
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
from ctransformers import AutoModelForCausalLM
from fastapi import FastAPI, Form
from pydantic import BaseModel

# Model loading: ctransformers reads the local GGUF quantized weights at
# import time. This is blocking and happens once per process.
llm = AutoModelForCausalLM.from_pretrained(
    "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf",  # Q4_K_M quant of Llama-3-8B-Instruct, expected in the working dir
    model_type='llama',  # llama architecture backend for ctransformers
    max_new_tokens=1096,  # per-call generation cap — NOTE(review): 1096 looks like a typo for 1024; confirm intent
    threads=3,  # CPU threads used for inference
)
# Pydantic request schema for the /generate_response endpoint.
class Validation(BaseModel):
    """Request body: the user's message plus the system instruction."""

    user_prompt: str  # User's prompt (the question/message to answer)
    system_prompt: str  # System's instruction (sets the assistant's behavior)

# FastAPI application instance; routes are registered on it below.
app = FastAPI()

# Endpoint for generating responses
@app.post("/generate_response")
def generate_response(item: Validation):
    """Generate a completion for the supplied system/user prompts.

    Fixes vs. the original:
    - Uses the Llama-3-Instruct chat template. The previous
      ``<s>[INST] <<SYS>>`` format is the Llama-2 template, which the
      loaded Llama-3 model was not trained on.
    - Calls ``llm(prompt)`` directly: ctransformers' ``generate()``
      expects a token sequence and yields tokens (a generator FastAPI
      cannot serialize), and ``do_sample`` is a HF-transformers kwarg,
      not a ctransformers one. ``llm(prompt)`` returns the generated str.
    - Declared as plain ``def`` so FastAPI runs the blocking inference
      in its threadpool instead of stalling the event loop.
    """
    # Llama-3 template: system turn, user turn, then an open assistant
    # header so the model continues with its answer.
    prompt = (
        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
        f"{item.system_prompt}<|eot_id|>"
        "<|start_header_id|>user<|end_header_id|>\n\n"
        f"{item.user_prompt}<|eot_id|>"
        "<|start_header_id|>assistant<|end_header_id|>\n\n"
    )
    return llm(prompt)