Spaces:
Build error
Build error
File size: 784 Bytes
21839e1 2b25d5e 151b91e 2b25d5e 6801ccb 2b25d5e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 |
from ctransformers import AutoModelForCausalLM
from fastapi import FastAPI, Form
from pydantic import BaseModel
# Model loading: the checkpoint is loaded once, when this module is imported,
# and the resulting `llm` object is shared by the request handler(s) below.
_LOAD_OPTIONS = {
    "model_type": "llama",
    "max_new_tokens": 1096,
    "threads": 3,
}
llm = AutoModelForCausalLM.from_pretrained(
    "Meta-Llama-3-8B-Instruct.Q4_K_M.gguf",
    **_LOAD_OPTIONS,
)
# Pydantic request schema
class Validation(BaseModel):
    """Request body accepted by the ``/generate_response`` endpoint.

    Both fields are plain strings declared without defaults.
    """

    # The user's prompt.
    user_prompt: str
    # The system instruction placed ahead of the user's prompt.
    system_prompt: str
# FastAPI application instance; endpoints are registered on it with
# decorators such as `@app.post(...)`.
app = FastAPI()
# Endpoint for generating responses
@app.post("/generate_response")
async def generate_response(item: Validation):
    """Generate a completion for the submitted system and user prompts.

    Args:
        item: Request body carrying ``system_prompt`` and ``user_prompt``.

    Returns:
        The text generated by the model for the assembled prompt.
    """
    # NOTE(review): this is the Llama-2 chat template ([INST] / <<SYS>>), but
    # the checkpoint file name suggests a Llama 3 Instruct model, which is
    # documented with a different header-based template -- confirm which
    # format the loaded model expects before changing this string.
    prompt = f"<s>[INST] <<SYS>> \n {item.system_prompt}<</SYS>> \n {item.user_prompt} [/INST]"

    # Call the model object directly: in ctransformers, `llm(prompt)` takes a
    # string and returns the generated text, whereas `llm.generate(...)`
    # expects a sequence of token ids and has no `do_sample` parameter, so
    # the previous call raised TypeError on every request.
    #
    # The call is synchronous, so this coroutine does not yield to the event
    # loop while the model is generating.
    return llm(prompt)
|