from fastapi import FastAPI
from pydantic import BaseModel
# Llama class from llama-cpp-python
from llama_cpp import Llama

# Model loading with specified path and configuration
llm = Llama(
    model_path="Meta-Llama-3-8B-Instruct.Q4_K_M.gguf",  # Update the path as necessary
    n_ctx=4096,   # Maximum number of tokens for context (input + output)
    n_threads=2,  # Number of CPU threads used
)
# Pydantic model for request validation
class Validation(BaseModel):
    user_prompt: str
    system_prompt: str
    max_tokens: int = 1024
    temperature: float = 0.01

# FastAPI application initialization
app = FastAPI()
# Endpoint for generating responses
@app.post("/generate_response")
async def generate_response(item: Validation):
    # Assemble the prompt in the Llama 3 Instruct chat format
    # from the given system and user prompts
    prompt = (
        "<|begin_of_text|><|start_header_id|>system<|end_header_id|>\n\n"
        f"{item.system_prompt}<|eot_id|>"
        "<|start_header_id|>user<|end_header_id|>\n\n"
        f"{item.user_prompt}<|eot_id|>"
        "<|start_header_id|>assistant<|end_header_id|>\n\n"
    )
    # Call the Llama model to generate a completion
    output = llm(
        prompt,
        max_tokens=item.max_tokens,
        temperature=item.temperature,
        echo=False,  # Return only the completion, not the prompt itself
    )
    # Extract and return the generated text from the response
    return output['choices'][0]['text']
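
Once the server is running (for example via uvicorn main:app --host 0.0.0.0 --port 7860, assuming the file is saved as main.py and you use port 7860, the Spaces default), the endpoint can be exercised with a short client script. This is a minimal sketch; the /generate_response route name and the port come from the assumptions above.

# Minimal client sketch; assumes the server above is reachable on
# localhost:7860 and exposes the /generate_response route shown earlier.
import requests

payload = {
    "user_prompt": "Explain what a GGUF file is in one sentence.",
    "system_prompt": "You are a concise technical assistant.",
    "max_tokens": 256,
    "temperature": 0.01,
}

response = requests.post(
    "http://localhost:7860/generate_response",
    json=payload,
    timeout=120,
)
response.raise_for_status()
# FastAPI JSON-encodes the returned string, so .json() yields the plain text
print(response.json())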