from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama
# Model loading with specified path and configuration
llm = Llama(
    model_path="Llama-3.2-3B-Instruct-Q8_0.gguf",  # Update the path as necessary
    n_ctx=4096,   # Context window size in tokens
    n_threads=2,  # Number of CPU threads used for inference
)
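# Optional tuning (a sketch; the right values depend on your hardware): the
# llama-cpp-python constructor also accepts n_gpu_layers, e.g. n_gpu_layers=-1
# to offload all layers when the package is built with CUDA or Metal support.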
# Pydantic model for request validation
class Validation(BaseModel):
    user_prompt: str  # The SQL query request or other prompt sent to the model
    max_tokens: int = 1024
    temperature: float = 0.01
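# Example request body matching this schema (illustrative values only):
#   {"user_prompt": "Write a SQL query selecting all rows from users.", "max_tokens": 256}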
# FastAPI application initialization
app = FastAPI()
# Endpoint for generating responses
@app.post("/generate")  # Route path is illustrative; adjust as needed
async def generate_response(item: Validation):
    # Call the Llama model to generate a completion for the user's prompt
    output = llm(item.user_prompt, max_tokens=item.max_tokens, temperature=item.temperature, echo=False)
    # Extract and return the generated text from the response
    return output['choices'][0]['text']
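# A minimal way to exercise the service (assumptions: the file is saved as
# main.py and the "/generate" route above is kept):
#   uvicorn main:app --host 0.0.0.0 --port 8000
#   curl -X POST http://localhost:8000/generate \
#        -H "Content-Type: application/json" \
#        -d '{"user_prompt": "Write a SQL query that lists all tables."}'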