# phi3-sql / main.py
import os

# Point the Hugging Face cache at a local directory before any HF code reads it
os.environ["HF_HOME"] = "./cache"

from fastapi import FastAPI
from pydantic import BaseModel, Field
from typing import List
from llama_cpp import Llama

# Download (on first run) and load the GGUF model from the Hugging Face Hub
llm = Llama.from_pretrained(
    repo_id="bartowski/Llama-3.2-3B-Instruct-GGUF",  # Hub repository with the GGUF builds
    filename="Llama-3.2-3B-Instruct-Q8_0.gguf",      # Q8_0 quantization of Llama 3.2 3B Instruct
    n_ctx=4096,           # context window size in tokens
    cache_dir="./cache",  # store the downloaded weights alongside the app
    n_threads=2,          # CPU threads used for inference
)
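
# Optional sanity check (an addition, not in the original file): uncomment to
# confirm the model loads and responds before serving traffic.
# print(llm.create_chat_completion(
#     messages=[{"role": "user", "content": "Say hello."}],
#     max_tokens=16,
# )["choices"][0]["message"]["content"])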

# Pydantic models for request validation
class Message(BaseModel):
    role: str     # "user" or "assistant"
    content: str  # the message text


class Validation(BaseModel):
    messages: List[Message] = Field(default_factory=list)  # conversation history, oldest first
    max_tokens: int = 1024     # maximum number of tokens to generate
    temperature: float = 0.01  # sampling temperature; values near 0 keep output nearly deterministic
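
# For illustration (not part of the original file), a request body for the
# endpoint below looks like:
# {
#   "messages": [{"role": "user", "content": "List all employees hired after 2020."}],
#   "max_tokens": 256,
#   "temperature": 0.01
# }
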
# Initialize the FastAPI application
app = FastAPI()

# Endpoint that generates a chat completion from the supplied conversation
@app.post("/generate_response")
def generate_response(item: Validation):
    # A plain (sync) def lets FastAPI run this blocking llama.cpp call in its
    # threadpool instead of stalling the event loop
    response = llm.create_chat_completion(
        messages=[{"role": msg.role, "content": msg.content} for msg in item.messages],
        max_tokens=item.max_tokens,
        temperature=item.temperature,
    )
    # create_chat_completion returns an OpenAI-style dict; extract the assistant text
    return {"response": response["choices"][0]["message"]["content"]}
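
# Local entry point -- a minimal sketch, assuming uvicorn is installed; port 7860
# is the conventional Hugging Face Spaces port, so adjust for other environments.
# Example request once the server is running:
#   curl -X POST http://localhost:7860/generate_response \
#     -H "Content-Type: application/json" \
#     -d '{"messages": [{"role": "user", "content": "Hello"}]}'
if __name__ == "__main__":
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)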