from fastapi import FastAPI
from pydantic import BaseModel, Field
from typing import List
from llama_cpp import Llama
import os

# Keep Hugging Face downloads in a local cache so the model is not re-fetched
# on every restart.
os.environ["HF_HOME"] = "./cache"

# Download (on first use) and load the quantized GGUF model from the Hugging Face Hub.
llm = Llama.from_pretrained(
    repo_id="bartowski/Llama-3.2-3B-Instruct-GGUF",
    filename="Llama-3.2-3B-Instruct-Q8_0.gguf",
    n_ctx=4096,  # context window size in tokens
    cache_dir="./cache",
    n_threads=2,  # CPU threads used for inference
)
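
# A quick sanity check of the loaded model, independent of the HTTP layer
# (a minimal sketch; the prompt text is illustrative):
#
#   out = llm.create_chat_completion(
#       messages=[{"role": "user", "content": "Say hi."}],
#       max_tokens=16,
#   )
#   print(out["choices"][0]["message"]["content"])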


class Message(BaseModel):
    """A single chat turn, e.g. role="user" or role="assistant"."""

    role: str
    content: str


class Validation(BaseModel):
    """Request body schema for the /generate_response endpoint."""

    messages: List[Message] = Field(default_factory=list)
    max_tokens: int = 1024
    temperature: float = 0.01  # near-deterministic sampling by default
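
# An example request body this schema accepts (values are illustrative):
#
#   {
#       "messages": [{"role": "user", "content": "Hello!"}],
#       "max_tokens": 256,
#       "temperature": 0.01
#   }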


app = FastAPI()


@app.post("/generate_response")
async def generate_response(item: Validation):
    # Forward the chat history and sampling settings to the model.
    response = llm.create_chat_completion(
        messages=[{"role": msg.role, "content": msg.content} for msg in item.messages],
        max_tokens=item.max_tokens,
        temperature=item.temperature,
    )
    # Return only the assistant's reply text from the first completion choice.
    return {"response": response["choices"][0]["message"]["content"]}
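
# A minimal way to run and exercise the endpoint (assuming this file is saved
# as main.py and uvicorn is installed; host and port are illustrative):
#
#   uvicorn main:app --host 0.0.0.0 --port 8000
#
#   curl -X POST http://localhost:8000/generate_response \
#        -H "Content-Type: application/json" \
#        -d '{"messages": [{"role": "user", "content": "Hello!"}]}'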