import json
from pathlib import Path
from time import time
from typing import Dict, List, Optional, Union

from pydantic import BaseModel, ConfigDict, Field, create_model


class QueryExpansionRequest(BaseModel):
    query: str
    system_message: Optional[str] = None


# Load the template to create the response model
template_path = Path(__file__).parent / "prompt_templates" / "query_expansion.json"
with open(template_path) as f:
    template = json.load(f)

# Create model configuration with proper typing
model_config = ConfigDict(
    json_schema_extra={
        'example': template['example_response']
    }
)

# Create the response model based on the template's schema
QueryExpansionResponse = create_model(
    'QueryExpansionResponse',
    original_query=(str, ...),
    expanded_query=(str, ...),
    search_terms=(List[str], ...),
    call_rag=(bool, ...),
    __config__=model_config,
)


class ChatMessage(BaseModel):
    role: str
    content: str


class ChatCompletionRequest(BaseModel):
    model: str
    messages: List[ChatMessage]
    stream: bool = False


class ChatCompletionMessage(BaseModel):
    role: str = "assistant"
    content: str


class ChatCompletionChoice(BaseModel):
    index: int = 0
    message: ChatCompletionMessage
    logprobs: None = None  # always null; kept for OpenAI response-shape parity
    finish_reason: str = "stop"


class CompletionTokenDetails(BaseModel):
    reasoning_tokens: int = 0
    accepted_prediction_tokens: int = 0
    rejected_prediction_tokens: int = 0


class CompletionUsage(BaseModel):
    prompt_tokens: int = 9  # Placeholder values
    completion_tokens: int = 12
    total_tokens: int = 21
    completion_tokens_details: CompletionTokenDetails = Field(default_factory=CompletionTokenDetails)


class ChatCompletionResponse(BaseModel):
    id: str = Field(default="chatcmpl-123")
    object: str = "chat.completion"
    created: int = Field(default_factory=lambda: int(time()))
    model: str = "gpt-4o-mini"
    system_fingerprint: str = "fp_44709d6fcb"
    choices: List[ChatCompletionChoice]
    usage: CompletionUsage = Field(default_factory=CompletionUsage)

    @classmethod
    def from_response(cls, content: str, model: str = "gpt-4o-mini") -> "ChatCompletionResponse":
        """Create a ChatCompletionResponse from a simple response string."""
        return cls(
            model=model,
            choices=[
                ChatCompletionChoice(
                    message=ChatCompletionMessage(content=content)
                )
            ]
        )


class GenerateRequest(BaseModel):
    prompt: str
    system_message: Optional[str] = None
    max_new_tokens: Optional[int] = None


class EmbeddingRequest(BaseModel):
    text: str


class EmbeddingResponse(BaseModel):
    embedding: List[float]
    dimension: int


class ModelStatus(BaseModel):
    is_loaded: bool
    current_model: Optional[str]
    has_chat_template: Optional[bool] = None


class EmbeddingModelStatus(BaseModel):
    is_loaded: bool
    current_model: Optional[str]


class ModelStatusInfo(BaseModel):
    generation_model: ModelStatus
    embedding_model: EmbeddingModelStatus


class SystemStatusResponse(BaseModel):
    """Pydantic model for system status response"""
    cpu: Optional[Dict[str, Union[float, str]]] = None
    memory: Optional[Dict[str, Union[float, str]]] = None
    gpu: Optional[Dict[str, Union[bool, str, float]]] = None
    storage: Optional[Dict[str, str]] = None
    model: ModelStatusInfo


class ValidationResponse(BaseModel):
    config_validation: Dict[str, bool]
    model_validation: Dict[str, bool]
    folder_validation: Dict[str, bool]
    overall_status: str
    issues: List[str]


class ChunkRerankRequest(BaseModel):
    query: str
    chunks: List[str]
    system_message: Optional[str] = None


# Load the example response from the chunk-rerank template
template_path = Path(__file__).parent / "prompt_templates" / "chunk_rerank.json"
with open(template_path) as f:
    template = json.load(f)
example = template['example_response']
class ChunkRerankResponse(BaseModel):
    """Response model for chunk reranking, based on the template schema"""
    model_config = ConfigDict(json_schema_extra={"example": example})

    original_query: str = Field(..., description="The exact query being processed")
    ranked_chunks: List[str] = Field(
        ...,
        description="Top 5 most relevant chunks in order of importance",
        max_length=5,
    )
    got_chunks: bool = Field(..., description="Whether any relevant chunks were found")
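

# Usage sketch (illustrative only; assumes Pydantic v2 and that the template
# JSON files loaded above exist). Shows how the OpenAI-style wrapper and the
# template-backed rerank schema are meant to be exercised; the query and chunk
# strings below are hypothetical placeholders, not real data.
if __name__ == "__main__":
    # Wrap a raw generation string in an OpenAI-compatible payload.
    demo = ChatCompletionResponse.from_response("Hello from the model!")
    print(demo.model_dump_json(indent=2))

    # Build a rerank result; Pydantic validates it against the schema,
    # including the max_length=5 bound on ranked_chunks.
    rerank = ChunkRerankResponse(
        original_query="what is retrieval-augmented generation?",
        ranked_chunks=["RAG retrieves supporting chunks before generating."],
        got_chunks=True,
    )
    print(rerank.model_dump_json(indent=2))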