# Inference-API / main / schemas.py
# (Hub page residue preserved as a comment: author AurelioAguirre,
#  commit eb5a3fb "Adding query expansion and reranker", raw/history/blame, 4.45 kB)
import json
from pathlib import Path
from pydantic import BaseModel, Field, create_model, ConfigDict
from typing import List, Optional, Dict, Union
from time import time
class QueryExpansionRequest(BaseModel):
    """Request body for the query-expansion endpoint."""

    query: str  # the user query to be expanded
    system_message: Optional[str] = None  # optional system-prompt override (consumer not visible here)
# Load the template once at import time; its canned example response is
# embedded into the generated model's JSON schema below.
template_path = Path(__file__).parent / "prompt_templates" / "query_expansion.json"
with open(template_path) as f:
    template = json.load(f)

# Create model configuration with proper typing: expose the template's
# example response in the generated OpenAPI/JSON schema.
model_config = ConfigDict(
    json_schema_extra={
        'example': template['example_response']
    }
)

# Create the response model based on the template's schema.
# FIX: pydantic v2's create_model() takes configuration via the dedicated
# `__config__` keyword. Passing `model_config=...` as a plain kwarg is
# interpreted as a *field* named "model_config", which collides with the
# reserved BaseModel attribute instead of configuring the model.
QueryExpansionResponse = create_model(
    'QueryExpansionResponse',
    original_query=(str, ...),      # the query exactly as received
    expanded_query=(str, ...),      # rewritten/expanded form of the query
    search_terms=(List[str], ...),  # individual terms extracted for retrieval
    call_rag=(bool, ...),           # whether retrieval should be triggered
    __config__=model_config,
)
class ChatMessage(BaseModel):
    """A single message in an OpenAI-style chat conversation."""

    role: str  # e.g. "user" / "assistant" / "system" — not validated here
    content: str  # message text
class ChatCompletionRequest(BaseModel):
    """Request body for the chat-completion endpoint (OpenAI-compatible shape)."""

    model: str  # model identifier requested by the client
    messages: List[ChatMessage]  # full conversation history
    stream: bool = False  # streaming flag; handling happens in the endpoint, not visible here
class ChatCompletionMessage(BaseModel):
    """Assistant message carried inside a completion choice."""

    role: str = "assistant"  # fixed default: responses are authored by the assistant
    content: str  # generated text
class ChatCompletionChoice(BaseModel):
    """One completion choice in an OpenAI-style chat response."""

    index: int = 0  # position of this choice in the response's choices list
    message: ChatCompletionMessage
    # FIX: the original annotation `Optional[None]` collapses to plain
    # NoneType, so the field could never hold a value. In the OpenAI
    # response shape `logprobs` is null or an object; widen the type while
    # keeping the default (None) unchanged, so existing callers see no
    # difference.
    logprobs: Optional[dict] = None
    finish_reason: str = "stop"  # placeholder: always reports a normal stop
class CompletionTokenDetails(BaseModel):
    """Breakdown of completion tokens; every counter defaults to 0."""

    reasoning_tokens: int = 0
    accepted_prediction_tokens: int = 0
    rejected_prediction_tokens: int = 0
class CompletionUsage(BaseModel):
    """Token-usage accounting for a completion.

    NOTE(review): the defaults below are hard-coded placeholders, not real
    counts — confirm callers overwrite them with actual usage.
    """

    prompt_tokens: int = 9  # Placeholder values
    completion_tokens: int = 12
    total_tokens: int = 21
    # default_factory so each response gets its own details object
    completion_tokens_details: CompletionTokenDetails = Field(default_factory=CompletionTokenDetails)
class ChatCompletionResponse(BaseModel):
    """OpenAI-compatible chat-completion response envelope.

    NOTE(review): several defaults ("chatcmpl-123", "fp_44709d6fcb", the
    usage numbers) are static placeholders mimicking the OpenAI example
    payload — every response will share them unless callers override.
    """

    id: str = Field(default="chatcmpl-123")  # placeholder id, NOT unique per response
    object: str = "chat.completion"
    created: int = Field(default_factory=lambda: int(time()))  # unix timestamp at construction
    model: str = "gpt-4o-mini"  # default model label
    system_fingerprint: str = "fp_44709d6fcb"  # placeholder fingerprint
    choices: List[ChatCompletionChoice]  # the only required field
    usage: CompletionUsage = Field(default_factory=CompletionUsage)  # placeholder token counts

    @classmethod
    def from_response(cls, content: str, model: str = "gpt-4o-mini") -> "ChatCompletionResponse":
        """Create a ChatCompletionResponse from a simple response string"""
        # Wraps the raw text in a single assistant-role choice; all other
        # fields take their (placeholder) defaults.
        return cls(
            model=model,
            choices=[
                ChatCompletionChoice(
                    message=ChatCompletionMessage(content=content)
                )
            ]
        )
class GenerateRequest(BaseModel):
    """Request body for plain text generation."""

    prompt: str  # raw prompt text
    system_message: Optional[str] = None  # optional system-prompt override
    max_new_tokens: Optional[int] = None  # cap on generated tokens; None presumably defers to the backend default — confirm
class EmbeddingRequest(BaseModel):
    """Request body for the embedding endpoint."""

    text: str  # text to embed
class EmbeddingResponse(BaseModel):
    """Embedding vector plus its dimensionality."""

    embedding: List[float]
    dimension: int  # presumably len(embedding) — not enforced here
class ModelStatus(BaseModel):
    """Load state of the text-generation model."""

    is_loaded: bool
    current_model: Optional[str]  # required field, but its value may be None
    has_chat_template: Optional[bool] = None  # None when unknown/not applicable
class EmbeddingModelStatus(BaseModel):
    """Load state of the embedding model."""

    is_loaded: bool
    current_model: Optional[str]  # required field, but its value may be None
class ModelStatusInfo(BaseModel):
    """Aggregated status of both models served by the API."""

    generation_model: ModelStatus
    embedding_model: EmbeddingModelStatus
class SystemStatusResponse(BaseModel):
    """Pydantic model for system status response"""

    # Each metrics section is optional; values mix numbers and display strings.
    cpu: Optional[Dict[str, Union[float, str]]] = None
    memory: Optional[Dict[str, Union[float, str]]] = None
    gpu: Optional[Dict[str, Union[bool, str, float]]] = None
    storage: Optional[Dict[str, str]] = None
    model: ModelStatusInfo  # the only required section
class ValidationResponse(BaseModel):
    """Result of configuration/model/folder validation checks."""

    config_validation: Dict[str, bool]  # per-check pass/fail
    model_validation: Dict[str, bool]
    folder_validation: Dict[str, bool]
    overall_status: str  # summary verdict string
    issues: List[str]  # human-readable problems found
class ChunkRerankRequest(BaseModel):
    """Request body for reranking retrieved chunks against a query."""

    query: str  # query the chunks are ranked against
    chunks: List[str]  # candidate chunks to rank
    system_message: Optional[str] = None  # optional system-prompt override
# Load the chunk-rerank prompt template so its canned example response can be
# attached to ChunkRerankResponse's JSON schema.
template_path = Path(__file__).parent / "prompt_templates" / "chunk_rerank.json"
template = json.loads(template_path.read_text())
example = template['example_response']
class ChunkRerankResponse(BaseModel):
    """Response model for chunk reranking, based on template schema"""

    # Use pydantic-v2 configuration (ConfigDict), consistent with the rest of
    # this module, instead of the legacy v1 inner `class Config`.
    model_config = ConfigDict(json_schema_extra={"example": example})

    original_query: str = Field(..., description="The exact query being processed")
    # `max_length` is the pydantic-v2 name for the sequence-size constraint;
    # `max_items` is the deprecated v1 alias.
    ranked_chunks: List[str] = Field(
        ...,
        description="Top 5 most relevant chunks in order of importance",
        max_length=5,
    )
    got_chunks: bool = Field(..., description="Whether any relevant chunks were found")