"""Pydantic models for API requests, responses, and system/model status."""
import json | |
from pathlib import Path | |
from pydantic import BaseModel, Field, create_model, ConfigDict | |
from typing import List, Optional, Dict, Union | |
from time import time | |
class QueryExpansionRequest(BaseModel):
    """Request payload for query expansion: the user query plus an optional system prompt."""

    query: str = Field(...)
    system_message: Optional[str] = Field(default=None)
# Load the JSON template whose `example_response` seeds the generated model's
# schema example.
template_path = Path(__file__).parent / "prompt_templates" / "query_expansion.json"
with open(template_path, encoding="utf-8") as f:
    template = json.load(f)

# Model configuration: surface the template's example in the JSON schema.
model_config = ConfigDict(
    json_schema_extra={
        'example': template['example_response']
    }
)

# Build the response model dynamically from the template's schema.
# BUG FIX: the config must be passed via the reserved `__config__` kwarg —
# a plain `model_config=` kwarg is interpreted by create_model as a field
# definition, not as the model's configuration.
QueryExpansionResponse = create_model(
    'QueryExpansionResponse',
    original_query=(str, ...),
    expanded_query=(str, ...),
    search_terms=(List[str], ...),
    call_rag=(bool, ...),
    __config__=model_config,
)
class ChatMessage(BaseModel):
    """One conversational turn: who spoke (`role`) and what was said (`content`)."""

    role: str = Field(...)
    content: str = Field(...)
class ChatCompletionRequest(BaseModel):
    """OpenAI-style chat completion request: target model, message history, stream flag."""

    model: str
    messages: List[ChatMessage]
    stream: bool = Field(default=False)
class ChatCompletionMessage(BaseModel):
    """An assistant-authored message carried inside a completion choice."""

    role: str = Field(default="assistant")
    content: str
class ChatCompletionChoice(BaseModel):
    """One candidate completion, mirroring the OpenAI response shape."""

    index: int = Field(default=0)
    message: ChatCompletionMessage
    # Optional[None] admits only None — this server never returns logprobs.
    logprobs: Optional[None] = None
    finish_reason: str = Field(default="stop")
class CompletionTokenDetails(BaseModel):
    """Per-category breakdown of completion tokens (OpenAI usage-details shape)."""

    reasoning_tokens: int = Field(default=0)
    accepted_prediction_tokens: int = Field(default=0)
    rejected_prediction_tokens: int = Field(default=0)
class CompletionUsage(BaseModel):
    """Token accounting for a completion.

    Defaults are placeholders, not real token counts.
    """

    prompt_tokens: int = Field(default=9)       # placeholder
    completion_tokens: int = Field(default=12)  # placeholder
    total_tokens: int = Field(default=21)       # placeholder
    completion_tokens_details: CompletionTokenDetails = Field(default_factory=CompletionTokenDetails)
class ChatCompletionResponse(BaseModel):
    """OpenAI-compatible chat completion response envelope.

    The id/fingerprint/usage defaults are placeholders so the server can
    emit a schema-compatible payload without real accounting.
    """

    id: str = Field(default="chatcmpl-123")
    object: str = "chat.completion"
    # Unix timestamp captured when the response object is constructed.
    created: int = Field(default_factory=lambda: int(time()))
    model: str = "gpt-4o-mini"
    system_fingerprint: str = "fp_44709d6fcb"
    choices: List[ChatCompletionChoice]
    usage: CompletionUsage = Field(default_factory=CompletionUsage)

    # BUG FIX: this was a plain method despite taking `cls`, so calling
    # ChatCompletionResponse.from_response(content) bound `cls` to the
    # content string. It is an alternate constructor and must be a classmethod.
    @classmethod
    def from_response(cls, content: str, model: str = "gpt-4o-mini") -> "ChatCompletionResponse":
        """Create a ChatCompletionResponse wrapping a single assistant message."""
        return cls(
            model=model,
            choices=[
                ChatCompletionChoice(
                    message=ChatCompletionMessage(content=content)
                )
            ]
        )
class GenerateRequest(BaseModel):
    """Raw text-generation request: prompt plus optional system prompt and token cap."""

    prompt: str
    system_message: Optional[str] = Field(default=None)
    max_new_tokens: Optional[int] = Field(default=None)
class EmbeddingRequest(BaseModel):
    """Request to embed a single piece of text."""

    text: str = Field(...)
class EmbeddingResponse(BaseModel):
    """An embedding vector together with its dimensionality."""

    embedding: List[float]
    dimension: int
class ModelStatus(BaseModel):
    """Load state of the generation model."""

    is_loaded: bool
    current_model: Optional[str]  # required field, but may be explicitly null
    has_chat_template: Optional[bool] = Field(default=None)
class EmbeddingModelStatus(BaseModel):
    """Load state of the embedding model."""

    is_loaded: bool
    current_model: Optional[str]  # required field, but may be explicitly null
class ModelStatusInfo(BaseModel):
    """Combined status for both the generation and embedding models."""

    generation_model: ModelStatus
    embedding_model: EmbeddingModelStatus
class SystemStatusResponse(BaseModel):
    """System status snapshot: hardware metric maps (all optional) plus model state."""

    cpu: Optional[Dict[str, Union[float, str]]] = Field(default=None)
    memory: Optional[Dict[str, Union[float, str]]] = Field(default=None)
    gpu: Optional[Dict[str, Union[bool, str, float]]] = Field(default=None)
    storage: Optional[Dict[str, str]] = Field(default=None)
    model: ModelStatusInfo
class ValidationResponse(BaseModel):
    """Aggregated pass/fail validation results for config, models, and folders."""

    config_validation: Dict[str, bool]
    model_validation: Dict[str, bool]
    folder_validation: Dict[str, bool]
    overall_status: str
    issues: List[str]
class ChunkRerankRequest(BaseModel):
    """Request to rerank retrieved chunks against a query."""

    query: str
    chunks: List[str]
    system_message: Optional[str] = Field(default=None)
# Load the chunk-rerank template; its `example_response` seeds
# ChunkRerankResponse's schema example.
# NOTE: this deliberately reuses the module-level `template_path`/`template`
# names from the query-expansion load above; only `example` is used afterwards.
template_path = Path(__file__).parent / "prompt_templates" / "chunk_rerank.json"
# FIX: pin the encoding — JSON files should not be decoded with the
# platform-default codec.
with open(template_path, encoding="utf-8") as f:
    template = json.load(f)
example = template['example_response']
class ChunkRerankResponse(BaseModel):
    """Response model for chunk reranking, based on template schema"""

    # FIX: migrated to Pydantic v2 idioms — class-based `Config` and
    # `Field(max_items=...)` are deprecated in v2. This matches the
    # ConfigDict usage already present earlier in this module.
    model_config = ConfigDict(json_schema_extra={"example": example})

    original_query: str = Field(..., description="The exact query being processed")
    ranked_chunks: List[str] = Field(..., description="Top 5 most relevant chunks in order of importance", max_length=5)
    got_chunks: bool = Field(..., description="Whether any relevant chunks were found")