# Spaces: TeamGenKI / Inference-API
# Runtime error
# File size: 4,452 Bytes

import json
from pathlib import Path
from pydantic import BaseModel, Field, create_model, ConfigDict
from typing import List, Optional, Dict, Union
from time import time

class QueryExpansionRequest(BaseModel):
    """Request payload holding a query and an optional system message."""

    # Required query text.
    query: str
    # Optional system message; None when the caller does not supply one.
    system_message: Optional[str] = Field(default=None)

# Load the query-expansion prompt template; its ``example_response`` entry is
# reused below as the example attached to the response model's JSON schema.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's locale default.
template_path = Path(__file__).parent / "prompt_templates" / "query_expansion.json"
with open(template_path, encoding="utf-8") as f:
    template = json.load(f)

# Model configuration: expose the template's example response in the schema.
model_config = ConfigDict(
    json_schema_extra={
        'example': template['example_response'],
    },
)

# Build the response model dynamically; the four fields are declared here and
# are all required. The configuration has to be supplied through
# ``__config__``: every other keyword given to ``create_model`` is interpreted
# as a field definition, so passing ``model_config=...`` is not a supported
# way to configure the generated model.
QueryExpansionResponse = create_model(
    'QueryExpansionResponse',
    __config__=model_config,
    original_query=(str, ...),
    expanded_query=(str, ...),
    search_terms=(List[str], ...),
    call_rag=(bool, ...),
)

class ChatMessage(BaseModel):
    """A single chat message: a role label plus its text content."""

    # Role label of the message; any string is accepted by this model.
    role: str
    # Text content of the message.
    content: str

class ChatCompletionRequest(BaseModel):
    """Chat-completion request: a model name, a message list and a stream flag."""

    # Name of the model the caller asks for (required).
    model: str
    # Messages making up the conversation (required).
    messages: List[ChatMessage]
    # Streaming flag; off unless the caller sets it.
    stream: bool = Field(default=False)

class ChatCompletionMessage(BaseModel):
    """Message carried inside a completion choice."""

    # Role defaults to "assistant" when not given explicitly.
    role: str = Field(default="assistant")
    # Text of the message (required).
    content: str

class ChatCompletionChoice(BaseModel):
    """One choice entry of a chat-completion response."""

    # Position of this choice; defaults to 0.
    index: int = Field(default=0)
    # The message produced for this choice (required).
    message: ChatCompletionMessage
    # Annotated as Optional[None], so the only value this field admits is None.
    logprobs: Optional[None] = Field(default=None)
    # Defaults to "stop".
    finish_reason: str = Field(default="stop")

class CompletionTokenDetails(BaseModel):
    """Breakdown counters for completion tokens; every counter defaults to 0."""

    reasoning_tokens: int = Field(default=0)
    accepted_prediction_tokens: int = Field(default=0)
    rejected_prediction_tokens: int = Field(default=0)

class CompletionUsage(BaseModel):
    """Token-usage section of a completion response.

    The numeric defaults are hard-coded placeholder values, not measured
    token counts.
    """

    # Placeholder values
    prompt_tokens: int = Field(default=9)
    completion_tokens: int = Field(default=12)
    total_tokens: int = Field(default=21)
    # A fresh CompletionTokenDetails (all zeros) is built per instance.
    completion_tokens_details: CompletionTokenDetails = Field(default_factory=CompletionTokenDetails)

class ChatCompletionResponse(BaseModel):
    """Chat-completion response envelope.

    Only ``choices`` is required; every other field falls back to a fixed
    default, except ``created``, which is computed at instantiation.
    """

    id: str = Field(default="chatcmpl-123")
    object: str = Field(default="chat.completion")
    # Current time in whole seconds, taken when the instance is created.
    created: int = Field(default_factory=lambda: int(time()))
    model: str = Field(default="gpt-4o-mini")
    system_fingerprint: str = Field(default="fp_44709d6fcb")
    choices: List[ChatCompletionChoice]
    # A fresh CompletionUsage (placeholder counts) is built per instance.
    usage: CompletionUsage = Field(default_factory=CompletionUsage)

    @classmethod
    def from_response(cls, content: str, model: str = "gpt-4o-mini") -> "ChatCompletionResponse":
        """Wrap a plain response string in a single-choice response.

        Args:
            content: Text placed in the message of the only choice.
            model: Value stored in the response's ``model`` field.

        Returns:
            A response whose ``choices`` list holds exactly one choice; all
            remaining fields keep their defaults.
        """
        reply = ChatCompletionMessage(content=content)
        sole_choice = ChatCompletionChoice(message=reply)
        return cls(model=model, choices=[sole_choice])

class GenerateRequest(BaseModel):
    """Generation request: a prompt plus optional settings."""

    # Prompt text (required).
    prompt: str
    # Optional system message; None when omitted.
    system_message: Optional[str] = Field(default=None)
    # Optional token limit; None when omitted.
    max_new_tokens: Optional[int] = Field(default=None)

class EmbeddingRequest(BaseModel):
    """Embedding request carrying a single text string."""

    # Text to embed (required).
    text: str

class EmbeddingResponse(BaseModel):
    """Embedding result: a float vector and a dimension value."""

    # The embedding vector.
    embedding: List[float]
    # Presumably the length of `embedding`; this model does not enforce that
    # relationship -- verify against the caller.
    dimension: int

class ModelStatus(BaseModel):
    """Status fields reported for a model."""

    # Load flag (required).
    is_loaded: bool
    # Nullable, but declared without a default value.
    current_model: Optional[str]
    # Optional chat-template flag; None when not provided.
    has_chat_template: Optional[bool] = Field(default=None)

class EmbeddingModelStatus(BaseModel):
    """Status fields reported for an embedding model."""

    # Load flag (required).
    is_loaded: bool
    # Nullable, but declared without a default value.
    current_model: Optional[str]

class ModelStatusInfo(BaseModel):
    """Groups the generation-model and embedding-model status objects."""

    # Status of the generation model.
    generation_model: ModelStatus
    # Status of the embedding model.
    embedding_model: EmbeddingModelStatus

class SystemStatusResponse(BaseModel):
    """System status response model.

    The four resource sections are optional mappings that default to None;
    ``model`` is the only required field.
    """

    cpu: Optional[Dict[str, Union[float, str]]] = Field(default=None)
    memory: Optional[Dict[str, Union[float, str]]] = Field(default=None)
    gpu: Optional[Dict[str, Union[bool, str, float]]] = Field(default=None)
    storage: Optional[Dict[str, str]] = Field(default=None)
    # Status of the generation and embedding models (required).
    model: ModelStatusInfo

class ValidationResponse(BaseModel):
    """Validation report: per-area boolean results, a status string and issues."""

    # ``model_validation`` begins with ``model_``, a prefix that pydantic v2's
    # protected-namespace check can flag with a UserWarning when the class is
    # created. The field name is part of the response schema and must stay, so
    # the check is disabled for this model instead.
    model_config = ConfigDict(protected_namespaces=())

    # Named boolean results for the configuration checks.
    config_validation: Dict[str, bool]
    # Named boolean results for the model checks.
    model_validation: Dict[str, bool]
    # Named boolean results for the folder checks.
    folder_validation: Dict[str, bool]
    # Overall status as a free-form string.
    overall_status: str
    # Issue messages.
    issues: List[str]

class ChunkRerankRequest(BaseModel):
    """Rerank request: a query, the candidate chunks, and an optional system message."""

    # Query text (required).
    query: str
    # Candidate chunks (required).
    chunks: List[str]
    # Optional system message; None when omitted.
    system_message: Optional[str] = Field(default=None)

# Load the chunk-rerank prompt template and keep its ``example_response``
# entry, which ChunkRerankResponse attaches to its JSON schema.
# JSON text is UTF-8 by specification, so decode it explicitly instead of
# relying on the platform's locale default.
template_path = Path(__file__).parent / "prompt_templates" / "chunk_rerank.json"
with open(template_path, encoding="utf-8") as f:
    template = json.load(f)

# Looked up after the file is closed; the handle is only needed for parsing.
example = template['example_response']

class ChunkRerankResponse(BaseModel):
    """Response model for chunk reranking, based on the template schema."""

    # pydantic v2 configuration: ``model_config`` replaces the deprecated
    # nested ``class Config``. ``example`` is the example response read from
    # the chunk-rerank template at module import.
    model_config = ConfigDict(json_schema_extra={"example": example})

    original_query: str = Field(..., description="The exact query being processed")
    # ``max_length`` is the v2 name of the list-size constraint formerly
    # spelled ``max_items``; at most five chunks are accepted.
    ranked_chunks: List[str] = Field(
        ...,
        description="Top 5 most relevant chunks in order of importance",
        max_length=5,
    )
    got_chunks: bool = Field(..., description="Whether any relevant chunks were found")