import json
from pathlib import Path
from pydantic import BaseModel, Field, create_model, ConfigDict
from typing import List, Optional, Dict, Union
from time import time
class QueryExpansionRequest(BaseModel):
    query: str
    system_message: Optional[str] = None
# Load the template to create the response model
template_path = Path(__file__).parent / "prompt_templates" / "query_expansion.json"
with open(template_path) as f:
    template = json.load(f)
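
# Note: the template JSON is assumed to contain at least an "example_response"
# key; it is the only key read here, and is surfaced below as the schema example.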
# Create model configuration with proper typing
model_config = ConfigDict(
    json_schema_extra={
        'example': template['example_response']
    }
)
# Create the response model based on the template's schema
QueryExpansionResponse = create_model(
    'QueryExpansionResponse',
    original_query=(str, ...),
    expanded_query=(str, ...),
    search_terms=(List[str], ...),
    call_rag=(bool, ...),
    __config__=model_config,  # create_model takes the config via __config__, not as a field kwarg
)
class ChatMessage(BaseModel):
    role: str
    content: str
class ChatCompletionRequest(BaseModel):
    model: str
    messages: List[ChatMessage]
    stream: bool = False
class ChatCompletionMessage(BaseModel):
    role: str = "assistant"
    content: str
class ChatCompletionChoice(BaseModel):
    index: int = 0
    message: ChatCompletionMessage
    logprobs: None = None  # always null here, mirroring the OpenAI response shape
    finish_reason: str = "stop"
class CompletionTokenDetails(BaseModel):
    reasoning_tokens: int = 0
    accepted_prediction_tokens: int = 0
    rejected_prediction_tokens: int = 0
class CompletionUsage(BaseModel):
    prompt_tokens: int = 9  # Placeholder values
    completion_tokens: int = 12
    total_tokens: int = 21
    completion_tokens_details: CompletionTokenDetails = Field(default_factory=CompletionTokenDetails)
class ChatCompletionResponse(BaseModel):
    id: str = Field(default="chatcmpl-123")
    object: str = "chat.completion"
    created: int = Field(default_factory=lambda: int(time()))
    model: str = "gpt-4o-mini"
    system_fingerprint: str = "fp_44709d6fcb"
    choices: List[ChatCompletionChoice]
    usage: CompletionUsage = Field(default_factory=CompletionUsage)

    @classmethod
    def from_response(cls, content: str, model: str = "gpt-4o-mini") -> "ChatCompletionResponse":
        """Create a ChatCompletionResponse from a simple response string"""
        return cls(
            model=model,
            choices=[
                ChatCompletionChoice(
                    message=ChatCompletionMessage(content=content)
                )
            ],
        )
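
# Usage sketch: wrap a raw generation string in an OpenAI-style payload
# (the content below is a hypothetical example):
#
#   resp = ChatCompletionResponse.from_response("Hello!", model="gpt-4o-mini")
#   resp.choices[0].message.content  # -> "Hello!"
#   resp.model_dump()                # -> chat.completion-shaped dict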
class GenerateRequest(BaseModel):
    prompt: str
    system_message: Optional[str] = None
    max_new_tokens: Optional[int] = None
class EmbeddingRequest(BaseModel):
    text: str
class EmbeddingResponse(BaseModel):
    embedding: List[float]
    dimension: int
class ModelStatus(BaseModel):
    is_loaded: bool
    current_model: Optional[str]
    has_chat_template: Optional[bool] = None
class EmbeddingModelStatus(BaseModel):
    is_loaded: bool
    current_model: Optional[str]
class ModelStatusInfo(BaseModel):
    generation_model: ModelStatus
    embedding_model: EmbeddingModelStatus
class SystemStatusResponse(BaseModel):
    """Pydantic model for system status response"""
    cpu: Optional[Dict[str, Union[float, str]]] = None
    memory: Optional[Dict[str, Union[float, str]]] = None
    gpu: Optional[Dict[str, Union[bool, str, float]]] = None
    storage: Optional[Dict[str, str]] = None
    model: ModelStatusInfo
class ValidationResponse(BaseModel):
    config_validation: Dict[str, bool]
    model_validation: Dict[str, bool]
    folder_validation: Dict[str, bool]
    overall_status: str
    issues: List[str]
class ChunkRerankRequest(BaseModel):
    query: str
    chunks: List[str]
    system_message: Optional[str] = None
# Load example from template
template_path = Path(__file__).parent / "prompt_templates" / "chunk_rerank.json"
with open(template_path) as f:
    template = json.load(f)
example = template['example_response']
class ChunkRerankResponse(BaseModel):
    """Response model for chunk reranking, based on template schema"""
    original_query: str = Field(..., description="The exact query being processed")
    ranked_chunks: List[str] = Field(..., description="Top 5 most relevant chunks in order of importance", max_length=5)
    got_chunks: bool = Field(..., description="Whether any relevant chunks were found")

    model_config = ConfigDict(
        json_schema_extra={
            "example": example
        }
    )
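
# Usage sketch (hypothetical values): validate a rerank result and inspect the
# generated schema; its "example" entry comes from the loaded template.
#
#   result = ChunkRerankResponse(
#       original_query="what is attention?",
#       ranked_chunks=["chunk a", "chunk b"],
#       got_chunks=True,
#   )
#   ChunkRerankResponse.model_json_schema()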