# Inference-API / main / schemas.py
# Commit 02fd6bb: added OpenAI schema based endpoint and response
from time import time
from typing import Dict, List, Literal, Optional, Union
from uuid import uuid4

from pydantic import BaseModel, Field
class ChatMessage(BaseModel):
    """A single message in a chat conversation (OpenAI-style role/content pair)."""
    role: str  # e.g. "system" / "user" / "assistant" — plain str, not validated here
    content: str  # the message text
class ChatCompletionRequest(BaseModel):
    """Incoming OpenAI-style /chat/completions request body.

    Generalized with the standard optional OpenAI sampling controls;
    both default to None ("use server defaults"), so existing callers
    that omit them are unaffected.
    """
    model: str  # model identifier requested by the client
    messages: List[ChatMessage]  # ordered conversation history
    stream: bool = False  # True -> caller expects a streamed response
    temperature: Optional[float] = None  # sampling temperature; None = server default
    max_tokens: Optional[int] = None  # completion length cap; None = server default
class ChatCompletionMessage(BaseModel):
    """The assistant-authored message carried inside a completion choice."""
    role: str = "assistant"  # defaults to "assistant"; callers may override
    content: str  # generated text
class ChatCompletionChoice(BaseModel):
    """One completion alternative inside an OpenAI-style response."""
    index: int = 0  # position of this choice in the choices list
    message: ChatCompletionMessage
    # Was annotated Optional[None] (i.e. Union[None, None] — the ONLY legal
    # value was None). Widened to an optional dict so a real logprobs payload
    # can be carried; the default stays None, so existing callers are unaffected.
    logprobs: Optional[Dict] = None
    finish_reason: str = "stop"  # why generation stopped; "stop" = natural end
class CompletionTokenDetails(BaseModel):
    """Fine-grained breakdown of completion tokens (OpenAI `completion_tokens_details`)."""
    reasoning_tokens: int = 0  # tokens spent on hidden reasoning, if any
    accepted_prediction_tokens: int = 0  # predicted-output tokens accepted
    rejected_prediction_tokens: int = 0  # predicted-output tokens rejected
class CompletionUsage(BaseModel):
    """Token accounting for one completion (OpenAI `usage` object).

    NOTE(review): the field defaults are placeholders carried over from the
    original code (9/12/21), not real counts — kept for backward
    compatibility. Prefer `from_counts()` when actual counts are known; it
    guarantees total_tokens is consistent.
    """
    prompt_tokens: int = 9  # Placeholder values
    completion_tokens: int = 12
    total_tokens: int = 21
    completion_tokens_details: CompletionTokenDetails = Field(default_factory=CompletionTokenDetails)

    @classmethod
    def from_counts(cls, prompt_tokens: int, completion_tokens: int) -> "CompletionUsage":
        """Build a usage object whose total is guaranteed to be prompt + completion."""
        return cls(
            prompt_tokens=prompt_tokens,
            completion_tokens=completion_tokens,
            total_tokens=prompt_tokens + completion_tokens,
        )
class ChatCompletionResponse(BaseModel):
    """OpenAI-compatible chat completion response envelope."""
    # Unique per-response id. Previously hard-coded to "chatcmpl-123", so every
    # response shared the same id — breaking client-side de-duplication/logging.
    id: str = Field(default_factory=lambda: f"chatcmpl-{uuid4().hex}")
    object: str = "chat.completion"
    # Unix timestamp (seconds) captured at model construction time.
    created: int = Field(default_factory=lambda: int(time()))
    model: str = "gpt-4o-mini"
    system_fingerprint: str = "fp_44709d6fcb"  # static placeholder fingerprint
    choices: List[ChatCompletionChoice]
    usage: CompletionUsage = Field(default_factory=CompletionUsage)

    @classmethod
    def from_response(cls, content: str, model: str = "gpt-4o-mini") -> "ChatCompletionResponse":
        """Create a ChatCompletionResponse from a simple response string"""
        return cls(
            model=model,
            choices=[
                ChatCompletionChoice(
                    message=ChatCompletionMessage(content=content)
                )
            ],
        )
class GenerateRequest(BaseModel):
    """Request body for the plain (non-OpenAI) text-generation endpoint."""
    prompt: str  # user prompt text
    system_message: Optional[str] = None  # optional system prompt prepended by the server
    max_new_tokens: Optional[int] = None  # generation cap; None presumably means server default — confirm against handler
class EmbeddingRequest(BaseModel):
    """Request body for the embedding endpoint."""
    text: str  # text to embed
class EmbeddingResponse(BaseModel):
    """Embedding vector plus its length."""
    embedding: List[float]  # the embedding vector
    dimension: int  # expected to equal len(embedding) — not enforced here
class SystemStatusResponse(BaseModel):
    """Pydantic model for system status response.

    Each section is an optional free-form dict so the endpoint can omit
    metrics it cannot collect; key schemas are defined by the producer.
    """
    cpu: Optional[Dict[str, Union[float, str]]] = None  # CPU metrics
    memory: Optional[Dict[str, Union[float, str]]] = None  # RAM metrics
    gpu: Optional[Dict[str, Union[bool, str, float]]] = None  # GPU availability/metrics
    storage: Optional[Dict[str, str]] = None  # disk/storage info
    model: Optional[Dict[str, Union[bool, str]]] = None  # loaded-model status
class ValidationResponse(BaseModel):
    """Aggregated startup/health validation results."""
    config_validation: Dict[str, bool]  # per-check config results
    model_validation: Dict[str, bool]  # per-check model results
    folder_validation: Dict[str, bool]  # per-check folder/path results
    overall_status: str  # summary verdict string
    issues: List[str]  # human-readable descriptions of failures