# Inference-API / main / schemas.py
# (Hub page residue preserved as a comment: author AurelioAguirre,
#  commit eb5a3fb "Adding query expansion and reranker", raw/history/blame, 4.45 kB)
import json
from pathlib import Path
from pydantic import BaseModel, Field, create_model, ConfigDict
from typing import List, Optional, Dict, Union
from time import time
class QueryExpansionRequest(BaseModel):
    """Request body for the query-expansion endpoint."""

    query: str  # the user query to be expanded
    system_message: Optional[str] = None  # optional system-prompt override (consumer not visible here)
# Load the template once at import time; its canned example response is
# embedded into the generated model's JSON schema below.
template_path = Path(__file__).parent / "prompt_templates" / "query_expansion.json"
with open(template_path) as f:
    template = json.load(f)

# Create model configuration with proper typing: expose the template's
# example response in the generated OpenAPI/JSON schema.
model_config = ConfigDict(
    json_schema_extra={
        'example': template['example_response']
    }
)

# Create the response model based on the template's schema.
# FIX: pydantic v2's create_model() takes configuration via the dedicated
# `__config__` keyword. Passing `model_config=...` as a plain kwarg is
# interpreted as a *field* named "model_config", which collides with the
# reserved BaseModel attribute instead of configuring the model.
QueryExpansionResponse = create_model(
    'QueryExpansionResponse',
    original_query=(str, ...),      # the query exactly as received
    expanded_query=(str, ...),      # rewritten/expanded form of the query
    search_terms=(List[str], ...),  # individual terms extracted for retrieval
    call_rag=(bool, ...),           # whether retrieval should be triggered
    __config__=model_config,
)
class ChatMessage(BaseModel):
    """A single message in an OpenAI-style chat conversation."""

    role: str  # e.g. "user" / "assistant" / "system" — not validated here
    content: str  # message text
class ChatCompletionRequest(BaseModel):
    """Request body for the chat-completion endpoint (OpenAI-compatible shape)."""

    model: str  # model identifier requested by the client
    messages: List[ChatMessage]  # full conversation history
    stream: bool = False  # streaming flag; handling happens in the endpoint, not visible here
class ChatCompletionMessage(BaseModel):
    """Assistant message carried inside a completion choice."""

    role: str = "assistant"  # fixed default: responses are authored by the assistant
    content: str  # generated text
class ChatCompletionChoice(BaseModel):
    """One completion choice in an OpenAI-style chat response."""

    index: int = 0  # position of this choice in the response's choices list
    message: ChatCompletionMessage
    # FIX: the original annotation `Optional[None]` collapses to plain
    # NoneType, so the field could never hold a value. In the OpenAI
    # response shape `logprobs` is null or an object; widen the type while
    # keeping the default (None) unchanged, so existing callers see no
    # difference.
    logprobs: Optional[dict] = None
    finish_reason: str = "stop"  # placeholder: always reports a normal stop
class CompletionTokenDetails(BaseModel):
    """Breakdown of completion tokens; every counter defaults to 0."""

    reasoning_tokens: int = 0
    accepted_prediction_tokens: int = 0
    rejected_prediction_tokens: int = 0
class CompletionUsage(BaseModel):
    """Token-usage accounting for a completion.

    NOTE(review): the defaults below are hard-coded placeholders, not real
    counts — confirm callers overwrite them with actual usage.
    """

    prompt_tokens: int = 9  # Placeholder values
    completion_tokens: int = 12
    total_tokens: int = 21
    # default_factory so each response gets its own details object
    completion_tokens_details: CompletionTokenDetails = Field(default_factory=CompletionTokenDetails)
class ChatCompletionResponse(BaseModel):
    """OpenAI-compatible chat-completion response envelope.

    NOTE(review): several defaults ("chatcmpl-123", "fp_44709d6fcb", the
    usage numbers) are static placeholders mimicking the OpenAI example
    payload — every response will share them unless callers override.
    """

    id: str = Field(default="chatcmpl-123")  # placeholder id, NOT unique per response
    object: str = "chat.completion"
    created: int = Field(default_factory=lambda: int(time()))  # unix timestamp at construction
    model: str = "gpt-4o-mini"  # default model label
    system_fingerprint: str = "fp_44709d6fcb"  # placeholder fingerprint
    choices: List[ChatCompletionChoice]  # the only required field
    usage: CompletionUsage = Field(default_factory=CompletionUsage)  # placeholder token counts

    @classmethod
    def from_response(cls, content: str, model: str = "gpt-4o-mini") -> "ChatCompletionResponse":
        """Create a ChatCompletionResponse from a simple response string"""
        # Wraps the raw text in a single assistant-role choice; all other
        # fields take their (placeholder) defaults.
        return cls(
            model=model,
            choices=[
                ChatCompletionChoice(
                    message=ChatCompletionMessage(content=content)
                )
            ]
        )
class GenerateRequest(BaseModel):
    """Request body for plain text generation."""

    prompt: str  # raw prompt text
    system_message: Optional[str] = None  # optional system-prompt override
    max_new_tokens: Optional[int] = None  # cap on generated tokens; None presumably defers to the backend default — confirm
class EmbeddingRequest(BaseModel):
    """Request body for the embedding endpoint."""

    text: str  # text to embed
class EmbeddingResponse(BaseModel):
    """Embedding vector plus its dimensionality."""

    embedding: List[float]
    dimension: int  # presumably len(embedding) — not enforced here
class ModelStatus(BaseModel):
    """Load state of the text-generation model."""

    is_loaded: bool
    current_model: Optional[str]  # required field, but its value may be None
    has_chat_template: Optional[bool] = None  # None when unknown/not applicable
class EmbeddingModelStatus(BaseModel):
    """Load state of the embedding model."""

    is_loaded: bool
    current_model: Optional[str]  # required field, but its value may be None
class ModelStatusInfo(BaseModel):
    """Aggregated status of both models served by the API."""

    generation_model: ModelStatus
    embedding_model: EmbeddingModelStatus
class SystemStatusResponse(BaseModel):
    """Pydantic model for system status response"""

    # Each metrics section is optional; values mix numbers and display strings.
    cpu: Optional[Dict[str, Union[float, str]]] = None
    memory: Optional[Dict[str, Union[float, str]]] = None
    gpu: Optional[Dict[str, Union[bool, str, float]]] = None
    storage: Optional[Dict[str, str]] = None
    model: ModelStatusInfo  # the only required section
class ValidationResponse(BaseModel):
    """Result of configuration/model/folder validation checks."""

    config_validation: Dict[str, bool]  # per-check pass/fail
    model_validation: Dict[str, bool]
    folder_validation: Dict[str, bool]
    overall_status: str  # summary verdict string
    issues: List[str]  # human-readable problems found
class ChunkRerankRequest(BaseModel):
    """Request body for reranking retrieved chunks against a query."""

    query: str  # query the chunks are ranked against
    chunks: List[str]  # candidate chunks to rank
    system_message: Optional[str] = None  # optional system-prompt override
# Load the chunk-rerank prompt template so its canned example response can be
# attached to ChunkRerankResponse's JSON schema.
template_path = Path(__file__).parent / "prompt_templates" / "chunk_rerank.json"
template = json.loads(template_path.read_text())
example = template['example_response']
class ChunkRerankResponse(BaseModel):
    """Response model for chunk reranking, based on template schema"""

    # Use pydantic-v2 configuration (ConfigDict), consistent with the rest of
    # this module, instead of the legacy v1 inner `class Config`.
    model_config = ConfigDict(json_schema_extra={"example": example})

    original_query: str = Field(..., description="The exact query being processed")
    # `max_length` is the pydantic-v2 name for the sequence-size constraint;
    # `max_items` is the deprecated v1 alias.
    ranked_chunks: List[str] = Field(
        ...,
        description="Top 5 most relevant chunks in order of importance",
        max_length=5,
    )
    got_chunks: bool = Field(..., description="Whether any relevant chunks were found")