from typing import Dict, List, Optional

from pydantic import BaseModel


# Pydantic models for request validation


class CreateEmbeddingRequest(BaseModel):
    """Validated payload for a create-embedding request."""

    # Required. NOTE(review): what this query selects is not visible in this
    # file -- confirm against the handler that consumes the model.
    query: str
    # Presumably the column to generate embeddings for (same meaning as in
    # UpdateEmbeddingRequest) -- TODO confirm.
    target_column: str = "product_type"
    # Presumably the column that receives the generated embeddings -- TODO confirm.
    output_column: str = "embedding"
    # Embedding model identifier.
    model: str = "text-embedding-3-small"
    # NOTE(review): presumably items per embedding call and the cap on
    # in-flight calls; neither is enforced or range-checked by this model.
    batch_size: int = 10
    max_concurrent_requests: int = 10
    # Dataset the request refers to.
    dataset_name: str = "re-mind/product_type_embedding"


class ReadEmbeddingRequest(BaseModel):
    """Validated payload for a read-embedding request."""

    # Required; there is no default dataset for reads.
    dataset_name: str


class UpdateEmbeddingRequest(BaseModel):
    """Validated payload for an update-embedding request."""

    dataset_name: str = "re-mind/product_type_embedding"
    # Dictionary of column names and their corresponding values.
    # The inner list is deliberately untyped, so any element type is accepted.
    updates: Dict[str, List]
    # Column in the new data to generate embeddings for.
    target_column: str = "product_type"
    # Column to store the generated embeddings.
    output_column: str = "embedding"


class DeleteEmbeddingRequest(BaseModel):
    """Validated payload for a delete-embedding request."""

    # Required; there is no default dataset for deletes.
    dataset_name: str


class EmbedRequest(BaseModel):
    """Request model for the /embed endpoint."""

    # List of strings to generate embeddings for.
    texts: List[str]
    # Column to store embeddings (default: "embedding").
    output_column: str = "embedding"


class SearchEmbeddingRequest(BaseModel):
    """Validated payload for an embedding search request."""

    # List of texts to search for.
    texts: List[str]
    # Column to return in the results.
    target_column: str
    # Column containing the embeddings to search against.
    embedding_column: str
    # Number of results to return. Not range-checked by this model.
    num_results: int
    # Name of the dataset to search in.
    dataset_name: str
    # Optional list of additional columns to include in the results.
    additional_columns: Optional[List[str]] = None