# Similarity_Search/src/api/models/embedding_models.py
# amaye15
# Feat - Additional Columns Returned
# b96eea7
from pydantic import BaseModel
from typing import List, Dict, Optional
# Pydantic models for request validation
class CreateEmbeddingRequest(BaseModel):
    """Validated request payload for the create-embedding operation.

    ``query`` is the only required field; every other field carries a default.
    """

    # Required. NOTE(review): what the query is run against is not visible
    # in this module — confirm with the handler that consumes this model.
    query: str

    # Presumably the column whose values are embedded — TODO confirm.
    target_column: str = "product_type"

    # Presumably the column the generated embeddings are written to — TODO confirm.
    output_column: str = "embedding"

    # Identifier of the embedding model to use.
    model: str = "text-embedding-3-small"

    # Presumably the number of items sent per embedding call — TODO confirm.
    batch_size: int = 10

    # Presumably an upper bound on in-flight embedding calls — TODO confirm.
    max_concurrent_requests: int = 10

    # Name of the dataset associated with this request.
    dataset_name: str = "re-mind/product_type_embedding"
class ReadEmbeddingRequest(BaseModel):
    """Validated request payload for the read-embedding operation."""

    # Required: name of the dataset to read; no default is supplied.
    dataset_name: str
# NOTE(review): commented-out alternative definition of UpdateEmbeddingRequest;
# the active class of the same name follows. Consider deleting this dead code.
# class UpdateEmbeddingRequest(BaseModel):
#     updates: Dict[str, List]  # Column name -> list of values
#     target_column: str = "product_type"
#     output_column: str = "embedding"
#     model: str = "text-embedding-3-small"
#     batch_size: int = 10
#     max_concurrent_requests: int = 10
#     dataset_name: str = "re-mind/product_type_embedding"
class UpdateEmbeddingRequest(BaseModel):
    """Validated request payload for the update-embedding operation.

    ``updates`` is the only required field; the remaining fields have defaults.
    """

    # Name of the dataset the update applies to.
    dataset_name: str = "re-mind/product_type_embedding"

    # Dictionary of column names and their corresponding values.
    updates: Dict[str, List]

    # Column in the new data to generate embeddings for.
    target_column: str = "product_type"

    # Column to store the generated embeddings.
    output_column: str = "embedding"
class DeleteEmbeddingRequest(BaseModel):
    """Validated request payload for the delete-embedding operation."""

    # Required: name of the dataset the delete applies to; no default is supplied.
    dataset_name: str
class EmbedRequest(BaseModel):
    """Request model for the /embed endpoint."""

    # Required: list of strings to generate embeddings for.
    texts: List[str]

    # Column to store embeddings in; defaults to "embedding".
    output_column: str = "embedding"
class SearchEmbeddingRequest(BaseModel):
    """Validated request payload for the search-embedding operation.

    Every field is required except ``additional_columns``.
    """

    # List of texts to search for.
    texts: List[str]

    # Column to return in the results.
    target_column: str

    # Column containing the embeddings to search against.
    embedding_column: str

    # Number of results to return.
    num_results: int

    # Name of the dataset to search in.
    dataset_name: str

    # Optional list of additional columns to include in the results;
    # defaults to None when omitted.
    additional_columns: Optional[List[str]] = None