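# Spaces: Running Running
# NOTE(review): the line above appears to be page residue from the hosting site, not module content.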
from typing import Dict, List, Optional

from pydantic import BaseModel, Field
# Pydantic models for request validation
class CreateEmbeddingRequest(BaseModel):
    """Request model for an embedding-creation call.

    ``query`` is the only required field; every other field has a default.
    ``batch_size`` and ``max_concurrent_requests`` must be strictly positive:
    zero or negative values are rejected at validation time instead of being
    handed on to whatever consumes this request.
    """

    # Required. NOTE(review): presumably the query that selects the source
    # rows -- confirm against the endpoint handler.
    query: str
    # NOTE(review): presumably the column whose values get embedded -- confirm.
    target_column: str = "product_type"
    # NOTE(review): presumably the column the embeddings are written to -- confirm.
    output_column: str = "embedding"
    # Identifier of the embedding model to use.
    model: str = "text-embedding-3-small"
    # Must be > 0; a non-positive batch size has no meaningful interpretation.
    batch_size: int = Field(10, gt=0)
    # Must be > 0; a non-positive concurrency limit has no meaningful interpretation.
    max_concurrent_requests: int = Field(10, gt=0)
    dataset_name: str = "re-mind/product_type_embedding"
class ReadEmbeddingRequest(BaseModel):
    """Request model that identifies, by name, the dataset to read."""

    # Required; no default is provided.
    dataset_name: str
# class UpdateEmbeddingRequest(BaseModel):
#     updates: Dict[str, List]  # Column name -> List of values
#     target_column: str = "product_type"
#     output_column: str = "embedding"
#     model: str = "text-embedding-3-small"
#     batch_size: int = 10
#     max_concurrent_requests: int = 10
#     dataset_name: str = "re-mind/product_type_embedding"
class UpdateEmbeddingRequest(BaseModel):
    """Request model for updating an embedding dataset.

    Only ``updates`` is required; the other fields fall back to defaults.
    """

    dataset_name: str = "re-mind/product_type_embedding"
    # Dictionary of column names and their corresponding values.
    updates: Dict[str, List]
    # Column in the new data to generate embeddings for.
    target_column: str = "product_type"
    # Column to store the generated embeddings.
    output_column: str = "embedding"
class DeleteEmbeddingRequest(BaseModel):
    """Request model that identifies, by name, the dataset to delete."""

    # Required; no default is provided.
    dataset_name: str
# Request model for the /embed endpoint
class EmbedRequest(BaseModel):
    """Request body for the ``/embed`` endpoint.

    ``texts`` is required; ``output_column`` defaults to ``"embedding"``.
    """

    # List of strings to generate embeddings for.
    texts: List[str]
    # Column to store embeddings (default: "embedding").
    output_column: str = "embedding"
class SearchEmbeddingRequest(BaseModel):
    """Request model for searching a dataset's embeddings.

    Every field is required except ``additional_columns``, which defaults to
    ``None``. ``num_results`` must be strictly positive; zero or negative
    values are rejected at validation time.
    """

    # List of texts to search for.
    texts: List[str]
    # Column to return in the results.
    target_column: str
    # Column containing the embeddings to search against.
    embedding_column: str
    # Number of results to return; required and must be > 0.
    num_results: int = Field(..., gt=0)
    # Name of the dataset to search in.
    dataset_name: str
    # Optional list of additional columns to include in the results.
    additional_columns: Optional[List[str]] = None