Spaces:

re-mind
/

Similarity_Search

Running

App Files Files Community

Similarity_Search / src /api /models /embedding_models.py

nabilcheikh1

correction of columns of datasets

054d2a0 12 days ago

raw

history blame contribute delete

2.27 kB

	from pydantic import BaseModel
	from typing import List, Dict, Optional


	# Pydantic models for request validation
	# Request payload for creating embeddings.
	# NOTE(review): the per-field notes below are inferred from field names and
	# defaults only; confirm them against the handler that consumes this model.
	class CreateEmbeddingRequest(BaseModel):
	    # Required (no default). Presumably selects the source rows to embed -- TODO confirm.
	    query: str
	    # Presumably the column whose values are embedded.
	    target_column: str = "product_type"
	    # Presumably the column that receives the generated embeddings.
	    output_column: str = "embedding"
	    # Embedding model identifier; the default looks like an OpenAI model name -- confirm.
	    model: str = "text-embedding-3-small"
	    # Presumably the number of items sent per embedding call -- confirm.
	    batch_size: int = 10
	    # Presumably an upper bound on in-flight embedding calls -- confirm.
	    max_concurrent_requests: int = 10
	    # Dataset identifier; the default looks like an "<owner>/<name>" repo id -- confirm.
	    dataset_name: str = "re-mind/product_type_embedding"


	# Request payload for reading embeddings; carries only the dataset to read.
	class ReadEmbeddingRequest(BaseModel):
	    # Required (no default): name of the dataset to read.
	    dataset_name: str


	# class UpdateEmbeddingRequest(BaseModel):
	# updates: Dict[str, List] # Column name -> List of values
	# target_column: str = "product_type"
	# output_column: str = "embedding"
	# model: str = "text-embedding-3-small"
	# batch_size: int = 10
	# max_concurrent_requests: int = 10
	# dataset_name: str = "re-mind/product_type_embedding"


	# Request payload for an embedding update.
	class UpdateEmbeddingRequest(BaseModel):
	    # Dataset to update; defaults to the product-type embedding dataset.
	    dataset_name: str = "re-mind/product_type_embedding"
	    # Required: dictionary of column names and their corresponding values.
	    updates: Dict[str, List]
	    # Column in the new data to generate embeddings for.
	    target_column: str = "product_type"
	    # Column to store the generated embeddings.
	    output_column: str = "embedding"


	# Request payload for a delete operation; carries only the dataset name.
	# NOTE(review): whether this removes the whole dataset or only its
	# embeddings is not visible here -- confirm against the handler.
	class DeleteEmbeddingRequest(BaseModel):
	    # Required (no default): name of the dataset the delete applies to.
	    dataset_name: str


	# Request payload for a key-based delete; all three fields are required.
	# NOTE(review): presumably rows whose `key_column` value appears in
	# `keys_to_delete` are removed -- confirm against the handler.
	class DeleteByColumnRequest(BaseModel):
	    # Name of the dataset to delete from.
	    dataset_name: str
	    # Name of the column used to match rows.
	    key_column: str
	    # String key values to match.
	    keys_to_delete: List[str]


	# Request model for the /embed endpoint
	class EmbedRequest(BaseModel):
	    # Required: list of strings to generate embeddings for.
	    texts: List[str]
	    # Column to store embeddings; the default is the literal below.
	    output_column: str = "embedding"


	# Request payload for a similarity search; every field except
	# `additional_columns` is required.
	class SearchEmbeddingRequest(BaseModel):
	    # List of texts to search for.
	    texts: List[str]
	    # Column to return in the results.
	    target_column: str
	    # Column containing the embeddings to search against.
	    embedding_column: str
	    # Number of results to return.
	    num_results: int
	    # Name of the dataset to search in.
	    dataset_name: str
	    # Optional list of additional columns to include in the results.
	    additional_columns: Optional[List[str]] = None


	# Request payload for resetting embeddings; both fields have defaults, so an
	# empty body is accepted.
	# NOTE(review): what "reset" does to the dataset is not visible here --
	# confirm against the handler.
	class ResetEmbeddingsRequest(BaseModel):
	    # Dataset the reset applies to.
	    dataset_name: str = "re-mind/product_type_embedding"
	    # Presumably the column whose embeddings are reset -- confirm.
	    target_column: str = "product_type"