nabilcheikh1 committed on
Commit
fdc0b3d
·
1 Parent(s): 65997b1

Generalize row deletion so it works for each dataset (marketplace, manufacturer, seller)

Browse files
src/api/models/embedding_models.py CHANGED
@@ -42,9 +42,10 @@ class DeleteEmbeddingRequest(BaseModel):
42
  dataset_name: str
43
 
44
 
45
- class DeleteRowsRequest(BaseModel):
46
  dataset_name: str
47
- product_types_to_delete: List[str]
 
48
 
49
 
50
  # Request model for the /embed endpoint
 
42
  dataset_name: str
43
 
44
 
45
+ class DeleteByColumnRequest(BaseModel):
46
  dataset_name: str
47
+ key_column: str
48
+ keys_to_delete: List[str]
49
 
50
 
51
  # Request model for the /embed endpoint
src/api/services/huggingface_service.py CHANGED
@@ -102,11 +102,11 @@ class HuggingFaceService:
102
  raise DatasetDeleteError(f"Failed to delete dataset: {e}")
103
 
104
 
105
- async def delete_rows_from_dataset(self, dataset_name: str, product_types_to_delete: List[str]):
106
  """
107
- Loads a dataset, filters out rows based on a list of product types, and pushes it back.
108
  """
109
- if not product_types_to_delete:
110
  return
111
 
112
  # Step 1: Load the existing dataset
@@ -114,10 +114,10 @@ class HuggingFaceService:
114
  dataset = await self.read_dataset(dataset_name)
115
 
116
  # Step 2 : Filter the dataset to EXCLUDE the rows with the given product_types
117
- logger.info(f"Filtering out product_types: {product_types_to_delete}")
118
  initial_row_count = len(dataset)
119
 
120
- filtered_dataset = dataset.filter(lambda product: product['product_type'] not in product_types_to_delete)
121
 
122
  final_row_count = len(filtered_dataset)
123
  logger.info(f"{initial_row_count - final_row_count} rows deleted.")
 
102
  raise DatasetDeleteError(f"Failed to delete dataset: {e}")
103
 
104
 
105
+ async def delete_rows_from_dataset(self, dataset_name: str, key_column: str, keys_to_delete: List[str]):
106
  """
107
+ Loads a dataset, filters out rows based on a list of keys in a specific column, and pushes it back.
108
  """
109
+ if not keys_to_delete:
110
  return
111
 
112
  # Step 1: Load the existing dataset
 
114
  dataset = await self.read_dataset(dataset_name)
115
 
116
  # Step 2: Filter the dataset to EXCLUDE rows whose key_column value is in keys_to_delete
117
+ logger.info(f"Filtering out rows where column {key_column} is in {keys_to_delete}")
118
  initial_row_count = len(dataset)
119
 
120
+ filtered_dataset = dataset.filter(lambda element: element[key_column] not in keys_to_delete)
121
 
122
  final_row_count = len(filtered_dataset)
123
  logger.info(f"{initial_row_count - final_row_count} rows deleted.")
src/main.py CHANGED
@@ -7,9 +7,9 @@ from typing import List, Dict
7
  from datasets import Dataset
8
  from src.api.models.embedding_models import (
9
  CreateEmbeddingRequest,
 
10
  ReadEmbeddingRequest,
11
  UpdateEmbeddingRequest,
12
- DeleteRowsRequest,
13
  DeleteEmbeddingRequest,
14
  EmbedRequest,
15
  SearchEmbeddingRequest,
@@ -231,21 +231,22 @@ async def delete_embeddings(
231
  raise HTTPException(status_code=500, detail=f"An error occurred: {e}")
232
 
233
 
234
- @app.post("/delete_rows")
235
- async def delete_rows(
236
- request: DeleteRowsRequest,
237
  huggingface_service: HuggingFaceService = Depends(get_huggingface_service)
238
  ):
239
  """
240
- Deletes specific rows from a Hugging Face dataset based on their product_types"""
241
  try:
242
  await huggingface_service.delete_rows_from_dataset(
243
- request.dataset_name, request.product_types_to_delete
244
  )
245
  return {
246
  "message": "Rows deleted succesfully from dataset.",
247
  "dataset_name": request.dataset_name,
248
- "deleted_product_types": request.product_types_to_delete,
 
249
  }
250
  except DatasetNotFoundError as e:
251
  raise HTTPException(status_code=404, detail=str(e))
 
7
  from datasets import Dataset
8
  from src.api.models.embedding_models import (
9
  CreateEmbeddingRequest,
10
+ DeleteByColumnRequest,
11
  ReadEmbeddingRequest,
12
  UpdateEmbeddingRequest,
 
13
  DeleteEmbeddingRequest,
14
  EmbedRequest,
15
  SearchEmbeddingRequest,
 
231
  raise HTTPException(status_code=500, detail=f"An error occurred: {e}")
232
 
233
 
234
+ @app.post("/delete_rows_by_key")
235
+ async def delete_rows_by_key(
236
+ request: DeleteByColumnRequest,
237
  huggingface_service: HuggingFaceService = Depends(get_huggingface_service)
238
  ):
239
  """
240
+ Deletes specific rows from a Hugging Face dataset based on a key column and values."""
241
  try:
242
  await huggingface_service.delete_rows_from_dataset(
243
+ request.dataset_name, request.key_column, request.keys_to_delete
244
  )
245
  return {
246
  "message": "Rows deleted succesfully from dataset.",
247
  "dataset_name": request.dataset_name,
248
+ "key_column": request.key_column,
249
+ "deleted_keys": request.keys_to_delete,
250
  }
251
  except DatasetNotFoundError as e:
252
  raise HTTPException(status_code=404, detail=str(e))