nabilcheikh1 committed on
Commit
65997b1
·
1 Parent(s): b96eea7

add delete_rows endpoint

Browse files
.gitignore CHANGED
@@ -1,2 +1,3 @@
1
  *pycache*
2
- *.env*
 
 
1
  *pycache*
2
+ *.env*
3
+ .python-version
src/api/models/embedding_models.py CHANGED
@@ -42,6 +42,11 @@ class DeleteEmbeddingRequest(BaseModel):
42
  dataset_name: str
43
 
44
 
 
 
 
 
 
45
  # Request model for the /embed endpoint
46
  class EmbedRequest(BaseModel):
47
  texts: List[str] # List of strings to generate embeddings for
 
42
  dataset_name: str
43
 
44
 
45
# Request model for the /delete_rows endpoint
class DeleteRowsRequest(BaseModel):
    dataset_name: str  # Name of the dataset the rows are removed from
    product_types_to_delete: List[str]  # Rows whose product_type is in this list are removed
48
+
49
+
50
  # Request model for the /embed endpoint
51
  class EmbedRequest(BaseModel):
52
  texts: List[str] # List of strings to generate embeddings for
src/api/services/huggingface_service.py CHANGED
@@ -100,3 +100,29 @@ class HuggingFaceService:
100
  except Exception as e:
101
  logger.error(f"Failed to delete dataset: {e}")
102
  raise DatasetDeleteError(f"Failed to delete dataset: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
100
  except Exception as e:
101
  logger.error(f"Failed to delete dataset: {e}")
102
  raise DatasetDeleteError(f"Failed to delete dataset: {e}")
103
+
104
+
105
async def delete_rows_from_dataset(self, dataset_name: str, product_types_to_delete: List[str]):
    """
    Remove the rows of a dataset whose ``product_type`` is listed, then push the result back.

    The dataset is loaded with ``self.read_dataset``, filtered so that only rows
    whose ``product_type`` value is NOT in ``product_types_to_delete`` remain,
    and uploaded again with ``self.push_to_hub``. The upload is skipped when the
    filter removed nothing, since the stored dataset is already in the
    requested state.

    Args:
        dataset_name: Name of the dataset to load and overwrite.
        product_types_to_delete: ``product_type`` values identifying the rows
            to drop. An empty list makes the call a no-op.

    Returns:
        The filtered dataset, or ``None`` when ``product_types_to_delete`` is
        empty (nothing is loaded in that case).

    Raises:
        Whatever ``self.read_dataset`` or ``self.push_to_hub`` raise; errors
        are not caught here.
    """
    if not product_types_to_delete:
        # Nothing was requested: avoid the load / push round trip entirely.
        return

    # Step 1: Load the existing dataset
    logger.info(f"Loading dataset {dataset_name} to delete rows.")
    dataset = await self.read_dataset(dataset_name)

    # Step 2: Filter the dataset to EXCLUDE the rows with the given product_types
    logger.info(f"Filtering out product_types: {product_types_to_delete}")
    initial_row_count = len(dataset)

    # Build the lookup set once so each row costs a constant-time membership
    # test instead of a scan over the whole request list.
    excluded_types = frozenset(product_types_to_delete)
    filtered_dataset = dataset.filter(
        lambda product: product["product_type"] not in excluded_types
    )

    deleted_row_count = initial_row_count - len(filtered_dataset)
    logger.info(f"{deleted_row_count} rows deleted.")

    # Step 3: Push the modified dataset back to the hub, but only when it
    # actually changed; re-uploading identical data is wasted work.
    if deleted_row_count:
        await self.push_to_hub(filtered_dataset, dataset_name)

    return filtered_dataset
src/main.py CHANGED
@@ -9,6 +9,7 @@ from src.api.models.embedding_models import (
9
  CreateEmbeddingRequest,
10
  ReadEmbeddingRequest,
11
  UpdateEmbeddingRequest,
 
12
  DeleteEmbeddingRequest,
13
  EmbedRequest,
14
  SearchEmbeddingRequest,
@@ -230,6 +231,29 @@ async def delete_embeddings(
230
  raise HTTPException(status_code=500, detail=f"An error occurred: {e}")
231
 
232
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
233
  @app.post("/search_embeddings")
234
  async def search_embedding(
235
  request: SearchEmbeddingRequest,
 
9
  CreateEmbeddingRequest,
10
  ReadEmbeddingRequest,
11
  UpdateEmbeddingRequest,
12
+ DeleteRowsRequest,
13
  DeleteEmbeddingRequest,
14
  EmbedRequest,
15
  SearchEmbeddingRequest,
 
231
  raise HTTPException(status_code=500, detail=f"An error occurred: {e}")
232
 
233
 
234
@app.post("/delete_rows")
async def delete_rows(
    request: DeleteRowsRequest,
    huggingface_service: HuggingFaceService = Depends(get_huggingface_service),
):
    """
    Delete specific rows from a Hugging Face dataset based on their product_types.

    Delegates to ``HuggingFaceService.delete_rows_from_dataset`` with the
    dataset name and product types taken from the request body.

    Returns:
        A JSON object with a confirmation message, the dataset name and the
        product types that were requested for deletion.

    Raises:
        HTTPException: 404 when the service raises ``DatasetNotFoundError``,
            500 for any other failure (the original error text is the detail).
    """
    # Keep the try body limited to the single call that can fail so that the
    # handlers below only ever translate service errors.
    try:
        await huggingface_service.delete_rows_from_dataset(
            request.dataset_name, request.product_types_to_delete
        )
    except DatasetNotFoundError as e:
        # Chain the cause so the original traceback is kept in server logs.
        raise HTTPException(status_code=404, detail=str(e)) from e
    except Exception as e:
        logger.error(f"An error occurred while deleting rows: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e

    return {
        "message": "Rows deleted successfully from dataset.",
        "dataset_name": request.dataset_name,
        "deleted_product_types": request.product_types_to_delete,
    }
255
+
256
+
257
  @app.post("/search_embeddings")
258
  async def search_embedding(
259
  request: SearchEmbeddingRequest,