Spaces:
Running
Running
Commit
·
65997b1
1
Parent(s):
b96eea7
add delete_rows endpoint
Browse files- .gitignore +2 -1
- src/api/models/embedding_models.py +5 -0
- src/api/services/huggingface_service.py +26 -0
- src/main.py +24 -0
.gitignore
CHANGED
@@ -1,2 +1,3 @@
|
|
1 |
*pycache*
|
2 |
-
*.env*
|
|
|
|
1 |
*pycache*
|
2 |
+
*.env*
|
3 |
+
.python-version
|
src/api/models/embedding_models.py
CHANGED
@@ -42,6 +42,11 @@ class DeleteEmbeddingRequest(BaseModel):
|
|
42 |
dataset_name: str
|
43 |
|
44 |
|
|
|
|
|
|
|
|
|
|
|
45 |
# Request model for the /embed endpoint
|
46 |
class EmbedRequest(BaseModel):
|
47 |
texts: List[str] # List of strings to generate embeddings for
|
|
|
42 |
dataset_name: str
|
43 |
|
44 |
|
45 |
+
# Request model for the /delete_rows endpoint
class DeleteRowsRequest(BaseModel):
    # Name of the dataset the rows are removed from
    dataset_name: str
    # Rows whose product_type matches any entry in this list are deleted
    product_types_to_delete: List[str]
|
48 |
+
|
49 |
+
|
50 |
# Request model for the /embed endpoint
|
51 |
class EmbedRequest(BaseModel):
|
52 |
texts: List[str] # List of strings to generate embeddings for
|
src/api/services/huggingface_service.py
CHANGED
@@ -100,3 +100,29 @@ class HuggingFaceService:
|
|
100 |
except Exception as e:
|
101 |
logger.error(f"Failed to delete dataset: {e}")
|
102 |
raise DatasetDeleteError(f"Failed to delete dataset: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
100 |
except Exception as e:
|
101 |
logger.error(f"Failed to delete dataset: {e}")
|
102 |
raise DatasetDeleteError(f"Failed to delete dataset: {e}")
|
103 |
+
|
104 |
+
|
105 |
+
async def delete_rows_from_dataset(self, dataset_name: str, product_types_to_delete: List[str]):
    """
    Loads a dataset, filters out rows based on a list of product types, and pushes it back.

    Args:
        dataset_name: Name of the dataset, passed unchanged to
            ``self.read_dataset`` and ``self.push_to_hub``.
        product_types_to_delete: Rows whose ``product_type`` value is in
            this list are dropped.

    Returns:
        The filtered dataset that was pushed, or ``None`` when
        ``product_types_to_delete`` is empty (in that case nothing is
        loaded or pushed).

    Raises:
        Whatever ``self.read_dataset``, ``dataset.filter`` or
        ``self.push_to_hub`` raise; no exception is caught here.
    """
    if not product_types_to_delete:
        # Nothing to delete: skip the load/push round trip entirely.
        return None

    # Step 1: Load the existing dataset
    logger.info(f"Loading dataset {dataset_name} to delete rows.")
    dataset = await self.read_dataset(dataset_name)

    # Step 2: Filter the dataset to EXCLUDE the rows with the given product_types
    logger.info(f"Filtering out product_types: {product_types_to_delete}")
    initial_row_count = len(dataset)

    # Build the lookup set once so every row costs a constant-time membership
    # test instead of a linear scan of the request list.
    # NOTE(review): assumes each row's product_type is hashable (e.g. str) - confirm.
    excluded_types = set(product_types_to_delete)
    filtered_dataset = dataset.filter(
        lambda product: product['product_type'] not in excluded_types
    )

    final_row_count = len(filtered_dataset)
    logger.info(f"{initial_row_count - final_row_count} rows deleted.")

    # Step 3: Push the modified dataset back to the hub
    await self.push_to_hub(filtered_dataset, dataset_name)

    return filtered_dataset
|
src/main.py
CHANGED
@@ -9,6 +9,7 @@ from src.api.models.embedding_models import (
|
|
9 |
CreateEmbeddingRequest,
|
10 |
ReadEmbeddingRequest,
|
11 |
UpdateEmbeddingRequest,
|
|
|
12 |
DeleteEmbeddingRequest,
|
13 |
EmbedRequest,
|
14 |
SearchEmbeddingRequest,
|
@@ -230,6 +231,29 @@ async def delete_embeddings(
|
|
230 |
raise HTTPException(status_code=500, detail=f"An error occurred: {e}")
|
231 |
|
232 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
233 |
@app.post("/search_embeddings")
|
234 |
async def search_embedding(
|
235 |
request: SearchEmbeddingRequest,
|
|
|
9 |
CreateEmbeddingRequest,
|
10 |
ReadEmbeddingRequest,
|
11 |
UpdateEmbeddingRequest,
|
12 |
+
DeleteRowsRequest,
|
13 |
DeleteEmbeddingRequest,
|
14 |
EmbedRequest,
|
15 |
SearchEmbeddingRequest,
|
|
|
231 |
raise HTTPException(status_code=500, detail=f"An error occurred: {e}")
|
232 |
|
233 |
|
234 |
+
@app.post("/delete_rows")
async def delete_rows(
    request: DeleteRowsRequest,
    huggingface_service: HuggingFaceService = Depends(get_huggingface_service)
):
    """
    Deletes specific rows from a Hugging Face dataset based on their product_types.

    Delegates to ``huggingface_service.delete_rows_from_dataset`` and, on
    success, returns a confirmation payload echoing the dataset name and the
    requested product types.

    Raises:
        HTTPException: 404 when the service raises ``DatasetNotFoundError``;
            500 for any other exception (the error text is returned as the
            response detail).
    """
    # Keep the try body to the single call that can fail.
    try:
        await huggingface_service.delete_rows_from_dataset(
            request.dataset_name, request.product_types_to_delete
        )
    except DatasetNotFoundError as e:
        # Chain the cause so the original error stays visible in tracebacks.
        raise HTTPException(status_code=404, detail=str(e)) from e
    except Exception as e:
        # NOTE(review): "occured" is misspelled; left unchanged in case log
        # filters match on the current text.
        logger.error(f"An error occured while deleting rows: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e

    return {
        # NOTE(review): "succesfully" is misspelled; left unchanged because
        # API clients may compare against the current message.
        "message": "Rows deleted succesfully from dataset.",
        "dataset_name": request.dataset_name,
        "deleted_product_types": request.product_types_to_delete,
    }
|
255 |
+
|
256 |
+
|
257 |
@app.post("/search_embeddings")
|
258 |
async def search_embedding(
|
259 |
request: SearchEmbeddingRequest,
|