nabilcheikh1 committed on
Commit
054d2a0
·
1 Parent(s): b9122de

correction of columns of datasets

Browse files
src/api/models/embedding_models.py CHANGED
@@ -69,3 +69,4 @@ class SearchEmbeddingRequest(BaseModel):
69
 
70
  class ResetEmbeddingsRequest(BaseModel):
71
  dataset_name: str = "re-mind/product_type_embedding"
 
 
69
 
70
  class ResetEmbeddingsRequest(BaseModel):
71
  dataset_name: str = "re-mind/product_type_embedding"
72
+ target_column: str = "product_type"
src/api/services/postgresql_service.py CHANGED
@@ -13,26 +13,49 @@ class PostgresqlService:
13
  query_mapping = {
14
  "re-mind/product_type_embedding": {
15
  "column": "type",
 
16
  "table": "product_producttype",
 
 
 
 
 
 
17
  },
18
  "re-mind/marketplace_name_embedding": {
19
  "column": "name",
 
20
  "table": "invoice_marketplace",
21
  },
22
  "re-mind/manufacturer_name_embedding": {
23
  "column": "name",
 
24
  "table": "product_manufacturer",
25
  },
26
  "re-mind/seller_name_embedding": {
27
  "column": "name",
 
28
  "table": "product_seller",
29
  }
30
  }
31
 
32
  db_table = query_mapping[dataset]["table"]
33
  column = query_mapping[dataset]["column"]
34
- query = f"SELECT {column} FROM {db_table}"
 
 
 
 
 
 
 
 
35
 
 
 
 
 
 
36
  results = await self.db.fetch(query)
37
 
38
  return results
 
13
  query_mapping = {
14
  "re-mind/product_type_embedding": {
15
  "column": "type",
16
+ "target_column": "product_type",
17
  "table": "product_producttype",
18
+ "optional": {
19
+ "column": "name",
20
+ "target_column": "product_category",
21
+ "table": "product_category",
22
+ "foreign_key": "category_id",
23
+ }
24
  },
25
  "re-mind/marketplace_name_embedding": {
26
  "column": "name",
27
+ "target_column": "marketplace_name",
28
  "table": "invoice_marketplace",
29
  },
30
  "re-mind/manufacturer_name_embedding": {
31
  "column": "name",
32
+ "target_column": "manufacturer_name",
33
  "table": "product_manufacturer",
34
  },
35
  "re-mind/seller_name_embedding": {
36
  "column": "name",
37
+ "target_column": "seller_name",
38
  "table": "product_seller",
39
  }
40
  }
41
 
42
  db_table = query_mapping[dataset]["table"]
43
  column = query_mapping[dataset]["column"]
44
+ target_column = query_mapping[dataset]["target_column"]
45
+ optional = query_mapping[dataset].get("optional")
46
+ optional_query_1 = ""
47
+ optional_query_2 = ""
48
+ if optional:
49
+ optional_table = optional["table"]
50
+ optional_column = optional["column"]
51
+ optional_target_column = optional["target_column"]
52
+ optional_foreign_key = optional["foreign_key"]
53
 
54
+ optional_query_1 = f", {optional_table}.{optional_column} AS {optional_target_column}"
55
+ optional_query_2 = f"INNER JOIN {optional_table} ON {db_table}.{optional_foreign_key} = {optional_table}.id"
56
+ query = f"SELECT {db_table}.{column} AS {target_column} {optional_query_1} FROM {db_table} {optional_query_2}"
57
+
58
+ logger.info(query)
59
  results = await self.db.fetch(query)
60
 
61
  return results
src/main.py CHANGED
@@ -327,8 +327,7 @@ async def reset_embeddings(
327
 
328
  # Generation of embeddings for each row
329
  dataset = Dataset.from_dict(results)
330
- target_column = "type" if request.dataset_name == "re-mind/product_type_embedding" else "name"
331
- dataset_embedded = await embedding_service.create_embeddings(dataset, target_column, "embedding")
332
  # Embeddings up-to-date with database will overwrite old dataset
333
  await huggingface_service.push_to_hub(dataset_embedded, request.dataset_name)
334
 
 
327
 
328
  # Generation of embeddings for each row
329
  dataset = Dataset.from_dict(results)
330
+ dataset_embedded = await embedding_service.create_embeddings(dataset, request.target_column, "embedding")
 
331
  # Embeddings up-to-date with database will overwrite old dataset
332
  await huggingface_service.push_to_hub(dataset_embedded, request.dataset_name)
333