amaye15 committed on
Commit
494872d
·
1 Parent(s): ff94fdb

Feat - Data Format

Browse files
Files changed (1) hide show
  1. src/api/services/embedding_service.py +12 -12
src/api/services/embedding_service.py CHANGED
@@ -126,7 +126,7 @@ class EmbeddingService:
126
  embedding_column: str,
127
  target_column: str,
128
  num_results: int,
129
- ) -> List[Dict]:
130
  """
131
  Perform a cosine similarity search between query embeddings and dataset embeddings.
132
 
@@ -138,7 +138,7 @@ class EmbeddingService:
138
  num_results: The number of results to return.
139
 
140
  Returns:
141
- A list of dictionaries containing the target column values and their similarity scores.
142
  """
143
  dataset_embeddings = np.array(dataset[embedding_column])
144
  query_embeddings = np.array(query_embeddings)
@@ -146,17 +146,17 @@ class EmbeddingService:
146
  # Compute cosine similarity
147
  similarities = cosine_similarity(query_embeddings, dataset_embeddings)
148
 
 
 
 
 
 
 
149
  # Get the top-k results for each query
150
- results = []
151
- for i, query_similarities in enumerate(similarities):
152
  top_k_indices = np.argsort(query_similarities)[-num_results:][::-1]
153
- top_k_results = [
154
- {
155
- target_column: dataset[target_column][idx],
156
- "similarity": float(query_similarities[idx]),
157
- }
158
- for idx in top_k_indices
159
- ]
160
- results.append(top_k_results)
161
 
162
  return results
 
126
  embedding_column: str,
127
  target_column: str,
128
  num_results: int,
129
+ ) -> Dict[str, List]:
130
  """
131
  Perform a cosine similarity search between query embeddings and dataset embeddings.
132
 
 
138
  num_results: The number of results to return.
139
 
140
  Returns:
141
+ A dictionary of lists containing the target column values and their similarity scores.
142
  """
143
  dataset_embeddings = np.array(dataset[embedding_column])
144
  query_embeddings = np.array(query_embeddings)
 
146
  # Compute cosine similarity
147
  similarities = cosine_similarity(query_embeddings, dataset_embeddings)
148
 
149
+ # Initialize the results dictionary
150
+ results = {
151
+ target_column: [],
152
+ "similarity": [],
153
+ }
154
+
155
  # Get the top-k results for each query
156
+ for query_similarities in similarities:
 
157
  top_k_indices = np.argsort(query_similarities)[-num_results:][::-1]
158
+ for idx in top_k_indices:
159
+ results[target_column].append(dataset[target_column][idx])
160
+ results["similarity"].append(float(query_similarities[idx]))
 
 
 
 
 
161
 
162
  return results