Spaces:
Running on Zero
Commit
·
aa83efc
1
Parent(s):
261056f
Refactor find_synergistic_papers to use DatasetManager for accessing the training dataset
Browse files
app.py
CHANGED
@@ -368,8 +368,11 @@ def find_synergistic_papers(abstract: str, limit=25) -> list[dict]:
|
|
368 |
# Generate embedding for the query abstract (normalized for cosine similarity)
|
369 |
abstract_embedding = embed_text(abstract)
|
370 |
|
|
|
|
|
|
|
371 |
# Search for similar papers using FAISS with inner product (cosine similarity for normalized vectors)
|
372 |
-
scores, examples =
|
373 |
|
374 |
papers = []
|
375 |
for i in range(len(scores)):
|
|
|
368 |
# Generate embedding for the query abstract (normalized for cosine similarity)
|
369 |
abstract_embedding = embed_text(abstract)
|
370 |
|
371 |
+
# Access the dataset's train split from the DatasetManager instance
|
372 |
+
train_dataset = dataset.dataset["train"]
|
373 |
+
|
374 |
# Search for similar papers using FAISS with inner product (cosine similarity for normalized vectors)
|
375 |
+
scores, examples = train_dataset.get_nearest_examples("embedding", abstract_embedding, k=limit)
|
376 |
|
377 |
papers = []
|
378 |
for i in range(len(scores)):
|