nomadicsynth committed on
Commit
aa83efc
·
1 Parent(s): 261056f

Refactor find_synergistic_papers to use DatasetManager for accessing the training dataset

Browse files
Files changed (1) hide show
  1. app.py +4 -1
app.py CHANGED
@@ -368,8 +368,11 @@ def find_synergistic_papers(abstract: str, limit=25) -> list[dict]:
368
  # Generate embedding for the query abstract (normalized for cosine similarity)
369
  abstract_embedding = embed_text(abstract)
370
 
 
 
 
371
  # Search for similar papers using FAISS with inner product (cosine similarity for normalized vectors)
372
- scores, examples = dataset["train"].get_nearest_examples("embedding", abstract_embedding, k=limit)
373
 
374
  papers = []
375
  for i in range(len(scores)):
 
368
  # Generate embedding for the query abstract (normalized for cosine similarity)
369
  abstract_embedding = embed_text(abstract)
370
 
371
+ # Access the dataset's train split from the DatasetManager instance
372
+ train_dataset = dataset.dataset["train"]
373
+
374
  # Search for similar papers using FAISS with inner product (cosine similarity for normalized vectors)
375
+ scores, examples = train_dataset.get_nearest_examples("embedding", abstract_embedding, k=limit)
376
 
377
  papers = []
378
  for i in range(len(scores)):