mgbam committed on
Commit
bb79d76
·
verified ·
1 Parent(s): f7d2b7f

Update components/pubmed_search.py

Browse files
Files changed (1) hide show
  1. components/pubmed_search.py +33 -16
components/pubmed_search.py CHANGED
@@ -17,29 +17,46 @@ def log_error(message: str):
17
  # ---------------------------- Tool Functions ----------------------------
18
 
19
  def search_pubmed(query: str) -> list:
20
- """Searches PubMed and returns a list of article IDs."""
21
  try:
22
- Entrez.email = os.environ.get("ENTREZ_EMAIL", "[email protected]")
 
 
 
 
 
 
 
 
 
23
  handle = Entrez.esearch(db="pubmed", term=query, retmax="5")
24
  record = Entrez.read(handle)
25
- handle.close()
26
- return record["IdList"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  except Exception as e:
28
  log_error(f"PubMed search error: {e}")
29
  return [f"Error during PubMed search: {e}"]
30
 
31
- def fetch_abstract(article_id: str) -> str:
32
- """Fetches the abstract for a given PubMed article ID."""
33
- try:
34
- Entrez.email = os.environ.get("ENTREZ_EMAIL", "[email protected]")
35
- handle = Entrez.efetch(db="pubmed", id=article_id, rettype="abstract", retmode="text")
36
- abstract = handle.read()
37
- handle.close()
38
- return abstract
39
- except Exception as e:
40
- log_error(f"Error fetching abstract for {article_id}: {e}")
41
- return f"Error fetching abstract for {article_id}: {e}"
42
-
43
  # ---------------------------- Agent Function ----------------------------
44
 
45
  def medai_agent(query: str) -> str:
 
17
  # ---------------------------- Tool Functions ----------------------------
18
 
19
  def search_pubmed(query: str) -> list:
20
+ """Searches PubMed and returns a list of article IDs using semantic search."""
21
  try:
22
+ Entrez.email = ENTREZ_EMAIL
23
+ print(f"Entrez Email: {Entrez.email}") # DEBUG: Check the email being used
24
+ print(f"PubMed Query: {query}") # DEBUG: Check the query being sent
25
+
26
+ # Semantic Search Using Sentence Transformers:
27
+ from sentence_transformers import SentenceTransformer, util
28
+
29
+ model = SentenceTransformer('all-mpnet-base-v2') #Model by all-mpnet-base-v2
30
+
31
+ # Fetch PubMed IDs
32
  handle = Entrez.esearch(db="pubmed", term=query, retmax="5")
33
  record = Entrez.read(handle)
34
+ id_list = record["IdList"]
35
+
36
+ # Fetch abstracts for all IDs:
37
+ abstracts = []
38
+ for article_id in id_list:
39
+ abstracts.append(fetch_abstract(article_id))
40
+
41
+ # Generate embeddings for abstracts and the query:
42
+ query_embedding = model.encode(query)
43
+ abstract_embeddings = model.encode(abstracts)
44
+
45
+ # Calculate cosine similarities
46
+ similarities = util.cos_sim(query_embedding, abstract_embeddings)[0]
47
+
48
+ # Sort by similarity (higher is better)
49
+ ranked_articles = sorted(zip(id_list, similarities), key=lambda x: x[1], reverse=True)
50
+
51
+ # Extract ranked IDs:
52
+ ranked_ids = [article_id for article_id, similarity in ranked_articles]
53
+
54
+ print(f"PubMed Results: {ranked_ids}") # DEBUG: Check the results
55
+ return ranked_ids
56
  except Exception as e:
57
  log_error(f"PubMed search error: {e}")
58
  return [f"Error during PubMed search: {e}"]
59
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  # ---------------------------- Agent Function ----------------------------
61
 
62
  def medai_agent(query: str) -> str: