mgbam committed on
Commit
5b8a3ff
·
verified ·
1 Parent(s): bb79d76

Update components/pubmed_search.py

Browse files
Files changed (1) hide show
  1. components/pubmed_search.py +53 -25
components/pubmed_search.py CHANGED
@@ -1,7 +1,6 @@
1
  from Bio import Entrez
2
- import os # For environment variables and file paths
3
-
4
- # ---------------------------- Configuration ----------------------------
5
 
6
  # ---------------------------- Helper Functions ----------------------------
7
 
@@ -14,50 +13,79 @@ def log_error(message: str):
14
  except:
15
  print("Couldn't write to error log file.") #If logging fails, still print to console
16
 
17
- # ---------------------------- Tool Functions ----------------------------
18
 
19
  def search_pubmed(query: str) -> list:
20
  """Searches PubMed and returns a list of article IDs using semantic search."""
 
21
  try:
22
  Entrez.email = ENTREZ_EMAIL
23
  print(f"Entrez Email: {Entrez.email}") # DEBUG: Check the email being used
24
  print(f"PubMed Query: {query}") # DEBUG: Check the query being sent
25
 
26
- # Semantic Search Using Sentence Transformers:
27
- from sentence_transformers import SentenceTransformer, util
28
-
29
- model = SentenceTransformer('all-mpnet-base-v2') #Model by all-mpnet-base-v2
30
-
31
  # Fetch PubMed IDs
32
- handle = Entrez.esearch(db="pubmed", term=query, retmax="5")
33
- record = Entrez.read(handle)
34
- id_list = record["IdList"]
 
 
 
 
 
 
35
 
 
36
  # Fetch abstracts for all IDs:
37
  abstracts = []
38
  for article_id in id_list:
39
- abstracts.append(fetch_abstract(article_id))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
40
 
41
- # Generate embeddings for abstracts and the query:
42
- query_embedding = model.encode(query)
43
- abstract_embeddings = model.encode(abstracts)
44
 
45
- # Calculate cosine similarities
46
- similarities = util.cos_sim(query_embedding, abstract_embeddings)[0]
47
 
48
- # Sort by similarity (higher is better)
49
- ranked_articles = sorted(zip(id_list, similarities), key=lambda x: x[1], reverse=True)
50
 
51
- # Extract ranked IDs:
52
- ranked_ids = [article_id for article_id, similarity in ranked_articles]
 
53
 
54
- print(f"PubMed Results: {ranked_ids}") # DEBUG: Check the results
55
- return ranked_ids
56
  except Exception as e:
57
  log_error(f"PubMed search error: {e}")
58
  return [f"Error during PubMed search: {e}"]
59
 
60
- # ---------------------------- Agent Function ----------------------------
 
 
 
 
 
 
 
 
 
 
 
61
 
62
  def medai_agent(query: str) -> str:
63
  """Orchestrates the medical literature review and presents abstract."""
 
1
  from Bio import Entrez
2
+ import os
3
+ from sentence_transformers import SentenceTransformer, util
 
4
 
5
  # ---------------------------- Helper Functions ----------------------------
6
 
 
13
  except:
14
  print("Couldn't write to error log file.") #If logging fails, still print to console
15
 
16
+ # ---------------------------- Functions ----------------------------
17
 
def search_pubmed(query: str) -> list:
    """Search PubMed for ``query`` and return article IDs ranked by semantic similarity.

    The top PubMed hits are looked up with ``Entrez.esearch``, each hit's
    abstract is retrieved through ``fetch_abstract``, and the hits are then
    re-ordered by cosine similarity between the query embedding and each
    abstract embedding (``all-mpnet-base-v2`` sentence-transformer model).

    Args:
        query: PubMed search term; also the text the abstracts are compared to.

    Returns:
        Article IDs, most similar first. Only IDs whose abstract was fetched
        successfully are included. An empty list is returned when PubMed has
        no hits or no abstract could be fetched. On failure a one-element
        list holding a human-readable error message is returned; the function
        does not raise.
    """
    entrez_email = os.environ.get("ENTREZ_EMAIL", "[email protected]")
    try:
        Entrez.email = entrez_email
        print(f"Entrez Email: {Entrez.email}")  # DEBUG: Check the email being used
        print(f"PubMed Query: {query}")  # DEBUG: Check the query being sent

        # Step 1: fetch PubMed IDs.
        try:
            handle = Entrez.esearch(db="pubmed", term=query, retmax="5")
            try:
                record = Entrez.read(handle)
            finally:
                # Release the HTTP handle even when parsing fails.
                handle.close()
            id_list = record["IdList"]
            print(f"Original PubMed Results: {id_list}")
        except Exception as e:
            log_error(f"First step of PubMed search error: {e}")
            return [f"First step of Error during PubMed search: {e}"]

        if not id_list:
            # Nothing to rank; avoid feeding an empty batch to the encoder.
            return []

        # Step 2: fetch abstracts, keeping IDs and abstracts index-aligned so
        # every similarity score below is paired with the right article.
        kept_ids = []
        abstracts = []
        for article_id in id_list:
            abstract_text = fetch_abstract(article_id)
            # fetch_abstract reports failure with this exact prefix; matching
            # the prefix (not any "Error" substring) keeps genuine abstracts
            # that merely mention the word "Error".
            if abstract_text.startswith("Error fetching abstract"):
                print(f"Skipping article {article_id} due to abstract error: {abstract_text}")
                continue
            kept_ids.append(article_id)
            abstracts.append(abstract_text)

        if not abstracts:
            return []

        # Step 3: semantic re-ranking with sentence embeddings.
        try:
            model = _get_embedding_model("all-mpnet-base-v2")
            query_embedding = model.encode(query)
            abstract_embeddings = model.encode(abstracts)

            # Row 0 holds the query-vs-abstract scores; convert to plain
            # floats so sorting does not depend on tensor comparison.
            similarities = util.cos_sim(query_embedding, abstract_embeddings)[0].tolist()

            # Higher similarity first.
            ranked_articles = sorted(
                zip(kept_ids, similarities), key=lambda pair: pair[1], reverse=True
            )
            ranked_ids = [article_id for article_id, _ in ranked_articles]

            print(f"Semantic search code: {ranked_ids}")  # DEBUG: Check the results
            return ranked_ids
        except Exception as e:
            log_error(f"Semantic search code. : {e}")
            return [f"Error during semantic search code: {e}"]

    except Exception as e:
        log_error(f"PubMed search error: {e}")
        return [f"Error during PubMed search: {e}"]


# Loaded models keyed by model name, so the weights are read once per process
# instead of on every search.
_EMBEDDING_MODELS = {}


def _get_embedding_model(model_name: str):
    """Return the SentenceTransformer for ``model_name``, loading it on first use."""
    if model_name not in _EMBEDDING_MODELS:
        _EMBEDDING_MODELS[model_name] = SentenceTransformer(model_name)
    return _EMBEDDING_MODELS[model_name]
76
 
def fetch_abstract(article_id: str) -> str:
    """Fetch the plain-text abstract for a PubMed article.

    Args:
        article_id: PubMed ID passed to ``Entrez.efetch``.

    Returns:
        The text returned by ``efetch`` with ``rettype="abstract"`` and
        ``retmode="text"``. On any failure the error is logged via
        ``log_error`` and a message starting with
        ``"Error fetching abstract for"`` is returned instead of raising.
    """
    entrez_email = os.environ.get("ENTREZ_EMAIL", "[email protected]")
    try:
        Entrez.email = entrez_email
        handle = Entrez.efetch(db="pubmed", id=article_id, rettype="abstract", retmode="text")
        try:
            return handle.read()
        finally:
            # Close the connection even if read() raises, so it is not leaked.
            handle.close()
    except Exception as e:
        log_error(f"Error fetching abstract for {article_id}: {e}")
        return f"Error fetching abstract for {article_id}: {e}"
89
 
90
  def medai_agent(query: str) -> str:
91
  """Orchestrates the medical literature review and presents abstract."""