Spaces:

mgbam
/

Medresearch

Sleeping

App Files Files Community

mgbam committed on Feb 7

Commit

5b8a3ff

verified ·

1 Parent(s): bb79d76

Update components/pubmed_search.py

Browse files

Files changed (1) hide show

components/pubmed_search.py +53 -25

components/pubmed_search.py CHANGED Viewed

@@ -1,7 +1,6 @@
 from Bio import Entrez
-import os  # For environment variables and file paths
-# ---------------------------- Configuration ----------------------------
 # ---------------------------- Helper Functions ----------------------------
@@ -14,50 +13,79 @@ def log_error(message: str):
     except:
         print("Couldn't write to error log file.")  #If logging fails, still print to console
-# ---------------------------- Tool Functions ----------------------------
 def search_pubmed(query: str) -> list:
     """Searches PubMed and returns a list of article IDs using semantic search."""
     try:
         Entrez.email = ENTREZ_EMAIL
         print(f"Entrez Email: {Entrez.email}")  # DEBUG: Check the email being used
         print(f"PubMed Query: {query}")  # DEBUG: Check the query being sent
-        # Semantic Search Using Sentence Transformers:
-        from sentence_transformers import SentenceTransformer, util
-        model = SentenceTransformer('all-mpnet-base-v2') #Model by all-mpnet-base-v2
         # Fetch PubMed IDs
-        handle = Entrez.esearch(db="pubmed", term=query, retmax="5")
-        record = Entrez.read(handle)
-        id_list = record["IdList"]
         # Fetch abstracts for all IDs:
         abstracts = []
         for article_id in id_list:
-            abstracts.append(fetch_abstract(article_id))
-        # Generate embeddings for abstracts and the query:
-        query_embedding = model.encode(query)
-        abstract_embeddings = model.encode(abstracts)
-        # Calculate cosine similarities
-        similarities = util.cos_sim(query_embedding, abstract_embeddings)[0]
-        # Sort by similarity (higher is better)
-        ranked_articles = sorted(zip(id_list, similarities), key=lambda x: x[1], reverse=True)
-        # Extract ranked IDs:
-        ranked_ids = [article_id for article_id, similarity in ranked_articles]
-        print(f"PubMed Results: {ranked_ids}")  # DEBUG: Check the results
-        return ranked_ids
     except Exception as e:
         log_error(f"PubMed search error: {e}")
         return [f"Error during PubMed search: {e}"]
-# ---------------------------- Agent Function ----------------------------
 def medai_agent(query: str) -> str:
     """Orchestrates the medical literature review and presents abstract."""

 from Bio import Entrez
+import os
+from sentence_transformers import SentenceTransformer, util
 # ---------------------------- Helper Functions ----------------------------
     except:
         print("Couldn't write to error log file.")  #If logging fails, still print to console
+# ---------------------------- Functions ----------------------------
 def search_pubmed(query: str) -> list:
     """Searches PubMed and returns a list of article IDs using semantic search."""
+    ENTREZ_EMAIL = os.environ.get("ENTREZ_EMAIL", "[email protected]")
     try:
         Entrez.email = ENTREZ_EMAIL
         print(f"Entrez Email: {Entrez.email}")  # DEBUG: Check the email being used
         print(f"PubMed Query: {query}")  # DEBUG: Check the query being sent
+        "" Added code ""
         # Fetch PubMed IDs
+        try:
+            handle = Entrez.esearch(db="pubmed", term=query, retmax="5")
+            record = Entrez.read(handle)
+            id_list = record["IdList"]
+            print(f"Original PubMed Results: {id_list}")
+        except Exception as e:
+            log_error(f"First step of PubMed search error: {e}")
+            return [f"First step of Error during PubMed search: {e}"]
+        "" Added code ""
         # Fetch abstracts for all IDs:
         abstracts = []
         for article_id in id_list:
+            abstract_text = fetch_abstract(article_id)
+            if "Error" in abstract_text:
+                print(f"Skipping article {article_id} due to abstract error: {abstract_text}")
+                continue
+            abstracts.append(abstract_text)
+        "" Added code ""
+        # Semantic Search Using Sentence Transformers:
+        try:
+            model = SentenceTransformer('all-mpnet-base-v2') #Model by all-mpnet-base-v2
+            # Generate embeddings for abstracts and the query:
+            query_embedding = model.encode(query)
+            abstract_embeddings = model.encode(abstracts)
+            # Calculate cosine similarities
+            similarities = util.cos_sim(query_embedding, abstract_embeddings)[0]
+            # Sort by similarity (higher is better)
+            ranked_articles = sorted(zip(id_list, similarities), key=lambda x: x[1], reverse=True)
+            # Extract ranked IDs:
+            ranked_ids = [article_id for article_id, similarity in ranked_articles]
+            print(f"Semantic search code: {ranked_ids}")  # DEBUG: Check the results
+            return ranked_ids
+        except Exception as e:
+            log_error(f"Semantic search code. : {e}")
+            return [f"Error during semantic search code: {e}"]
     except Exception as e:
         log_error(f"PubMed search error: {e}")
         return [f"Error during PubMed search: {e}"]
+def fetch_abstract(article_id: str) -> str:
+    """Fetches the abstract for a given PubMed article ID."""
+    ENTREZ_EMAIL = os.environ.get("ENTREZ_EMAIL", "[email protected]")
+    try:
+        Entrez.email = ENTREZ_EMAIL
+        handle = Entrez.efetch(db="pubmed", id=article_id, rettype="abstract", retmode="text")
+        abstract = handle.read()
+        handle.close()
+        return abstract
+    except Exception as e:
+        log_error(f"Error fetching abstract for {article_id}: {e}")
+        return f"Error fetching abstract for {article_id}: {e}"
 def medai_agent(query: str) -> str:
     """Orchestrates the medical literature review and presents abstract."""