# PubMed semantic search: Entrez retrieval + sentence-transformer re-ranking.
import os

from Bio import Entrez
from sentence_transformers import SentenceTransformer, util
# ---------------------------- Helper Functions ----------------------------
def log_error(message: str) -> None:
    """Log an error message to the console and append it to error_log.txt.

    Args:
        message: Human-readable error description.
    """
    print(f"ERROR: {message}")
    try:
        with open("error_log.txt", "a") as f:
            f.write(f"{message}\n")
    except OSError:
        # If logging fails, still print to console (the print above already ran).
        print("Couldn't write to error log file.")
# ---------------------------- Functions ----------------------------
def search_pubmed(query: str) -> list:
    """Search PubMed and return article IDs ranked by semantic similarity.

    Fetches up to 5 PubMed IDs for the query, downloads their abstracts, then
    re-ranks the IDs by cosine similarity between the query embedding and each
    abstract embedding (all-mpnet-base-v2 sentence-transformer model).

    Args:
        query: Free-text search string.

    Returns:
        A list of PubMed ID strings, best match first. On failure, a
        single-element list containing an error message string (callers
        detect this by looking for "Error" in the entries).
    """
    try:
        Entrez.email = os.environ.get("ENTREZ_EMAIL", "[email protected]")
        print(f"Entrez Email: {Entrez.email}")  # DEBUG: Check the email being used
        print(f"PubMed Query: {query}")  # DEBUG: Check the query being sent

        # Step 1: fetch candidate PubMed IDs.
        try:
            handle = Entrez.esearch(db="pubmed", term=query, retmax="5")
            record = Entrez.read(handle)
            handle.close()
            id_list = record["IdList"]
            print(f"Original PubMed Results: {id_list}")
        except Exception as e:
            log_error(f"First step of PubMed search error: {e}")
            return [f"First step of Error during PubMed search: {e}"]

        # Step 2: fetch abstracts, keeping IDs and abstracts in lockstep.
        # Skipped articles must be dropped from BOTH lists, otherwise the
        # zip() in the ranking step pairs similarities with the wrong IDs.
        valid_ids = []
        abstracts = []
        for article_id in id_list:
            abstract_text = fetch_abstract(article_id)
            if "Error" in abstract_text:
                print(f"Skipping article {article_id} due to abstract error: {abstract_text}")
                continue
            valid_ids.append(article_id)
            abstracts.append(abstract_text)

        if not abstracts:
            # Nothing usable to rank; fall back to the raw (possibly empty) ID list.
            return list(id_list)

        # Step 3: semantic re-ranking with sentence transformers.
        try:
            model = SentenceTransformer('all-mpnet-base-v2')
            query_embedding = model.encode(query)
            abstract_embeddings = model.encode(abstracts)
            # Cosine similarity of the query against every abstract.
            similarities = util.cos_sim(query_embedding, abstract_embeddings)[0]
            # Sort IDs by similarity, highest first.
            ranked_articles = sorted(zip(valid_ids, similarities), key=lambda x: x[1], reverse=True)
            ranked_ids = [article_id for article_id, _similarity in ranked_articles]
            print(f"Semantic search code: {ranked_ids}")  # DEBUG: Check the results
            return ranked_ids
        except Exception as e:
            log_error(f"Semantic search code. : {e}")
            return [f"Error during semantic search code: {e}"]
    except Exception as e:
        log_error(f"PubMed search error: {e}")
        return [f"Error during PubMed search: {e}"]
def fetch_abstract(article_id: str) -> str:
    """Fetch the plain-text abstract for a given PubMed article ID.

    Args:
        article_id: A PubMed ID as returned by Entrez.esearch.

    Returns:
        The abstract text, or an error message string containing "Error"
        on failure (callers test for the substring "Error").
    """
    try:
        Entrez.email = os.environ.get("ENTREZ_EMAIL", "[email protected]")
        handle = Entrez.efetch(db="pubmed", id=article_id, rettype="abstract", retmode="text")
        try:
            abstract = handle.read()
        finally:
            # Release the network handle even if read() raises.
            handle.close()
        return abstract
    except Exception as e:
        log_error(f"Error fetching abstract for {article_id}: {e}")
        return f"Error fetching abstract for {article_id}: {e}"
def medai_agent(query: str) -> str:
    """Orchestrate the medical literature review and render abstracts as HTML.

    Args:
        query: Free-text medical question or topic.

    Returns:
        Concatenated HTML <div> blocks, one per article, or a plain message
        when no articles were found or the search itself failed.
    """
    article_ids = search_pubmed(query)
    if not (isinstance(article_ids, list) and article_ids):
        return f"No articles found or error occurred: {article_ids}"
    # search_pubmed signals failure by returning a non-empty list of error
    # message strings; those must not be passed to fetch_abstract as IDs.
    if any("Error" in str(article_id) for article_id in article_ids):
        return f"No articles found or error occurred: {article_ids}"
    results = []
    for article_id in article_ids:
        abstract = fetch_abstract(article_id)
        if "Error" not in abstract:
            results.append(f"<div class='article'>\n"
                           f"  <h3 class='article-id'>Article ID: {article_id}</h3>\n"
                           f"  <p class='abstract'><strong>Abstract:</strong> {abstract}</p>\n"
                           f"</div>\n")
        else:
            results.append(f"<div class='article error'>\n"
                           f"  <h3 class='article-id'>Article ID: {article_id}</h3>\n"
                           f"  <p class='error-message'>Error processing article: {abstract}</p>\n"
                           f"</div>\n")
    return "\n".join(results)