# PubMed semantic search: Entrez retrieval + sentence-transformer re-ranking.
import os

from Bio import Entrez
from sentence_transformers import SentenceTransformer, util
# ---------------------------- Helper Functions ----------------------------
def log_error(message: str) -> None:
    """Log an error message to the console and append it to error_log.txt.

    Args:
        message: Human-readable error description.
    """
    print(f"ERROR: {message}")
    try:
        with open("error_log.txt", "a") as f:
            f.write(f"{message}\n")
    except OSError:
        # If logging fails, still print to console (the print above already ran).
        print("Couldn't write to error log file.")
# ---------------------------- Functions ----------------------------
def search_pubmed(query: str) -> list:
    """Search PubMed and return article IDs ranked by semantic similarity.

    Fetches up to 5 PubMed IDs for the query, downloads their abstracts, then
    re-ranks the IDs by cosine similarity between the query embedding and each
    abstract embedding (all-mpnet-base-v2 sentence-transformer model).

    Args:
        query: Free-text search string.

    Returns:
        A list of PubMed ID strings, best match first. On failure, a
        single-element list containing an error message string (callers
        detect this by looking for "Error" in the entries).
    """
    try:
        Entrez.email = os.environ.get("ENTREZ_EMAIL", "[email protected]")
        print(f"Entrez Email: {Entrez.email}")  # DEBUG: Check the email being used
        print(f"PubMed Query: {query}")  # DEBUG: Check the query being sent

        # Step 1: fetch candidate PubMed IDs.
        try:
            handle = Entrez.esearch(db="pubmed", term=query, retmax="5")
            record = Entrez.read(handle)
            handle.close()
            id_list = record["IdList"]
            print(f"Original PubMed Results: {id_list}")
        except Exception as e:
            log_error(f"First step of PubMed search error: {e}")
            return [f"First step of Error during PubMed search: {e}"]

        # Step 2: fetch abstracts, keeping IDs and abstracts in lockstep.
        # Skipped articles must be dropped from BOTH lists, otherwise the
        # zip() in the ranking step pairs similarities with the wrong IDs.
        valid_ids = []
        abstracts = []
        for article_id in id_list:
            abstract_text = fetch_abstract(article_id)
            if "Error" in abstract_text:
                print(f"Skipping article {article_id} due to abstract error: {abstract_text}")
                continue
            valid_ids.append(article_id)
            abstracts.append(abstract_text)

        if not abstracts:
            # Nothing usable to rank; fall back to the raw (possibly empty) ID list.
            return list(id_list)

        # Step 3: semantic re-ranking with sentence transformers.
        try:
            model = SentenceTransformer('all-mpnet-base-v2')
            query_embedding = model.encode(query)
            abstract_embeddings = model.encode(abstracts)
            # Cosine similarity of the query against every abstract.
            similarities = util.cos_sim(query_embedding, abstract_embeddings)[0]
            # Sort IDs by similarity, highest first.
            ranked_articles = sorted(zip(valid_ids, similarities), key=lambda x: x[1], reverse=True)
            ranked_ids = [article_id for article_id, _similarity in ranked_articles]
            print(f"Semantic search code: {ranked_ids}")  # DEBUG: Check the results
            return ranked_ids
        except Exception as e:
            log_error(f"Semantic search code. : {e}")
            return [f"Error during semantic search code: {e}"]
    except Exception as e:
        log_error(f"PubMed search error: {e}")
        return [f"Error during PubMed search: {e}"]
def fetch_abstract(article_id: str) -> str:
    """Fetch the plain-text abstract for a given PubMed article ID.

    Args:
        article_id: A PubMed ID as returned by Entrez.esearch.

    Returns:
        The abstract text, or an error message string containing "Error"
        on failure (callers test for the substring "Error").
    """
    try:
        Entrez.email = os.environ.get("ENTREZ_EMAIL", "[email protected]")
        handle = Entrez.efetch(db="pubmed", id=article_id, rettype="abstract", retmode="text")
        try:
            abstract = handle.read()
        finally:
            # Release the network handle even if read() raises.
            handle.close()
        return abstract
    except Exception as e:
        log_error(f"Error fetching abstract for {article_id}: {e}")
        return f"Error fetching abstract for {article_id}: {e}"
def medai_agent(query: str) -> str:
    """Orchestrate the medical literature review and render abstracts as HTML.

    Args:
        query: Free-text medical question or topic.

    Returns:
        Concatenated HTML <div> blocks, one per article, or a plain message
        when no articles were found or the search itself failed.
    """
    article_ids = search_pubmed(query)
    if not (isinstance(article_ids, list) and article_ids):
        return f"No articles found or error occurred: {article_ids}"
    # search_pubmed signals failure by returning a non-empty list of error
    # message strings; those must not be passed to fetch_abstract as IDs.
    if any("Error" in str(article_id) for article_id in article_ids):
        return f"No articles found or error occurred: {article_ids}"
    results = []
    for article_id in article_ids:
        abstract = fetch_abstract(article_id)
        if "Error" not in abstract:
            results.append(f"<div class='article'>\n"
                           f"  <h3 class='article-id'>Article ID: {article_id}</h3>\n"
                           f"  <p class='abstract'><strong>Abstract:</strong> {abstract}</p>\n"
                           f"</div>\n")
        else:
            results.append(f"<div class='article error'>\n"
                           f"  <h3 class='article-id'>Article ID: {article_id}</h3>\n"
                           f"  <p class='error-message'>Error processing article: {abstract}</p>\n"
                           f"</div>\n")
    return "\n".join(results)