|
|
|
|
|
import httpx |
|
import xmltodict |
|
import os |
|
|
|
# NCBI E-utilities endpoints: ESearch and EFetch.
PUBMED_ESEARCH = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
PUBMED_EFETCH = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"

# API key read from the PUB_KEY environment variable at import time;
# None when the variable is not set.
PUB_KEY = os.getenv("PUB_KEY")
|
|
|
def _node_text(node):
    """Return the character data of an xmltodict node as a string.

    xmltodict yields a plain string for simple elements, a dict (with the
    text under "#text") for elements carrying attributes or child markup,
    and None for empty elements.
    """
    if node is None:
        return ""
    if isinstance(node, dict):
        return node.get("#text") or ""
    return str(node)


def _as_list(node):
    """Normalise an xmltodict child to a list (None -> [], single -> [single])."""
    if node is None:
        return []
    return node if isinstance(node, list) else [node]


def _published_year(art):
    """Return the ArticleDate year, else the journal PubDate Year/MedlineDate."""
    for entry in _as_list(art.get("ArticleDate")):
        if isinstance(entry, dict):
            year = _node_text(entry.get("Year"))
            if year:
                return year

    # Fall back to the journal issue date when no ArticleDate year exists.
    journal = art.get("Journal") or {}
    issue = journal.get("JournalIssue") or {}
    pubdate = issue.get("PubDate") or {}
    if not isinstance(pubdate, dict):
        return ""
    return _node_text(pubdate.get("Year")) or _node_text(pubdate.get("MedlineDate"))


def _format_authors(art):
    """Return authors as "LastName ForeName, ..." or "Unknown" if none usable."""
    author_list = art.get("AuthorList") or {}
    names = []
    for author in _as_list(author_list.get("Author")):
        if not isinstance(author, dict):
            continue
        last = _node_text(author.get("LastName"))
        fore = _node_text(author.get("ForeName"))
        # Keep single-name authors and group authors instead of dropping them.
        name = f"{last} {fore}".strip() or _node_text(author.get("CollectiveName"))
        if name:
            names.append(name)
    return ", ".join(names) if names else "Unknown"


def _abstract_text(art):
    """Join all AbstractText sections into one space-separated string."""
    abstract = art.get("Abstract") or {}
    sections = _as_list(abstract.get("AbstractText"))
    # Skip empty sections so they do not show up as "None" or blank gaps.
    return " ".join(text for text in map(_node_text, sections) if text)


async def fetch_pubmed(query: str, max_results: int = 5):
    """Search PubMed for ``query`` and return metadata for the matches.

    Runs an ESearch request to obtain article ids, then an EFetch request to
    retrieve the records as XML. No sort parameter is sent, so the order is
    whatever ESearch returns by default.

    Args:
        query: Search term passed to ESearch as ``term``. A blank query
            returns an empty list without issuing any request.
        max_results: Maximum number of ids requested (``retmax``).

    Returns:
        A list of dicts with the keys ``title``, ``authors``, ``summary``,
        ``link``, ``published`` and ``source`` (always ``"PubMed"``). Empty
        when the search yields no ids.

    Raises:
        httpx.HTTPStatusError: If either request returns a 4xx/5xx status.
        KeyError: If a response lacks the expected top-level structure.
    """
    if not query or not query.strip():
        return []

    # Only send api_key when one is configured; a None value would otherwise
    # be encoded by httpx as an empty "api_key=" parameter.
    auth = {"api_key": PUB_KEY} if PUB_KEY else {}

    async with httpx.AsyncClient() as client:
        search_params = {
            "db": "pubmed",
            "term": query,
            "retmax": max_results,
            "retmode": "json",
            **auth,
        }
        resp = await client.get(PUBMED_ESEARCH, params=search_params)
        resp.raise_for_status()
        # "idlist" may be absent when ESearch reports a query-level error.
        ids = resp.json()["esearchresult"].get("idlist") or []
        if not ids:
            return []

        efetch_params = {
            "db": "pubmed",
            "id": ",".join(ids),
            "retmode": "xml",
            **auth,
        }
        efetch_resp = await client.get(PUBMED_EFETCH, params=efetch_params)
        efetch_resp.raise_for_status()

    # An empty <PubmedArticleSet/> parses to None rather than a dict.
    article_set = xmltodict.parse(efetch_resp.text)["PubmedArticleSet"] or {}

    results = []
    for entry in _as_list(article_set.get("PubmedArticle")):
        citation = entry["MedlineCitation"]
        art = citation["Article"]
        pmid = _node_text(citation["PMID"])
        results.append({
            "title": _node_text(art.get("ArticleTitle")),
            "authors": _format_authors(art),
            "summary": _abstract_text(art),
            "link": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/",
            "published": _published_year(art),
            "source": "PubMed",
        })
    return results
|
|