|
|
|
|
|
import httpx |
|
import xmltodict |
|
import os |
|
|
|
# NCBI E-utilities endpoints: ESearch is queried for matching record IDs,
# EFetch for the full records behind those IDs.
PUBMED_ESEARCH = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"
PUBMED_EFETCH = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"

# API key taken from the PUB_KEY environment variable; None when it is unset.
PUB_KEY = os.getenv("PUB_KEY")
|
|
|
def _as_list(value):
    """Normalize an xmltodict value to a list.

    xmltodict yields None for an empty element, a bare object for a single
    child, and a list only when the child tag repeats.
    """
    if value is None:
        return []
    return value if isinstance(value, list) else [value]


def _child(node, key):
    """Return ``node[key]`` when *node* is a mapping that has it, else None."""
    return node.get(key) if isinstance(node, dict) else None


def _node_text(node):
    """Return the text of an xmltodict node given as a str, a dict, or None."""
    if isinstance(node, dict):
        # Elements with attributes or child tags parse to a dict whose
        # character data is stored under "#text".
        return node.get("#text") or ""
    return "" if node is None else str(node)


def _article_to_result(entry):
    """Convert one parsed PubmedArticle mapping to a result dict.

    Returns None when the entry has no ``MedlineCitation/Article`` mapping.
    """
    citation = _child(entry, "MedlineCitation")
    art = _child(citation, "Article")
    if not isinstance(art, dict):
        return None

    # Keep only authors that carry both name parts, as "LastName ForeName".
    name_pairs = (
        (_node_text(_child(person, "LastName")), _node_text(_child(person, "ForeName")))
        for person in _as_list(_child(_child(art, "AuthorList"), "Author"))
    )
    authors = ", ".join(f"{last} {fore}" for last, fore in name_pairs if last and fore)

    # An abstract may be one plain string, one attributed section, or several
    # sections; join the text of every section that has any.
    section_texts = (
        _node_text(section)
        for section in _as_list(_child(_child(art, "Abstract"), "AbstractText"))
    )
    summary = " ".join(text for text in section_texts if text)

    dates = _as_list(art.get("ArticleDate"))
    published = _node_text(_child(dates[0], "Year")) if dates else ""

    pmid = _node_text(_child(citation, "PMID"))

    return {
        "title": _node_text(art.get("ArticleTitle")),
        "authors": authors,
        "summary": summary,
        "link": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/",
        "published": published,
        "source": "PubMed",
    }


async def fetch_pubmed(query: str, max_results: int = 5):
    """Search PubMed for *query* and return details of the matching articles.

    Two requests are made: one to PUBMED_ESEARCH for up to *max_results*
    record IDs, then one to PUBMED_EFETCH for the XML records of those IDs.
    No sort parameter is sent, so ordering is whatever the service defaults to.

    Args:
        query: Search term passed as the ``term`` parameter.
        max_results: Upper bound on the number of IDs requested (``retmax``).

    Returns:
        A list of dicts with the string-valued keys ``title``, ``authors``
        (comma-separated "LastName ForeName"), ``summary`` (abstract text),
        ``link``, ``published`` (year of the first ArticleDate, or "") and
        ``source`` (always "PubMed"). The list is empty when the query is
        blank, *max_results* is not positive, or nothing matches.

    Raises:
        httpx.HTTPStatusError: If either request returns a 4xx/5xx status.
    """
    if not query or not query.strip() or max_results <= 0:
        return []

    # Leave the key out entirely when PUB_KEY is unset or empty instead of
    # sending a blank/None api_key parameter.
    auth = {"api_key": PUB_KEY} if PUB_KEY else {}

    async with httpx.AsyncClient() as client:
        search_resp = await client.get(
            PUBMED_ESEARCH,
            params={
                "db": "pubmed",
                "term": query,
                "retmax": max_results,
                "retmode": "json",
                **auth,
            },
        )
        search_resp.raise_for_status()
        search_result = search_resp.json().get("esearchresult") or {}
        ids = search_result.get("idlist") or []
        if not ids:
            return []

        efetch_resp = await client.get(
            PUBMED_EFETCH,
            params={
                "db": "pubmed",
                "id": ",".join(map(str, ids)),
                "retmode": "xml",
                **auth,
            },
        )
        efetch_resp.raise_for_status()

    # An empty <PubmedArticleSet/> parses to None rather than a mapping.
    article_set = xmltodict.parse(efetch_resp.text).get("PubmedArticleSet")
    entries = _as_list(_child(article_set, "PubmedArticle"))

    converted = (_article_to_result(entry) for entry in entries)
    return [result for result in converted if result is not None]
|
|