Spaces:

mgbam
/

MCP_Res

Running

File size: 3,343 Bytes

# mcp/pubmed.py

import httpx
import xmltodict
import os

# Optional NCBI API key, read once at import time from the PUB_KEY
# environment variable; None when the variable is not set.
PUB_KEY = os.getenv("PUB_KEY")

# NCBI E-utilities endpoints used by this module: ESearch is queried for the
# matching ID list, EFetch for the full article records of those IDs.
PUBMED_EFETCH = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi"
PUBMED_ESEARCH = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi"

def _as_list(value):
    """Normalise an xmltodict child to a list (None -> [], single node -> [node])."""
    if value is None:
        return []
    if isinstance(value, list):
        return value
    return [value]


def _node_text(node):
    """Return the text content of an xmltodict node as a plain string."""
    if node is None:
        return ""
    if isinstance(node, dict):
        # Elements carrying attributes or inline markup are parsed into a
        # dict whose character data lives under "#text".
        return str(node.get("#text") or "")
    return str(node)


def _publication_year(art):
    """Return the publication year of an Article node, or "" if none is present."""
    for entry in _as_list(art.get("ArticleDate")):
        year = _node_text(entry.get("Year")) if isinstance(entry, dict) else ""
        if year:
            return year
    # Fall back to Journal > JournalIssue > PubDate; each level may be
    # missing or parsed as None, hence the "or {}" guards.
    issue = (art.get("Journal") or {}).get("JournalIssue") or {}
    pubdate = issue.get("PubDate") or {}
    return _node_text(pubdate.get("Year")) or _node_text(pubdate.get("MedlineDate"))


def _author_names(art):
    """Return the authors of an Article node as "Last Fore, ..." or "Unknown"."""
    names = []
    for author in _as_list((art.get("AuthorList") or {}).get("Author")):
        if not isinstance(author, dict):
            continue
        last = _node_text(author.get("LastName"))
        fore = _node_text(author.get("ForeName"))
        # Group authors have no personal name parts, only a CollectiveName.
        name = f"{last} {fore}".strip() or _node_text(author.get("CollectiveName"))
        if name:
            names.append(name)
    return ", ".join(names) if names else "Unknown"


def _abstract_text(art):
    """Return the abstract of an Article node with its sections joined by spaces."""
    sections = _as_list((art.get("Abstract") or {}).get("AbstractText"))
    return " ".join(text for text in map(_node_text, sections) if text)


def _summarize_article(record):
    """Convert one parsed PubmedArticle node to a result dict, or None if unusable."""
    citation = record.get("MedlineCitation") if isinstance(record, dict) else None
    if not isinstance(citation, dict):
        return None
    art = citation.get("Article")
    if not isinstance(art, dict):
        return None
    pmid = _node_text(citation.get("PMID"))
    return {
        "title": _node_text(art.get("ArticleTitle")),
        "authors": _author_names(art),
        "summary": _abstract_text(art),
        "link": f"https://pubmed.ncbi.nlm.nih.gov/{pmid}/",
        "published": _publication_year(art),
        "source": "PubMed",
    }


async def fetch_pubmed(query: str, max_results: int = 5) -> list:
    """Search PubMed for *query* and return summaries of the matching articles.

    ESearch is called to obtain up to *max_results* article IDs, then EFetch
    to retrieve the records as XML. No sort parameter is sent, so the result
    order is whatever ESearch returns by default.

    Args:
        query: Search term passed to ESearch as ``term``.
        max_results: Upper bound on the number of IDs requested (``retmax``).

    Returns:
        A list of dicts with the string keys ``title``, ``authors``,
        ``summary``, ``link``, ``published`` and ``source``. The list is empty
        when the query is blank or nothing matches. Records without a
        MedlineCitation/Article node are skipped.

    Raises:
        httpx.HTTPStatusError: If either request returns a 4xx/5xx status.
        httpx.RequestError: On transport-level failures.
    """
    if not query or not query.strip():
        return []

    # Only send api_key when one is configured: httpx serialises None as an
    # empty "api_key=" parameter rather than omitting it.
    auth = {"api_key": PUB_KEY} if PUB_KEY else {}

    async with httpx.AsyncClient() as client:
        search_params = {
            "db": "pubmed",
            "term": query,
            "retmax": max_results,
            "retmode": "json",
            **auth,
        }
        search_resp = await client.get(PUBMED_ESEARCH, params=search_params)
        # Surface HTTP failures as HTTP errors instead of a confusing
        # KeyError / JSON decode error further down.
        search_resp.raise_for_status()
        ids = (search_resp.json().get("esearchresult") or {}).get("idlist") or []
        if not ids:
            return []

        fetch_params = {
            "db": "pubmed",
            "id": ",".join(ids),
            "retmode": "xml",
            **auth,
        }
        fetch_resp = await client.get(PUBMED_EFETCH, params=fetch_params)
        fetch_resp.raise_for_status()

    # An empty <PubmedArticleSet/> parses to None, and a single article is
    # parsed as a dict rather than a one-element list.
    article_set = xmltodict.parse(fetch_resp.text).get("PubmedArticleSet") or {}
    records = _as_list(article_set.get("PubmedArticle"))

    summaries = (_summarize_article(record) for record in records)
    return [summary for summary in summaries if summary is not None]