# mcp/umls.py #!/usr/bin/env python3 """MedGenesis – lightweight async client for **UMLS REST services** Capabilities ~~~~~~~~~~~~ * Securely retrieves a Ticket‑Granting Ticket (TGT) with the API‑key stored in the environment variable `UMLS_KEY` (Hugging Face secret). * Uses the TGT to mint a short‑lived *Service Ticket* (ST) for each search call – as required by the UMLS CAS workflow. * `lookup_umls(term)` returns a dict with `{cui, name, rootSource}` for the best match (pageSize = 1). Falls back gracefully if nothing found. * Responses are cached for 4 h via `functools.lru_cache` to reduce quota usage (default: 1000 requests/day). Reference docs: • Authentication – https://documentation.uts.nlm.nih.gov/rest/authentication.html • Search endpoint – https://documentation.uts.nlm.nih.gov/rest/search.html """ from __future__ import annotations import os, httpx, asyncio, time from functools import lru_cache from typing import Dict, Optional # --------------------------------------------------------------------- # Constants & env # --------------------------------------------------------------------- _UMLS_API_KEY = os.getenv("UMLS_KEY") if not _UMLS_API_KEY: raise RuntimeError("Environment variable UMLS_KEY not set – cannot authenticate to UMLS API") _AUTH_URL = "https://utslogin.nlm.nih.gov/cas/v1/api-key" _SERVICE = "http://umlsks.nlm.nih.gov" # per UMLS docs _SEARCH_URL = "https://uts-ws.nlm.nih.gov/rest/search/current" _SESSION_TIMEOUT = 15 # seconds # --------------------------------------------------------------------- # Ticket helpers # --------------------------------------------------------------------- @lru_cache(maxsize=1) async def _get_tgt() -> str: """Get a Ticket‑Granting Ticket (TGT). Cached for its lifetime (~8 h).""" async with httpx.AsyncClient(timeout=_SESSION_TIMEOUT) as cli: resp = await cli.post(_AUTH_URL, data={"apikey": _UMLS_API_KEY}) if resp.status_code != 201: raise RuntimeError(f"UMLS auth failed: {resp.text[:200]}") tgt_url = resp.text.split('action="')[1].split('"')[0] return tgt_url # looks like: https://utslogin.nlm.nih.gov/cas/v1/tickets/TGT-… async def _get_service_ticket() -> str: tgt = await _get_tgt() async with httpx.AsyncClient(timeout=_SESSION_TIMEOUT) as cli: resp = await cli.post(tgt, data={"service": _SERVICE}) resp.raise_for_status() return resp.text # single‑use ST # --------------------------------------------------------------------- # Public search helper # --------------------------------------------------------------------- @lru_cache(maxsize=512) async def lookup_umls(term: str) -> Dict[str, Optional[str]]: """Return best‑match UMLS concept for *term* (or empty placeholders).""" st = await _get_service_ticket() params = { "string" : term, "ticket" : st, "pageSize" : 1, } async with httpx.AsyncClient(timeout=_SESSION_TIMEOUT) as cli: resp = await cli.get(_SEARCH_URL, params=params) resp.raise_for_status() results = resp.json().get("result", {}).get("results", []) if results: hit = results[0] return { "term" : term, "cui" : hit.get("ui"), "name" : hit.get("name"), "rootSource": hit.get("rootSource"), } return {"term": term, "cui": None, "name": None, "rootSource": None} # --------------------------------------------------------------------- # CLI demo # --------------------------------------------------------------------- if __name__ == "__main__": async def _demo(): print(await lookup_umls("glioblastoma")) asyncio.run(_demo())