#!/usr/bin/env python3 """MedGenesis – DisGeNET async helper (disease → gene associations). Features ~~~~~~~~ * Accepts optional Bearer token via env **`DISGENET_KEY`** (rate‑limit free). * Endpoint: `https://www.disgenet.org/api/gda/disease/` * Back‑off retry (2×, 4×) for 429/5xx. * LRU cache (24 h, 512 queries) to minimise API calls. * Returns top *N* rows (default = 10) as `list[dict]`. """ from __future__ import annotations import os, asyncio, httpx from functools import lru_cache from typing import List, Dict, Any _BASE = "https://www.disgenet.org/api/gda/disease" _TOKEN = os.getenv("DISGENET_KEY") _HEADERS = {"Authorization": f"Bearer {_TOKEN}"} if (_TOKEN := os.getenv("DISGENET_KEY")) else {} _TIMEOUT = 15 # --------------------------------------------------------------------- # Internal fetch with retry # --------------------------------------------------------------------- async def _fetch(url: str, *, retries: int = 3) -> List[Dict[str, Any]]: delay = 2 for _ in range(retries): async with httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS) as cli: resp = await cli.get(url, params={"source": "ALL", "format": "json"}) if resp.status_code == 200: return resp.json() if resp.status_code in {429, 500, 503}: await asyncio.sleep(delay) delay *= 2 continue resp.raise_for_status() return [] # final fallback # --------------------------------------------------------------------- # Public API – cached 24 h # --------------------------------------------------------------------- @lru_cache(maxsize=512) async def disease_to_genes(disease_name: str, *, limit: int = 10) -> List[Dict]: """Return up to *limit* gene‑association dicts for *disease_name*. Parameters ---------- disease_name : str Free‑text disease label (e.g. "glioblastoma"). Internally converted to lowercase and URL‑encoded. limit : int, optional Maximum number of rows to return (default = 10). """ url = f"{_BASE}/{disease_name.lower()}" data = await _fetch(url) return data[:limit] # --------------------------------------------------------------------- # CLI demo # --------------------------------------------------------------------- if __name__ == "__main__": import json async def _demo(): out = await disease_to_genes("glioblastoma", limit=5) print(json.dumps(out[:2], indent=2)) asyncio.run(_demo())