mgbam committed on
Commit
872e232
·
verified ·
1 Parent(s): 864cb28

Update mcp/disgenet.py

Browse files
Files changed (1) hide show
  1. mcp/disgenet.py +62 -17
mcp/disgenet.py CHANGED
@@ -1,23 +1,68 @@
1
- # mcp/disgenet.py
2
- """
3
- DisGeNET disease–gene associations.
 
 
 
 
 
 
 
4
  """
 
5
 
6
- import os, httpx
7
- from typing import List, Dict
 
8
 
9
- DISGENET_KEY = os.getenv("DISGENET_KEY")
10
- HEADERS = {"Authorization": f"Bearer {DISGENET_KEY}"} if DISGENET_KEY else {}
 
 
11
 
12
- BASE = "https://www.disgenet.org/api"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
- async def disease_to_genes(disease_name: str, limit: int = 10) -> List[Dict]:
15
- """
16
- Return top gene associations for a disease.
 
 
 
 
 
 
 
 
 
 
 
17
  """
18
- url = f"{BASE}/gda/disease/{disease_name.lower()}"
19
- async with httpx.AsyncClient(timeout=20, headers=HEADERS) as client:
20
- r = await client.get(url, params={"source": "ALL", "format": "json"})
21
- r.raise_for_status()
22
- data = r.json()
23
- return data[:limit]
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """MedGenesis – DisGeNET async helper (disease → gene associations).
3
+
4
+ Features
5
+ ~~~~~~~~
6
+ * Accepts optional Bearer token via env **`DISGENET_KEY`** (rate‑limit free).
7
+ * Endpoint: `https://www.disgenet.org/api/gda/disease/<disease_name>`
8
+ * Exponential back‑off retry (waits 2 s, then 4 s) for 429/5xx.
9
+ * LRU cache (24 h, 512 queries) to minimise API calls.
10
+ * Returns top *N* rows (default = 10) as `list[dict]`.
11
  """
12
+ from __future__ import annotations
13
 
14
+ import os, asyncio, httpx
15
+ from functools import lru_cache
16
+ from typing import List, Dict, Any
17
 
18
# DisGeNET REST endpoint for gene–disease associations; the disease name is
# appended as the final path segment.
_BASE = "https://www.disgenet.org/api/gda/disease"
# Optional API token, read once from the environment at import time.
_TOKEN = os.getenv("DISGENET_KEY")
# Send an Authorization header only when a token is configured.
_HEADERS = {"Authorization": f"Bearer {_TOKEN}"} if _TOKEN else {}
# Timeout handed to httpx.AsyncClient (seconds).
_TIMEOUT = 15
22
 
23
+ # ---------------------------------------------------------------------
24
+ # Internal fetch with retry
25
+ # ---------------------------------------------------------------------
26
async def _fetch(url: str, *, retries: int = 3) -> List[Dict[str, Any]]:
    """GET *url* and return the decoded JSON body.

    Transient failures (HTTP 429 and any 5xx status) are retried with
    exponential back-off: the wait starts at 2 s and doubles after each
    failed attempt.

    Parameters
    ----------
    url : str
        Fully-formed endpoint URL.
    retries : int, optional
        Maximum number of attempts (default = 3).

    Returns
    -------
    list[dict]
        The parsed JSON of a 200 response, or an empty list when every
        attempt ended in a retryable status.

    Raises
    ------
    httpx.HTTPStatusError
        When the server answers with a non-retryable error status.
    """
    delay = 2
    # One client for all attempts so the connection pool is reused.
    async with httpx.AsyncClient(timeout=_TIMEOUT, headers=_HEADERS) as cli:
        for attempt in range(retries):
            resp = await cli.get(url, params={"source": "ALL", "format": "json"})
            if resp.status_code == 200:
                return resp.json()
            if resp.status_code == 429 or 500 <= resp.status_code < 600:
                # Only wait when another attempt will actually follow.
                if attempt < retries - 1:
                    await asyncio.sleep(delay)
                    delay *= 2
                continue
            resp.raise_for_status()
    return []  # final fallback
39
 
40
+ # ---------------------------------------------------------------------
41
+ # Public API – cached 24 h
42
+ # ---------------------------------------------------------------------
43
# In-process cache of full result sets, keyed by lower-cased disease name.
# Each value is ``(expiry, rows)``; dict insertion order doubles as LRU order
# (oldest entry first).
_CACHE: Dict[str, Any] = {}
_CACHE_TTL = 24 * 60 * 60  # seconds
_CACHE_MAX = 512


async def disease_to_genes(disease_name: str, *, limit: int = 10) -> List[Dict]:
    """Return up to *limit* gene‑association dicts for *disease_name*.

    Results are cached per disease for 24 h (at most 512 diseases).
    ``functools.lru_cache`` cannot be used here: on an ``async def`` it would
    store the coroutine object, which can only be awaited once, so the second
    call with the same arguments would raise ``RuntimeError``.

    Parameters
    ----------
    disease_name : str
        Free‑text disease label (e.g. "glioblastoma"). Internally converted
        to lowercase and URL‑encoded.
    limit : int, optional
        Maximum number of rows to return (default = 10).

    Returns
    -------
    list[dict]
        The first *limit* rows of the response for the disease.
    """
    import time
    from urllib.parse import quote

    key = disease_name.lower()
    now = time.monotonic()

    entry = _CACHE.pop(key, None)
    if entry is not None and entry[0] > now:
        _CACHE[key] = entry  # re-insert: marks the key as most recently used
        rows = entry[1]
    else:
        # safe="" so that "/", "?" or "#" in a label cannot alter the URL path.
        rows = await _fetch(f"{_BASE}/{quote(key, safe='')}")
        # Empty results are not cached: _fetch also yields [] once its retries
        # are exhausted, and a transient outage must not be pinned for 24 h.
        if rows:
            _CACHE[key] = (now + _CACHE_TTL, rows)
            while len(_CACHE) > _CACHE_MAX:
                _CACHE.pop(next(iter(_CACHE)))  # evict least recently used
    return rows[:limit]


# Keep the ``cache_clear`` hook that the former ``lru_cache`` wrapper exposed.
disease_to_genes.cache_clear = _CACHE.clear
58
+
59
+
60
+ # ---------------------------------------------------------------------
61
+ # CLI demo
62
+ # ---------------------------------------------------------------------
63
if __name__ == "__main__":
    import json

    async def _run_demo():
        """Look up a few glioblastoma associations and print the first two."""
        rows = await disease_to_genes("glioblastoma", limit=5)
        preview = rows[:2]
        print(json.dumps(preview, indent=2))

    asyncio.run(_run_demo())