|
|
|
import asyncio
import logging
from typing import List, Dict

import spacy

from mcp.umls import lookup_umls
|
|
|
|
|
# Name of the spaCy pipeline package backing the module-level ``nlp`` object.
_SPACY_MODEL = "en_core_web_sm"

try:
    nlp = spacy.load(_SPACY_MODEL)
except OSError:
    # spacy.load raised OSError for this model name; download the package
    # via spaCy's CLI helper and retry the load once.
    from spacy.cli import download

    download(_SPACY_MODEL)
    nlp = spacy.load(_SPACY_MODEL)
|
|
|
async def extract_umls_concepts(text: str) -> List[Dict]:
    """Look up UMLS concepts for the named entities spaCy finds in *text*.

    Steps:
      1. Run the module-level spaCy pipeline (``nlp``) over ``text``.
      2. Collect the unique, whitespace-stripped entity strings that are
         longer than two characters, in order of first appearance.
      3. Await ``lookup_umls`` for all of them concurrently.
      4. Keep every result that is a dict with a truthy ``"cui"`` entry.

    Args:
        text: Raw text to run entity recognition on.

    Returns:
        The successful lookup results, ordered by the first occurrence of
        their entity in ``text``. Empty when no entity qualifies or no
        lookup yields a usable result.

    Lookups that raise do not abort the call: the failure is logged as a
    warning and that term is skipped.
    """
    doc = nlp(text)

    # dict.fromkeys de-duplicates while preserving first-seen order, so the
    # lookups (and the returned concepts) follow document order instead of
    # the arbitrary iteration order of a set.
    stripped = (ent.text.strip() for ent in doc.ents)
    terms = list(dict.fromkeys(term for term in stripped if len(term) > 2))
    if not terms:
        return []

    # return_exceptions=True keeps one failing lookup from cancelling the
    # rest; gather returns outcomes in the same order as ``terms``.
    outcomes = await asyncio.gather(
        *(lookup_umls(term) for term in terms), return_exceptions=True
    )

    log = logging.getLogger(__name__)
    concepts = []
    for term, outcome in zip(terms, outcomes):
        if isinstance(outcome, BaseException):
            # Best-effort: report the failure instead of dropping it silently.
            log.warning("UMLS lookup failed for %r: %r", term, outcome)
        elif isinstance(outcome, dict) and outcome.get("cui"):
            concepts.append(outcome)
    return concepts
|
|