# mcp/nlp.py
"""Extract named entities from text with spaCy and look them up in UMLS."""

import asyncio
import logging
from typing import Dict, List

import spacy

from mcp.umls import lookup_umls

logger = logging.getLogger(__name__)

# Load only the small English model.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # The model is not installed yet: download it once, then load again.
    from spacy.cli import download

    download("en_core_web_sm")
    nlp = spacy.load("en_core_web_sm")


async def extract_umls_concepts(text: str) -> List[Dict]:
    """Return the UMLS concepts found for the named entities in *text*.

    Steps:
      1. Run the spaCy pipeline on the text and collect its entities.
      2. Look up every unique entity via ``lookup_umls``, all concurrently.
      3. Keep only the lookups that returned a dict with a truthy ``"cui"``.

    Args:
        text: Free text to scan for entities.

    Returns:
        The concept dicts produced by ``lookup_umls``, ordered by the first
        occurrence of their entity in ``doc.ents``. Failed lookups and
        results without a CUI are omitted; an empty list is returned when no
        entity qualifies.
    """
    # NOTE: nlp() is a plain synchronous call, so it runs on the event-loop
    # thread for as long as the pipeline takes.
    doc = nlp(text)

    # Strip first, then de-duplicate: the stripped form is what gets filtered
    # on length, so it is also what should be looked up. dict.fromkeys keeps
    # first-seen order, which makes the output order deterministic (a set
    # would reorder terms from run to run).
    stripped_texts = (ent.text.strip() for ent in doc.ents)
    terms = list(dict.fromkeys(t for t in stripped_texts if len(t) > 2))
    if not terms:
        return []

    # Kick off all lookups in parallel; exceptions come back as results so
    # one failing term cannot abort the others.
    results = await asyncio.gather(
        *(lookup_umls(term) for term in terms),
        return_exceptions=True,
    )

    concepts = []
    for term, result in zip(terms, results):
        if isinstance(result, BaseException):
            # Best-effort: skip the term, but leave a trace of the failure.
            logger.warning("UMLS lookup failed for %r: %r", term, result)
            continue
        if isinstance(result, dict) and result.get("cui"):
            concepts.append(result)
    return concepts