MCP_Res / mcp /nlp.py
mgbam's picture
Update mcp/nlp.py
c3f5ed6 verified
raw
history blame
1 kB
# mcp/nlp.py
import asyncio
import spacy
from typing import List, Dict
from mcp.umls import lookup_umls
# Name of the spaCy pipeline this module relies on (the small English model).
_SPACY_MODEL = "en_core_web_sm"

try:
    nlp = spacy.load(_SPACY_MODEL)
except OSError:
    # spacy.load raised OSError, which is taken to mean the model package
    # has not been downloaded yet: fetch it once, then retry the load.
    from spacy.cli import download

    download(_SPACY_MODEL)
    nlp = spacy.load(_SPACY_MODEL)
async def extract_umls_concepts(text: str) -> List[Dict]:
    """Extract named entities from *text* and resolve them via ``lookup_umls``.

    Steps:
        1. Run the module-level spaCy pipeline (``nlp``) over the text.
        2. Collect the distinct entity strings (whitespace-stripped, longer
           than two characters), in order of first appearance.
        3. Await one ``lookup_umls`` call per distinct entity, all scheduled
           together through ``asyncio.gather``.
        4. Keep only results that are dicts with a truthy ``"cui"`` entry.

    Args:
        text: Free text to scan for entities.

    Returns:
        The accepted lookup results, ordered by the first occurrence of the
        corresponding entity in ``text``. Empty when the text is empty/blank,
        contains no usable entities, or every lookup failed.
    """
    # Nothing to analyse: skip the NLP pipeline entirely.
    if not text or not text.strip():
        return []

    doc = nlp(text)

    # dict.fromkeys de-duplicates while preserving first-seen order, so the
    # lookups (and therefore the returned list) are ordered deterministically
    # instead of following set iteration order. Stripping before
    # de-duplication ensures the value that passed the length filter is the
    # same value that is looked up.
    terms = list(
        dict.fromkeys(
            stripped
            for stripped in (ent.text.strip() for ent in doc.ents)
            if len(stripped) > 2
        )
    )

    # return_exceptions=True makes a failed lookup come back as an exception
    # object in its slot rather than aborting the whole batch (best effort).
    results = await asyncio.gather(
        *(lookup_umls(term) for term in terms),
        return_exceptions=True,
    )

    # Exception objects and any non-dict results fail the isinstance check;
    # dicts lacking a truthy "cui" value are dropped as well.
    return [
        result
        for result in results
        if isinstance(result, dict) and result.get("cui")
    ]