File size: 1,000 Bytes
0a3aede
c3f5ed6
0a3aede
c3f5ed6
 
2c1c247
c3f5ed6
 
 
 
 
 
 
 
2c1c247
c3f5ed6
2c1c247
c3f5ed6
 
 
2c1c247
0a3aede
c3f5ed6
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# mcp/nlp.py
import asyncio
import logging
from typing import List, Dict

import spacy

from mcp.umls import lookup_umls

# Only the small English pipeline is loaded; the name is kept in one place so
# the initial load, the download and the retry cannot drift apart.
_SPACY_MODEL = "en_core_web_sm"

try:
    nlp = spacy.load(_SPACY_MODEL)
except OSError:
    # The load failed, which is taken to mean the model has not been
    # downloaded yet: fetch it, then load again.
    from spacy.cli import download

    download(_SPACY_MODEL)
    nlp = spacy.load(_SPACY_MODEL)

_logger = logging.getLogger(__name__)


async def extract_umls_concepts(text: str) -> List[Dict]:
    """Run spaCy NER on *text* and look each entity up in UMLS.

    Entity texts are stripped of surrounding whitespace, dropped when the
    stripped text has two characters or fewer, and de-duplicated in order of
    first appearance. All remaining terms are passed to ``lookup_umls``
    concurrently.

    Args:
        text: The raw text to analyse with the module-level ``nlp`` pipeline.

    Returns:
        The lookup results that are dicts with a truthy ``"cui"`` entry, in
        the order their terms first appeared in *text*. Lookups that raised,
        and results without a CUI, are left out. Returns an empty list when
        no usable entity is found.
    """
    doc = nlp(text)

    # dict.fromkeys de-duplicates while keeping first-occurrence order, so the
    # lookup order (and therefore the result order) is stable across runs; a
    # set would reorder terms depending on string hashing. The stripped text
    # is used for both the length filter and the lookup so they stay in sync.
    terms = list(dict.fromkeys(
        stripped
        for stripped in (ent.text.strip() for ent in doc.ents)
        if len(stripped) > 2
    ))
    if not terms:
        return []

    # Launch every lookup concurrently. return_exceptions=True keeps one
    # failing lookup from discarding the results of the others.
    outcomes = await asyncio.gather(
        *(lookup_umls(term) for term in terms),
        return_exceptions=True,
    )

    concepts: List[Dict] = []
    for outcome in outcomes:
        if isinstance(outcome, BaseException):
            # Still best-effort, but no longer silent. The term itself is not
            # logged. NOTE(review): input may be sensitive text - confirm the
            # logging policy before adding it.
            _logger.warning(
                "UMLS lookup failed (%s): %s", type(outcome).__name__, outcome
            )
            continue
        if isinstance(outcome, dict) and outcome.get("cui"):
            concepts.append(outcome)
    return concepts