File size: 770 Bytes
0a3aede
 
 
 
bb95791
0375bcb
bb95791
 
 
0375bcb
 
 
 
 
 
 
 
bb95791
 
0a3aede
 
bb95791
0a3aede
0375bcb
bb95791
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# mcp/nlp.py

import spacy

def load_model():
    """Load the first available spaCy pipeline, preferring the scispaCy model.

    Tries ``en_core_sci_sm`` first and falls back to ``en_core_web_sm``.

    Returns:
        The pipeline object returned by ``spacy.load`` for the first model
        that loads successfully.

    Raises:
        RuntimeError: If neither model can be loaded. The ``OSError`` from
            the last attempt is attached as the explicit cause.
    """
    # Ordered by preference: scispaCy model first, generic spaCy model second.
    candidates = ("en_core_sci_sm", "en_core_web_sm")
    last_error = None
    for model_name in candidates:
        try:
            return spacy.load(model_name)
        except OSError as err:
            # Model not installed; remember why and try the next candidate.
            last_error = err
    raise RuntimeError(
        "No spaCy model found! Please install 'en_core_sci_sm' (preferred) or 'en_core_web_sm' "
        "in your Dockerfile or requirements.txt before running the app."
    ) from last_error

# Load the pipeline once at import time so extract_keywords() can reuse it on
# every call; load_model() raises RuntimeError here if no model is installed.
nlp = load_model()

def extract_keywords(text: str) -> list:
    """Extract biomedical or general entities from text.

    Runs the module-level ``nlp`` pipeline over ``text`` and collects the
    text of each entity found in ``doc.ents``.

    Args:
        text: Input text to analyse.

    Returns:
        A list of unique entity strings, each longer than two characters
        after surrounding whitespace is stripped, in order of first
        appearance in ``doc.ents``.
    """
    doc = nlp(text)
    # Strip once so the length filter, the de-duplication and the returned
    # value all agree on the same string.
    stripped = (ent.text.strip() for ent in doc.ents)
    # dict.fromkeys de-duplicates while keeping first-seen order, so the
    # result is stable across runs (iterating a set of str is not).
    return list(dict.fromkeys(kw for kw in stripped if len(kw) > 2))