MCP_Res / mcp /nlp.py
mgbam's picture
Update mcp/nlp.py
ba02d68 verified
raw
history blame
517 Bytes
# mcp/nlp.py
import spacy
def load_model():
    """Load and return the spaCy ``en_core_web_sm`` pipeline.

    Returns:
        The object returned by ``spacy.load("en_core_web_sm")``.

    Raises:
        RuntimeError: If ``spacy.load`` raises ``OSError`` (the model package
            is not installed). The original ``OSError`` is attached as the
            cause so the underlying spaCy message stays in the traceback.
    """
    try:
        return spacy.load("en_core_web_sm")
    except OSError as err:
        # Chain explicitly so the traceback reads "direct cause" rather than
        # "during handling of the above exception, another exception occurred".
        raise RuntimeError(
            "spaCy model 'en_core_web_sm' not found. "
            "Install it in your Dockerfile or requirements.txt before building the app."
        ) from err
# Load the pipeline once at import time; extract_keywords() reuses this shared
# instance. If the model is missing, load_model() raises RuntimeError here,
# so importing this module fails.
nlp = load_model()
def extract_keywords(text: str):
    """Extract unique named-entity strings from *text*.

    Runs the module-level ``nlp`` pipeline over *text* and collects the
    ``text`` of every entity in ``doc.ents`` whose whitespace-stripped length
    is greater than 2 characters.

    Args:
        text: The input string to analyse.

    Returns:
        A list of entity strings without duplicates, in order of first
        appearance in *text*. Empty or whitespace-only input yields ``[]``.
    """
    # Nothing to analyse: skip the pipeline entirely. The result is the same
    # as running it, since any entity found would fail the length filter.
    if not text or not text.strip():
        return []

    doc = nlp(text)
    # dict.fromkeys de-duplicates while preserving insertion order, so the
    # result is deterministic (a set would give arbitrary ordering).
    # The length check uses the stripped text, but the entity text itself is
    # returned unmodified.
    unique_entities = dict.fromkeys(
        ent.text for ent in doc.ents if len(ent.text.strip()) > 2
    )
    return list(unique_entities)