mgbam committed on
Commit
ba02d68
·
verified ·
1 Parent(s): 707ae8b

Update mcp/nlp.py

Browse files
Files changed (1) hide show
  1. mcp/nlp.py +4 -10
mcp/nlp.py CHANGED
@@ -3,23 +3,17 @@
3
  import spacy
4
 
5
  def load_model():
6
- # Try scispacy first (if available)
7
- try:
8
- return spacy.load("en_core_sci_sm")
9
- except OSError:
10
- pass # Not installed, try generic spaCy
11
  try:
12
  return spacy.load("en_core_web_sm")
13
  except OSError:
14
  raise RuntimeError(
15
- "No spaCy model found! Please install 'en_core_sci_sm' (preferred) or 'en_core_web_sm' "
16
- "in your Dockerfile or requirements.txt before running the app."
17
  )
18
 
19
  nlp = load_model()
20
 
21
  def extract_keywords(text: str):
22
- """Extract biomedical or general entities from text."""
23
  doc = nlp(text)
24
- # Only keep entities longer than 2 characters, no dups
25
- return list(set(ent.text for ent in doc.ents if len(ent.text.strip()) > 2))
 
3
  import spacy
4
 
5
  def load_model():
 
 
 
 
 
6
  try:
7
  return spacy.load("en_core_web_sm")
8
  except OSError:
9
  raise RuntimeError(
10
+ "spaCy model 'en_core_web_sm' not found. "
11
+ "Install it in your Dockerfile or requirements.txt before building the app."
12
  )
13
 
14
  nlp = load_model()
15
 
16
  def extract_keywords(text: str):
17
+ """Extract entities (longer than 2 chars, no duplicates)."""
18
  doc = nlp(text)
19
+ return list({ent.text for ent in doc.ents if len(ent.text.strip()) > 2})