mgbam committed on
Commit
0375bcb
·
verified ·
1 Parent(s): 85862d0

Update mcp/nlp.py

Browse files
Files changed (1) hide show
  1. mcp/nlp.py +10 -6
mcp/nlp.py CHANGED
@@ -1,21 +1,25 @@
1
  # mcp/nlp.py
2
 
3
  import spacy
4
- import subprocess
5
 
6
  def load_model():
 
7
  try:
8
  return spacy.load("en_core_sci_sm")
9
  except OSError:
10
- try:
11
- subprocess.run(["python", "-m", "spacy", "download", "en_core_web_sm"], check=True)
12
- return spacy.load("en_core_web_sm")
13
- except Exception as e:
14
- raise RuntimeError("Failed to download spaCy model. Details: " + str(e))
 
 
 
15
 
16
  nlp = load_model()
17
 
18
  def extract_keywords(text: str):
19
  """Extract biomedical or general entities from text."""
20
  doc = nlp(text)
 
21
  return list(set(ent.text for ent in doc.ents if len(ent.text.strip()) > 2))
 
1
  # mcp/nlp.py
2
 
3
  import spacy
 
4
 
5
  def load_model():
6
+ # Try scispacy first (if available)
7
  try:
8
  return spacy.load("en_core_sci_sm")
9
  except OSError:
10
+ pass # Not installed, try generic spaCy
11
+ try:
12
+ return spacy.load("en_core_web_sm")
13
+ except OSError:
14
+ raise RuntimeError(
15
+ "No spaCy model found! Please install 'en_core_sci_sm' (preferred) or 'en_core_web_sm' "
16
+ "in your Dockerfile or requirements.txt before running the app."
17
+ )
18
 
19
  nlp = load_model()
20
 
21
  def extract_keywords(text: str):
22
  """Extract biomedical or general entities from text."""
23
  doc = nlp(text)
24
+ # Only keep entities longer than 2 characters, no dups
25
  return list(set(ent.text for ent in doc.ents if len(ent.text.strip()) > 2))