mgbam committed on
Commit
bb95791
·
verified ·
1 Parent(s): 699c5d1

Update mcp/nlp.py

Browse files
Files changed (1) hide show
  1. mcp/nlp.py +14 -7
mcp/nlp.py CHANGED
@@ -1,14 +1,21 @@
1
  # mcp/nlp.py
2
 
3
  import spacy
 
4
 
5
- try:
6
- nlp = spacy.load("en_core_sci_sm")
7
- except Exception:
8
- nlp = spacy.load("en_core_web_sm") # Fallback to general English
 
 
 
 
 
 
 
9
 
10
  def extract_keywords(text: str):
11
- """Extract biomedical entities and drugs from text."""
12
  doc = nlp(text)
13
- keywords = list(set(ent.text for ent in doc.ents if len(ent.text) > 2))
14
- return keywords
 
1
  # mcp/nlp.py
2
 
3
  import spacy
4
+ import subprocess
5
 
6
def load_model():
    """Load the spaCy pipeline used by this module.

    Tries the scientific model ``en_core_sci_sm`` first and then the general
    English model ``en_core_web_sm``. Only when neither can be loaded is
    ``en_core_web_sm`` downloaded and loaded.

    Returns:
        The loaded spaCy pipeline object returned by ``spacy.load``.

    Raises:
        RuntimeError: If downloading or loading the fallback model fails.
            The original exception is attached as ``__cause__``.
    """
    # Function-scope imports keep this block self-contained without touching
    # the file's import section.
    import importlib
    import sys

    # Prefer an already-installed model; avoid a network download when the
    # general-English fallback is present locally.
    for model_name in ("en_core_sci_sm", "en_core_web_sm"):
        try:
            return spacy.load(model_name)
        except OSError:
            # Treated as "model not available" -- try the next candidate.
            continue

    try:
        # Use the running interpreter rather than whatever "python" resolves
        # to on PATH, so the model is installed into this environment.
        subprocess.run(
            [sys.executable, "-m", "spacy", "download", "en_core_web_sm"],
            check=True,
        )
        # Make sure the import system notices the freshly installed package.
        importlib.invalidate_caches()
        return spacy.load("en_core_web_sm")
    except Exception as e:
        raise RuntimeError(
            "Failed to download spaCy model. Details: " + str(e)
        ) from e


# Loaded once at import time and shared by every call in this module.
nlp = load_model()
17
 
18
def extract_keywords(text: str):
    """Extract biomedical or general entities from text.

    Runs the module-level ``nlp`` pipeline over *text* and collects the
    surface text of each entity found in ``doc.ents``.

    Args:
        text: Raw input text to analyse.

    Returns:
        A list of unique entity strings, stripped of surrounding whitespace
        and each longer than two characters, in order of first appearance.
    """
    doc = nlp(text)
    # Strip once and reuse the result, so the value that passes the length
    # filter is the same value that is returned.
    stripped = (ent.text.strip() for ent in doc.ents)
    # dict.fromkeys de-duplicates while keeping first-seen order, giving a
    # stable result across runs (set ordering of strings is not stable).
    return list(dict.fromkeys(kw for kw in stripped if len(kw) > 2))