VyLala committed on
Commit
836521b
·
verified ·
1 Parent(s): 9cd8ed6

Upload 2 files

Browse files
Files changed (2) hide show
  1. mtdna_classifier.py +7 -0
  2. requirements.txt +1 -0
mtdna_classifier.py CHANGED
@@ -6,6 +6,7 @@ import re
6
  from Bio import Entrez
7
  import fitz
8
  import spacy
 
9
  from NER.PDF import pdf
10
  from NER.WordDoc import wordDoc
11
  from NER.html import extractHTML
@@ -110,6 +111,12 @@ def infer_location_fromQAModel(context, question="Where is the mtDNA sample from
110
  return result["answer"]
111
  # 4.2: Infer from haplogroup
112
  # Load pre-trained spaCy model for NER
 
 
 
 
 
 
113
  nlp = spacy.load("en_core_web_sm")
114
  # Define the haplogroup-to-region mapping (simple rule-based)
115
  import csv
 
6
  from Bio import Entrez
7
  import fitz
8
  import spacy
9
+ from spacy.cli import download
10
  from NER.PDF import pdf
11
  from NER.WordDoc import wordDoc
12
  from NER.html import extractHTML
 
111
  return result["answer"]
112
  # 4.2: Infer from haplogroup
113
  # Load pre-trained spaCy model for NER
114
+ try:
115
+ nlp = spacy.load("en_core_web_sm")
116
+ except OSError:
117
+ download("en_core_web_sm")
118
+ nlp = spacy.load("en_core_web_sm")
119
+
120
  nlp = spacy.load("en_core_web_sm")
121
  # Define the haplogroup-to-region mapping (simple rule-based)
122
  import csv
requirements.txt CHANGED
@@ -14,6 +14,7 @@ spire.doc
14
  Spire.XLS
15
  thefuzz
16
  wordsegment
 
17
  spacy-lookups-data
18
  gensim
19
  nltk
 
14
  Spire.XLS
15
  thefuzz
16
  wordsegment
17
+ spacy
18
  spacy-lookups-data
19
  gensim
20
  nltk