Spaces:
Running
Running
Upload 2 files
Browse files
- mtdna_classifier.py +7 -0
- requirements.txt +1 -0
mtdna_classifier.py
CHANGED
@@ -6,6 +6,7 @@ import re
|
|
6 |
from Bio import Entrez
|
7 |
import fitz
|
8 |
import spacy
|
|
|
9 |
from NER.PDF import pdf
|
10 |
from NER.WordDoc import wordDoc
|
11 |
from NER.html import extractHTML
|
@@ -110,6 +111,12 @@ def infer_location_fromQAModel(context, question="Where is the mtDNA sample from
|
|
110 |
return result["answer"]
|
111 |
# 4.2: Infer from haplogroup
|
112 |
# Load pre-trained spaCy model for NER
|
|
|
|
|
|
|
|
|
|
|
|
|
113 |
nlp = spacy.load("en_core_web_sm")
|
114 |
# Define the haplogroup-to-region mapping (simple rule-based)
|
115 |
import csv
|
|
|
6 |
from Bio import Entrez
|
7 |
import fitz
|
8 |
import spacy
|
9 |
+
from spacy.cli import download
|
10 |
from NER.PDF import pdf
|
11 |
from NER.WordDoc import wordDoc
|
12 |
from NER.html import extractHTML
|
|
|
111 |
return result["answer"]
|
112 |
# 4.2: Infer from haplogroup
|
113 |
# Load pre-trained spaCy model for NER
|
114 |
+
try:
|
115 |
+
nlp = spacy.load("en_core_web_sm")
|
116 |
+
except OSError:
|
117 |
+
download("en_core_web_sm")
|
118 |
+
nlp = spacy.load("en_core_web_sm")
|
119 |
+
|
120 |
nlp = spacy.load("en_core_web_sm")
|
121 |
# Define the haplogroup-to-region mapping (simple rule-based)
|
122 |
import csv
|
requirements.txt
CHANGED
@@ -14,6 +14,7 @@ spire.doc
|
|
14 |
Spire.XLS
|
15 |
thefuzz
|
16 |
wordsegment
|
|
|
17 |
spacy-lookups-data
|
18 |
gensim
|
19 |
nltk
|
|
|
14 |
Spire.XLS
|
15 |
thefuzz
|
16 |
wordsegment
|
17 |
+
spacy
|
18 |
spacy-lookups-data
|
19 |
gensim
|
20 |
nltk
|