Spaces:
Running
Running
Update mtdna_classifier.py
Browse files- mtdna_classifier.py +13 -11
mtdna_classifier.py
CHANGED
@@ -236,17 +236,19 @@ def classify_mtDNA_sample_from_haplo(text):
|
|
236 |
}
|
237 |
# 4.3 Get from available NCBI
|
238 |
def infer_location_fromNCBI(accession):
|
239 |
-
|
240 |
-
|
241 |
-
|
242 |
-
|
243 |
-
|
244 |
-
|
245 |
-
|
246 |
-
|
247 |
-
|
248 |
-
|
249 |
-
|
|
|
|
|
250 |
|
251 |
# STEP 5: Main pipeline: accession -> 1. get pubmed id and isolate -> 2. get doi -> 3. get text -> 4. prediction -> 5. output: inferred location + explanation + confidence score
|
252 |
def classify_sample_location(accession):
|
|
|
236 |
}
|
237 |
# 4.3 Get from available NCBI
|
238 |
def infer_location_fromNCBI(accession):
    """Return the geographic location recorded at NCBI for an accession.

    Fetches the record text for ``accession`` from the ``nuccore`` database
    via ``Entrez.efetch`` and returns the value of the first
    ``/geo_loc_name``, ``/country`` or ``/location`` qualifier found in it.

    Args:
        accession: Identifier passed as ``id`` to ``Entrez.efetch``.

    Returns:
        * the qualifier value (e.g. ``"Brunei"``), with runs of whitespace
          collapsed to single spaces, when a qualifier is present;
        * ``None`` when the record was read but holds no such qualifier;
        * ``""`` when fetching or parsing raised any exception (the error is
          printed, not re-raised, so the pipeline can fall back to other
          sources).
    """
    try:
        # NOTE(review): the pattern below targets GenBank flat-file qualifiers
        # (/country="..."). "medline" does not appear to be a documented
        # rettype for nuccore ("gb" is the flat file) -- confirm what the
        # service actually returns for this request before relying on it.
        handle = Entrez.efetch(db="nuccore", id=accession, rettype="medline", retmode="text")
        try:
            text = handle.read()
        finally:
            # Release the connection even when read() fails part-way.
            handle.close()

        match = re.search(r'/(geo_loc_name|country|location)\s*=\s*"([^"]+)"', text)
        if match is None:
            return None

        # Long qualifier values are wrapped over several lines in flat-file
        # output; [^"]+ captures the line break and the indentation padding,
        # so fold them back into single spaces.
        return " ".join(match.group(2).split())

    except Exception as e:
        # Deliberate best-effort: any failure (network, parsing, ...) is
        # reported and signalled to the caller with an empty string.
        print("❌ Entrez error:", e)
        return ""
|
251 |
+
|
252 |
|
253 |
# STEP 5: Main pipeline: accession -> 1. get pubmed id and isolate -> 2. get doi -> 3. get text -> 4. prediction -> 5. output: inferred location + explanation + confidence score
|
254 |
def classify_sample_location(accession):
|