Spaces:

VyLala
/

mtDNALocation

Running

VyLala committed on Apr 13

Commit

c8f7c8d

verified ·

1 Parent(s): 4a39d10

Update mtdna_classifier.py

Files changed (1) hide show

mtdna_classifier.py CHANGED Viewed

@@ -236,17 +236,19 @@ def classify_mtDNA_sample_from_haplo(text):
     }
 # 4.3 Get from available NCBI
 def infer_location_fromNCBI(accession):
-    cmd = f'{os.environ["HOME"]}/edirect/esummary -db nuccore -id {accession} -format medline | egrep "location|country|geo"'
-    result = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
-    output, location = "",""
-    output = result.stdout
-    if "location" in output or "country" in output or "geo" in output:
-        location = output.split('"')[1]
-        output = output.split()[0]
-    else:
-      location = "Unknown"
-      output = "No location information found in NCBI."
-    return location, output
 # STEP 5: Main pipeline: accession -> 1. get pubmed id and isolate -> 2. get doi -> 3. get text -> 4. prediction -> 5. output: inferred location + explanation + confidence score
 def classify_sample_location(accession):

     }
 # 4.3 Get from available NCBI
 def infer_location_fromNCBI(accession):
+    try:
+        handle = Entrez.efetch(db="nuccore", id=accession, rettype="medline", retmode="text")
+        text = handle.read()
+        handle.close()
+        match = re.search(r'/(geo_loc_name|country|location)\s*=\s*"([^"]+)"', text)
+        if match:
+            return match.group(2)  # This is the value like "Brunei"
+        return None
+    except Exception as e:
+        print("❌ Entrez error:", e)
+        return ""
 # STEP 5: Main pipeline: accession -> 1. get pubmed id and isolate -> 2. get doi -> 3. get text -> 4. prediction -> 5. output: inferred location + explanation + confidence score
 def classify_sample_location(accession):