VyLala committed on
Commit
c8f7c8d
·
verified ·
1 Parent(s): 4a39d10

Update mtdna_classifier.py

Browse files
Files changed (1) hide show
  1. mtdna_classifier.py +13 -11
mtdna_classifier.py CHANGED
@@ -236,17 +236,19 @@ def classify_mtDNA_sample_from_haplo(text):
236
  }
237
  # 4.3 Get from available NCBI
238
  def infer_location_fromNCBI(accession):
239
- cmd = f'{os.environ["HOME"]}/edirect/esummary -db nuccore -id {accession} -format medline | egrep "location|country|geo"'
240
- result = subprocess.run(cmd, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
241
- output, location = "",""
242
- output = result.stdout
243
- if "location" in output or "country" in output or "geo" in output:
244
- location = output.split('"')[1]
245
- output = output.split()[0]
246
- else:
247
- location = "Unknown"
248
- output = "No location information found in NCBI."
249
- return location, output
 
 
250
 
251
  # STEP 5: Main pipeline: accession -> 1. get pubmed id and isolate -> 2. get doi -> 3. get text -> 4. prediction -> 5. output: inferred location + explanation + confidence score
252
  def classify_sample_location(accession):
 
236
  }
237
  # 4.3 Get from available NCBI
238
  def infer_location_fromNCBI(accession):
239
+ try:
240
+ handle = Entrez.efetch(db="nuccore", id=accession, rettype="medline", retmode="text")
241
+ text = handle.read()
242
+ handle.close()
243
+ match = re.search(r'/(geo_loc_name|country|location)\s*=\s*"([^"]+)"', text)
244
+ if match:
245
+ return match.group(2) # This is the value like "Brunei"
246
+ return None
247
+
248
+ except Exception as e:
249
+ print("❌ Entrez error:", e)
250
+ return ""
251
+
252
 
253
  # STEP 5: Main pipeline: accession -> 1. get pubmed id and isolate -> 2. get doi -> 3. get text -> 4. prediction -> 5. output: inferred location + explanation + confidence score
254
  def classify_sample_location(accession):