VyLala committed on
Commit
d8e40f0
·
verified ·
1 Parent(s): 6da8509

Upload mtdna_classifier.py

Browse files
Files changed (1) hide show
  1. mtdna_classifier.py +6 -6
mtdna_classifier.py CHANGED
@@ -135,18 +135,18 @@ def load_haplogroup_mapping(csv_path):
135
 
136
  # Function to extract haplogroup from the text
137
  def extract_haplogroup(text):
138
- # 1. Try to find a haplogroup preceded by the word "haplogroup"
139
  match = re.search(r'\bhaplogroup\s+([A-Z][0-9a-z]*)\b', text)
140
  if match:
141
- return re.match(r'^[A-Z][0-9]*', match.group(1)).group(0)
142
- #return match.group(1) # This is the actual haplogroup code like U5b1
143
-
144
- # 2. Fallback: try to find isolated uppercase-letter haplogroup codes
 
145
  fallback = re.search(r'\b([A-Z][0-9a-z]{1,5})\b', text)
146
  if fallback:
147
  return fallback.group(1)
 
148
 
149
- return None # If nothing found
150
 
151
  # Function to extract location based on NER
152
  def extract_location(text):
 
135
 
136
  # Function to extract haplogroup from the text
137
  def extract_haplogroup(text):
 
138
  match = re.search(r'\bhaplogroup\s+([A-Z][0-9a-z]*)\b', text)
139
  if match:
140
+ submatch = re.match(r'^[A-Z][0-9]*', match.group(1))
141
+ if submatch:
142
+ return submatch.group(0)
143
+ else:
144
+ return match.group(1) # fallback
145
  fallback = re.search(r'\b([A-Z][0-9a-z]{1,5})\b', text)
146
  if fallback:
147
  return fallback.group(1)
148
+ return None
149
 
 
150
 
151
  # Function to extract location based on NER
152
  def extract_location(text):