Spaces:
Running
Running
Upload mtdna_classifier.py
Browse files- mtdna_classifier.py +6 -6
mtdna_classifier.py
CHANGED
@@ -135,18 +135,18 @@ def load_haplogroup_mapping(csv_path):
|
|
135 |
|
136 |
# Function to extract haplogroup from the text
|
137 |
def extract_haplogroup(text):
|
138 |
-
# 1. Try to find a haplogroup preceded by the word "haplogroup"
|
139 |
match = re.search(r'\bhaplogroup\s+([A-Z][0-9a-z]*)\b', text)
|
140 |
if match:
|
141 |
-
|
142 |
-
|
143 |
-
|
144 |
-
|
|
|
145 |
fallback = re.search(r'\b([A-Z][0-9a-z]{1,5})\b', text)
|
146 |
if fallback:
|
147 |
return fallback.group(1)
|
|
|
148 |
|
149 |
-
return None # If nothing found
|
150 |
|
151 |
# Function to extract location based on NER
|
152 |
def extract_location(text):
|
|
|
135 |
|
136 |
# Function to extract haplogroup from the text
|
137 |
def extract_haplogroup(text):
    """Return a haplogroup label found in *text*, or ``None``.

    Two strategies are tried in order:

    1. A token that follows the word "haplogroup" (or "Haplogroup").
       Only the leading capital letter plus the digits directly after it
       are returned, e.g. ``"haplogroup H2a1"`` -> ``"H2"``.
    2. Otherwise, the first standalone token in the text made of a capital
       letter followed by one to five digits/lowercase letters. This token
       is returned unmodified, e.g. ``"M7b"``.

    Args:
        text: The string to search. ``None`` or an empty string yields
            ``None``.

    Returns:
        The extracted label as a ``str``, or ``None`` when nothing matches.
    """
    if not text:
        return None

    # The capture group takes the letter and its leading digits; the trailing
    # [0-9a-z]* consumes the rest of the token so the final \b still anchors
    # on the end of the whole word. Accepting a capital "H" lets a keyword at
    # the start of a sentence take this path instead of the fallback.
    keyword_hit = re.search(
        r'\b[Hh]aplogroup\s+([A-Z][0-9]*)[0-9a-z]*\b', text
    )
    if keyword_hit:
        return keyword_hit.group(1)

    # NOTE: this is a loose heuristic -- any short capitalised token
    # (including ordinary words such as "The") satisfies the pattern.
    fallback = re.search(r'\b([A-Z][0-9a-z]{1,5})\b', text)
    if fallback:
        return fallback.group(1)

    return None
|
149 |
|
|
|
150 |
|
151 |
# Function to extract location based on NER
|
152 |
def extract_location(text):
|