Spaces:
Runtime error
Runtime error
Commit
·
65f09a7
1
Parent(s):
2be1d3d
Create countriesIdentification
Browse files- countriesIdentification +706 -0
countriesIdentification
ADDED
|
@@ -0,0 +1,706 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import spacy

from geopy.geocoders import Nominatim
import geonamescache
import pycountry

from geotext import GeoText

import re

from transformers import BertTokenizer, BertModel
import torch


# Load the spacy model with GloVe embeddings
nlp = spacy.load("en_core_web_lg")

# load the pre-trained BERT tokenizer and model
tokenizer = BertTokenizer.from_pretrained('bert-base-cased')
model = BertModel.from_pretrained('bert-base-cased')

# Load valid city names from geonamescache
gc = geonamescache.GeonamesCache()

# There is a bug with geonamescache where some countries exist as cities (e.g. albania)
# So initially we delete any country reference from the cities

# Get a list of all country names
original_countries = set(country['name'] for country in gc.get_countries().values())

# Get a list of all the original city names
original_cities = set(city['name'] for city in gc.get_cities().values())

# Country names that do NOT also appear as a city name (clashing names are
# excluded so is_country() does not claim city-only references).
country_names = set(
    country['name'] for country in gc.get_countries().values() if country['name'] not in original_cities)

# We also add these two cases because they have been asked by SERCO.
# Membership tests downstream are case-sensitive, hence the explicit
# capitalization variants for each name.
country_names.add("Guinea Bissau")
country_names.add("Guinea bissau")
country_names.add("guinea Bissau")
country_names.add("guinea bissau")
country_names.add("Timor Leste")
country_names.add("Timor leste")
country_names.add("timor Leste")
country_names.add("timor leste")
country_names.add("UAE")
country_names.add("uae")
country_names.add("Uae")
country_names.add("Uk")
country_names.add("uK")
country_names.add("uk")
country_names.add("USa")
country_names.add("Usa")
country_names.add("usa")
country_names.add("uSa")
country_names.add("usA")
country_names.add("uSA")
country_names.add("Palestine")

# Get a list of all city names, excluding country names
city_names = set(city['name'] for city in gc.get_cities().values() if city['name'] not in original_countries)

# Special-case city not covered by geonamescache.
city_names.add("Puebla de sanabria")
|
| 65 |
+
|
| 66 |
+
|
| 67 |
+
def flatten(lst):
    """Yield every non-list element of *lst*, descending into nested lists."""
    for element in lst:
        if not isinstance(element, list):
            yield element
        else:
            yield from flatten(element)
|
| 77 |
+
|
| 78 |
+
|
| 79 |
+
def is_country(reference):
    """
    Check if a given reference is a valid country name.

    Tries the locally curated ``country_names`` set first, then falls back to a
    pycountry fuzzy search and compares against the match's official/common names.

    Returns:
        bool: True when the reference names a country, False otherwise.
    """
    # Fast path: exact (case-sensitive) match against the curated set.
    if reference in country_names:
        return True

    # Fallback: fuzzy lookup via pycountry; raises LookupError on no match.
    try:
        match = pycountry.countries.search_fuzzy(reference)[0]
    except LookupError:
        return False

    # Collect every name variant the matched record exposes, lowercased.
    candidate_names = [
        getattr(match, attr).lower()
        for attr in ('official_name', 'name', 'common_name')
        if hasattr(match, attr)
    ]

    # Accept only an exact (case-insensitive) equality with one of the variants.
    return reference.lower() in candidate_names
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def is_city(reference):
    """
    Check if a given reference is a valid city name.

    Looks the name up in the local geonamescache-derived ``city_names`` set
    first, then falls back to a Nominatim (OpenStreetMap) geocoding query.

    Args:
        reference (str): candidate location name; may contain the "x$x" comma
            placeholder injected upstream, which is stripped here.

    Returns:
        bool: True when the reference resolves to a city/town/village (or a
        multi-part 'administrative' area), False otherwise.
    """
    # Strip the comma placeholder injected by the sentence preprocessing.
    reference = reference.replace("x$x", "").strip()

    # Fast path: exact match against the local city-name set.
    if reference in city_names:
        return True

    # Fall back to the Nominatim (open street maps) api.
    geolocator = Nominatim(user_agent="certh_serco_validate_city_app")
    location = geolocator.geocode(reference, language="en", timeout=10)

    # BUG FIX: geocode() returns None when the name cannot be resolved; the
    # original code crashed with AttributeError on location.raw here.
    if location is None:
        return False

    # If a reference is identified as a 'city', 'town', or 'village', then it is indeed a city.
    if location.raw['type'] in ['city', 'town', 'village']:
        return True

    # If a reference is identified as 'administrative' (e.g. administrative area),
    # then we further examine if the retrieved info is a single token (meaning a
    # country) or a series of tokens (meaning a city). That condition separates
    # some cases where small cities were identified as administrative areas.
    elif location.raw['type'] == 'administrative':
        if len(location.raw['display_name'].split(",")) > 1:
            return True

    return False
|
| 140 |
+
|
| 141 |
+
|
| 142 |
+
def validate_locations(locations):
    """
    Validate that the identified references are indeed a Country and a City.

    Each candidate is classified as ('name', 'country') or ('name', 'city');
    multi-word candidates that fail outright are retried on their word suffixes.

    Returns:
        list[tuple[str, str]]: validated (name, kind) pairs.
    """
    validated_loc = []

    for candidate in locations:
        # Direct classification: country takes precedence over city.
        if is_country(candidate):
            validated_loc.append((candidate, 'country'))
            continue
        if is_city(candidate):
            validated_loc.append((candidate, 'city'))
            continue

        # Multi-word candidate: try progressively shorter suffixes
        # ("sunny Port Moresby" -> "Port Moresby" -> "Moresby").
        tokens = candidate.split()
        if len(tokens) > 1:
            for start in range(len(tokens)):
                suffix = ' '.join(tokens[start:])
                if is_country(suffix):
                    validated_loc.append((suffix, 'country'))
                    break
                if is_city(suffix):
                    validated_loc.append((suffix, 'city'))
                    break

    return validated_loc
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
def identify_loc_ner(sentence):
    """
    Identify all the geopolitical and location entities with the spacy tool.

    Multi-word GPE/LOC entities are kept as-is; single-word entities are kept
    only when at least one of their tokens carries the GPE entity type.

    Returns:
        list[str]: entity texts judged to be locations.
    """
    doc = nlp(sentence)
    ner_locations = []

    # GPE and LOC are the labels for location entities in spaCy.
    for ent in doc.ents:
        if ent.label_ not in ('GPE', 'LOC'):
            continue
        if len(ent.text.split()) > 1:
            ner_locations.append(ent.text)
        elif any(token.ent_type_ == 'GPE' for token in ent):
            ner_locations.append(ent.text)

    return ner_locations
|
| 201 |
+
|
| 202 |
+
|
| 203 |
+
def identify_loc_geoparselibs(sentence):
    """
    Identify cities and countries with 3 different geoparsing libraries.

    Args:
        sentence (str): input text.

    Returns:
        tuple: (geoparse_locations, countries, cities) where
        ``geoparse_locations`` aggregates hits from all three libraries and
        ``countries``/``cities`` are the GeoText results (lists of str).
    """
    geoparse_locations = []

    # Geoparsing library 1: geonamescache

    gc = geonamescache.GeonamesCache()

    # PERF FIX: use sets for membership tests. The city list has tens of
    # thousands of entries and the loop below probes every word sequence of the
    # sentence, so list membership made this quadratic-times-linear.
    local_country_names = {country['name'] for country in gc.get_countries().values()}
    local_city_names = {city['name'] for city in gc.get_cities().values()}

    # If any word sequence in our sentence is one of those countries/cities, identify it.
    words = sentence.split()
    for i in range(len(words)):
        for j in range(i + 1, len(words) + 1):
            word_seq = ' '.join(words[i:j])
            if word_seq in local_city_names or word_seq in local_country_names:
                geoparse_locations.append(word_seq)

    # Geoparsing library 2: pycountry — substring match of every known country name.
    for country in pycountry.countries:
        if country.name in sentence:
            geoparse_locations.append(country.name)

    # Geoparsing library 3: geotext
    places = GeoText(sentence)
    cities = list(places.cities)
    countries = list(places.countries)

    # Order preserved from the original implementation: cities first.
    geoparse_locations += cities
    geoparse_locations += countries

    return (geoparse_locations, countries, cities)
|
| 250 |
+
|
| 251 |
+
|
| 252 |
+
def identify_loc_regex(sentence):
    """
    Identify cities and countries with regular expression matching.

    Country and city references can be preceded by 'in', 'from' or 'of';
    everything (word characters and spaces) following such a preposition is
    returned as a candidate.

    Returns:
        list[str]: candidate location strings.
    """
    pattern = r"\b(in|from|of)\b\s([\w\s]+)"
    return [candidate for _preposition, candidate in re.findall(pattern, sentence)]
|
| 267 |
+
|
| 268 |
+
|
| 269 |
+
def identify_loc_embeddings(sentence, countries, cities):
    """
    Identify cities and countries with the BERT pre-trained embeddings matching.

    Args:
        sentence (str): input text.
        countries (list[str]): country names found upstream (GeoText results).
        cities (list[str]): city names found upstream (GeoText results).

    Returns:
        list[str]: tokens (with underscores converted back to spaces) judged
        to be country/city references, either by direct vocabulary match or
        by cosine-similarity to a vocabulary token.
    """

    embd_locations = []

    # Define a list of country and city names (those are given by the geonamescache library before)
    countries_cities = countries + cities

    # Concatenate multi-word countries and cities into a single string
    # (spaces become underscores so each name is one vocabulary entry).
    multiword_countries = [c.replace(' ', '_') for c in countries if ' ' in c]
    multiword_cities = [c.replace(' ', '_') for c in cities if ' ' in c]
    countries_cities += multiword_countries + multiword_cities

    # Preprocess the input sentence.
    # NOTE(review): no [CLS]/[SEP] special tokens are added — presumably
    # intentional so token indices align 1:1 with `tokens`; confirm.
    tokens = tokenizer.tokenize(sentence)
    input_ids = torch.tensor([tokenizer.convert_tokens_to_ids(tokens)])

    # Get the BERT embeddings for the input sentence
    with torch.no_grad():
        embeddings = model(input_ids)[0][0]

    # Find the country and city names in the input sentence
    for i in range(len(tokens)):
        token = tokens[i]
        if token in countries_cities:
            # Direct vocabulary hit.
            embd_locations.append(token)
        else:
            # Otherwise look at the 5 most similar tokens of this token
            # (index 0 of argsort is the token itself, hence the [1:6] slice).
            word_vector = embeddings[i]
            similarity_scores = torch.nn.functional.cosine_similarity(word_vector.unsqueeze(0), embeddings)
            similar_tokens = [tokens[j] for j in similarity_scores.argsort(descending=True)[1:6]]
            for word in similar_tokens:
                # NOTE(review): tokens.index(word) returns the FIRST occurrence
                # of `word`, which may not be the position whose similarity put
                # it in similar_tokens when the token repeats — confirm intended.
                if word in countries_cities and similarity_scores[tokens.index(word)] > 0.5:
                    embd_locations.append(word)

    # Convert back multi-word country and city names to original form
    embd_locations = [loc.replace('_', ' ') if '_' in loc else loc for loc in embd_locations]

    return embd_locations
|
| 309 |
+
|
| 310 |
+
|
| 311 |
+
|
| 312 |
+
def _dedupe_substring_names(names):
    """
    Return *names* with duplicates and entries that are (case-insensitive)
    substrings of another entry removed; longer names win.

    Example: ['Port moresby', 'Port'] -> ['Port moresby'].
    """
    # Sort longest first so containment resolves toward the fuller name and
    # exact duplicates collapse onto the first occurrence.
    ordered = sorted(names, key=len, reverse=True)

    # Pass 1: drop an entry when an already-kept entry occurs inside it
    # (this removes exact duplicates).
    cleaned = []
    for name in ordered:
        if not any(kept.lower() in name.lower() for kept in cleaned):
            cleaned.append(name)

    # Pass 2: drop an entry that is a substring of a *different* kept entry.
    final = []
    for i, name in enumerate(cleaned):
        is_contained = any(
            name.lower() in other.lower()
            for j, other in enumerate(cleaned)
            if i != j
        )
        if not is_contained:
            final.append(name)

    return final


def multiple_country_city_identifications_solve(country_city_dict):
    """
    Solve the appearance of multiple identifications of countries and cities.

    If any shorter element exists as a substring inside a longer element of the
    same category, the shorter one is deleted. For example
    {'city': ['Port moresby', 'Port'], 'country': ['Guinea', 'Papua new guinea']}
    becomes {'city': ['Port moresby'], 'country': ['Papua new guinea']}.

    Such inconsistencies were observed during country/city identification,
    probably caused by the geoparsing libraries in use.

    Args:
        country_city_dict (dict): optional 'country' and 'city' keys mapping to
            lists of names.

    Returns:
        dict: the cleaned dictionary (None when the input dict is empty,
        preserving the original implicit-return behavior), or the historical
        error sentinel tuple (0, "LOCATION", "unknown_error") on failure.
    """
    try:
        # Only categories with more than one entry can contain redundant
        # substring references, so anything else is left untouched.
        if 'country' in country_city_dict and len(country_city_dict['country']) > 1:
            country_city_dict['country'] = _dedupe_substring_names(country_city_dict['country'])

        if 'city' in country_city_dict and len(country_city_dict['city']) > 1:
            country_city_dict['city'] = _dedupe_substring_names(country_city_dict['city'])

        if country_city_dict:
            return country_city_dict

    # Narrowed from a bare `except:` (which also swallowed SystemExit /
    # KeyboardInterrupt); sentinel kept for backward compatibility.
    except Exception:
        return (0, "LOCATION", "unknown_error")
|
| 421 |
+
|
| 422 |
+
|
| 423 |
+
def helper_resolve_cities(sentence, locations):
    """
    Verify that a captured city does not merely echo a captured country name.

    If a city name is contained in a country name (e.g. city 'Guinea' with
    country 'Papua New Guinea') it is dropped, unless the sentence mentions it
    more than once — which may be a genuine city whose name is a substring of
    a country.

    Args:
        sentence (str): the original sentence the locations were found in.
        locations (dict): optional 'country' and 'city' keys with name lists.

    Returns:
        dict: the (possibly modified) locations dict.
    """
    if 'country' in locations and 'city' in locations:
        for country in locations['country']:
            # Iterate a snapshot: locations['city'] may be rebuilt inside the loop.
            for city in list(locations['city']):
                if city.lower() in country.lower():
                    # BUG FIX: re.escape — city names may contain regex
                    # metacharacters and were previously used as raw patterns.
                    city_count = len(re.findall(re.escape(city), sentence, re.IGNORECASE))
                    if city_count == 1:
                        # Single mention: it was just the country reference.
                        locations['city'] = [c for c in locations['city'] if c != city]

    return locations
|
| 443 |
+
|
| 444 |
+
|
| 445 |
+
def helper_delete_city_reference(locations):
    """
    Strip a spurious 'city'/'City' word from captured city names, unless the
    name legitimately contains it (e.g. 'Mexico City').

    BUG FIX: the original assigned only the LAST loop variable back to
    locations['city'], silently discarding every other captured city (and left
    a trailing space after the removed word). All cities are now processed and
    the result stays a list.

    Args:
        locations (dict): optional 'city' key with a list of names.

    Returns:
        dict: the (possibly modified) locations dict.
    """
    # Whitelist of cities whose names legitimately contain the word 'City'.
    city_cities = ["Adamstown City", "Alexander City", "Angeles City", "Antipolo City", "Arizona City", "Arkansas City",
                   "Ashley City", "Atlantic City", "Bacolod City", "Bacoor City", "Bago City", "Baguio City",
                   "Baker City", "Baltimore City", "Batangas City", "Bay City", "Belgrade City", "Belize City",
                   "Benin City", "Big Bear City", "Bossier City", "Boulder City", "Brazil City", "Bridge City",
                   "Brigham City", "Brighton City", "Bristol City", "Buckeye City", "Bullhead City", "Butuan City",
                   "Cabanatuan City", "Calamba City", "Calbayog City", "California City", "Caloocan City",
                   "Calumet City", "Candon City", "Canon City", "Carcar City", "Carson City", "Castries City",
                   "Cathedral City", "Cavite City", "Cebu City", "Cedar City", "Central Falls City", "Century City",
                   "Cestos City", "City Bell", "City Terrace", "City of Balikpapan", "City of Calamba",
                   "City of Gold Coast", "City of Industry", "City of Isabela", "City of Orange", "City of Paranaque",
                   "City of Parramatta", "City of Shoalhaven", "Collier City", "Columbia City", "Commerce City",
                   "Cooper City", "Cotabato City", "Crescent City", "Crescent City North", "Culver City",
                   "Dagupan City", "Dale City", "Dali City", "Daly City", "Danao City", "Dasmariñas City", "Davao City",
                   "De Forest City", "Del City", "Dhaka City", "Dipolog City", "Dodge City", "Dumaguete City",
                   "El Centro City", "Elizabeth City", "Elk City", "Ellicott City", "Emeryville City", "Fernley City",
                   "Florida City", "Forest City", "Forrest City", "Foster City", "Freeport City", "Garden City",
                   "Gdynia City", "General Santos City", "General Trias City", "Gloucester City", "Granite City",
                   "Green City", "Grove City", "Guatemala City", "Haines City", "Haltom City", "Harbor City",
                   "Havre City", "Highland City", "Ho Chi Minh City", "Holiday City", "Horizon City", "Hyderabad City",
                   "Iligan City", "Iloilo City", "Imus City", "Iowa City", "Iriga City", "Isabela City", "Jacinto City",
                   "James City County", "Jefferson City", "Jersey City", "Jhang City", "Jincheng City", "Johnson City",
                   "Junction City", "Kaiyuan City", "Kansas City", "King City", "Kingman City", "Kingston City",
                   "Koror City", "Kowloon City", "Kuwait City", "Lake City", "Lake Havasu City", "Laoag City",
                   "Lapu-Lapu City", "Las Pinas City", "Las Piñas City", "League City", "Legazpi City", "Leisure City",
                   "Lenoir City", "Ligao City", "Lincoln City", "Linyi City", "Lipa City", "Loma Linda City",
                   "Lucena City", "Madrid City", "Makati City", "Malabon City", "Mandaluyong City", "Mandaue City",
                   "Manukau City", "Marawi City", "Marikina City", "Maryland City", "Mason City", "McKee City",
                   "Mexico City", "Mexico City Beach", "Michigan City", "Midwest City", "Mineral City", "Missouri City",
                   "Morehead City", "Morgan City", "Muntinlupa City", "Naga City", "Nagasaki City", "National City",
                   "Navotas City", "Nay Pyi Taw City", "Nevada City", "New City", "New York City", "Norwich City",
                   "Ocean City", "Oil City", "Oklahoma City", "Olongapo City", "Orange City", "Oregon City",
                   "Ozamiz City", "Pagadian City", "Palayan City", "Palm City", "Panabo City", "Panama City",
                   "Panama City", "Panama City Beach", "Parañaque City", "Park City", "Pasay City", "Peachtree City",
                   "Pearl City", "Pell City", "Phenix City", "Plant City", "Ponca City", "Port Augusta City",
                   "Port Pirie City", "Quad Cities", "Quartzsite City", "Quebec City", "Quezon City", "Quezon City",
                   "Rainbow City", "Rapid City", "Red City", "Redwood City", "Richmond City", "Rio Grande City",
                   "Roxas City", "Royse City", "Salt Lake City", "Salt Lake City", "Samal City", "San Carlos City",
                   "San Carlos City", "San Fernando City", "San Fernando City", "San Fernando City", "San Jose City",
                   "San Jose City", "San Juan City", "San Juan City", "San Pedro City", "Santa Rosa City",
                   "Science City of Munoz", "Shelby City", "Sialkot City", "Silver City", "Sioux City",
                   "South Lake Tahoe City", "South Sioux City", "Studio City", "Suisun City", "Summit Park City",
                   "Sun City", "Sun City Center", "Sun City West", "Sun City West", "Suva City", "Tabaco City",
                   "Tacloban City", "Tagbilaran City", "Taguig City", "Tagum City", "Talisay City", "Tanauan City",
                   "Tarlac City", "Tauranga City", "Tayabas City", "Temple City", "Texas City", "Thomas City",
                   "Tipp City", "Toledo City", "Traverse City", "Trece Martires City", "Tuba City", "Union City",
                   "Universal City", "University City", "Upper Hutt City", "Valencia City", "Valenzuela City",
                   "Vatican City", "Vatican City", "Ventnor City", "Webb City", "Wellington City", "Welwyn Garden City",
                   "West Valley City", "White City", "Yazoo City", "Yuba City", "Zamboanga City"]

    if 'city' in locations:
        cleaned = []
        for city in locations['city']:
            if city not in city_cities:
                if 'city' in city:
                    city = city.replace("city", "").strip()
                elif 'City' in city:
                    city = city.replace("City", "").strip()
            cleaned.append(city)
        locations['city'] = cleaned

    return locations
|
| 516 |
+
|
| 517 |
+
|
| 518 |
+
def helper_delete_country_reference(locations):
    """
    Remove country-name words that leaked into captured city names.

    E.g. country 'Guinea' turns city 'Guinea Conakry' into 'conakry'. Countries
    whose name legitimately doubles as a city (Mexico, Panama, ...) are left
    alone. Note: a rewritten city name comes out lowercased, because the
    comparison tokens are produced by ``.lower().split()``.

    Returns:
        dict: the (possibly modified) locations dict.
    """
    # Countries whose city of the same name must not be stripped.
    country_city_same = ["djibouti", "guatemala", "mexico", "panama", "san marino", "singapore", "vatican"]

    for country in locations.get('country', []):
        if country.lower() in country_city_same:
            continue
        country_tokens = country.lower().split()

        for idx, city in enumerate(locations.get('city', [])):
            city_tokens = city.lower().split()
            for token in country_tokens:
                if token in city_tokens:
                    city_tokens.remove(token)
                    locations['city'][idx] = ' '.join(city_tokens).strip()

    return locations
|
| 542 |
+
|
| 543 |
+
|
| 544 |
+
def _strip_comma_sentinel(loc_dict):
    """Remove leftover 'x$x' comma placeholders from every location string (in place)."""
    for values in loc_dict.values():
        for i, value in enumerate(values):
            if 'x$x' in value:
                values[i] = value.replace('x$x', '')


def identify_locations(sentence):
    """
    Identify all possible country and city references in the given sentence,
    using NER, geoparsing libraries, regex and embeddings in a hybrid manner.

    Returns:
        dict: {'country': [...], 'city': [...]} when exactly one country and
              at most one city are resolved ('city' is [0] when no city was
              found), or
        tuple: (0, "LOCATION", <error_code>) where <error_code> is one of
               "no_country", "more_city_or_country", "more_country" or
               "unknown_error".
    """

    locations = []
    extra_serco_countries = False

    try:
        # A city followed by a comma was not understood by the system, so
        # commas are temporarily replaced with a sentinel token (restored below).
        sentence = sentence.replace(",", " x$x ")

        # Serco wanted these two countries handled when written without the
        # symbol "-"; hardcoding is the only way to catch them.
        if "Timor Leste" in sentence:
            extra_serco_countries = True
            locations.append("Timor Leste")

        if "Guinea Bissau" in sentence:
            extra_serco_countries = True
            locations.append("Guinea Bissau")

        # NER-based candidates
        locations.append(identify_loc_ner(sentence))

        # geoparsing-library candidates
        geoparse_list, countries, cities = identify_loc_geoparselibs(sentence)
        locations.append(geoparse_list)

        # flatten the geoparse list
        locations_flat_1 = list(flatten(locations))

        # regex candidates
        locations_flat_1.append(identify_loc_regex(sentence))

        # BUGFIX: flatten the list that contains the regex results —
        # previously `locations` was re-flattened, discarding them.
        locations_flat_2 = list(flatten(locations_flat_1))

        # embedding-based candidates
        locations_flat_2.append(identify_loc_embeddings(sentence, countries, cities))

        # BUGFIX: likewise keep the embedding results when flattening.
        locations_flat_3 = list(flatten(locations_flat_2))

        # Collapse duplicates that differ only in capitalization
        # (e.g. "Italy" and "italy" become a single "Italy" entry).
        loc_capitalization = list({loc.capitalize() for loc in locations_flat_3})

        # Drop candidates fully contained in a longer candidate, e.g. for
        # ["Timor leste", "Timor"] the bare "Timor" must be removed.
        if extra_serco_countries:
            kept = loc_capitalization.copy()
            for i, loc1 in enumerate(loc_capitalization):
                for j, loc2 in enumerate(loc_capitalization):
                    if i != j and loc1 in loc2:
                        kept.remove(loc1)
                        break
            loc_capitalization = kept

        # Validate that each candidate really is a country/city.
        validated_locations = validate_locations(loc_capitalization)

        # Group validated entries into {'country': [...], 'city': [...]}.
        loc_dict = {}
        for location, loc_type in validated_locations:
            loc_dict.setdefault(loc_type, []).append(location)

        # Restore the sentence to its previous form.
        sentence = sentence.replace(" x$x ", ",")

        # Cope with repeated country/city references caused by geoparse-lib issues.
        locations_dict = multiple_country_city_identifications_solve(loc_dict)

        if locations_dict is None:
            return (0, "LOCATION", "no_country")

        # It is mandatory that a country exists.
        if 'country' not in locations_dict:
            return (0, "LOCATION", "no_country")

        if 'city' in locations_dict:
            resolved_dict = helper_resolve_cities(sentence, locations_dict)

            # Accept exactly one country with at most one city (the 1-city
            # and 0-city branches were identical and have been merged).
            if len(resolved_dict['country']) == 1 and len(resolved_dict['city']) <= 1:
                # Capitalize because e.g. 'italy' may be returned as-is.
                resolved_dict['country'][0] = resolved_dict['country'][0].capitalize()

                # There were cases where the 'x$x' sentinel was not removed.
                _strip_comma_sentinel(resolved_dict)

                delete_city = helper_delete_city_reference(resolved_dict)
                return helper_delete_country_reference(delete_city)

            # Error: more than one country or city.
            return (0, "LOCATION", "more_city_or_country")

        # No city was identified: accept only a single country.
        if len(locations_dict['country']) == 1:
            locations_dict['country'][0] = locations_dict['country'][0].capitalize()

            # There were cases where the 'x$x' sentinel was not removed.
            _strip_comma_sentinel(locations_dict)

            resolved_cities = helper_resolve_cities(sentence, locations_dict)
            delete_city = helper_delete_city_reference(resolved_cities)
            result = helper_delete_country_reference(delete_city)

            # Signal "no city found" downstream with a 0 placeholder.
            if 'city' not in result:
                result['city'] = [0]

            return result

        # Error: more than one country without any city.
        return (0, "LOCATION", "more_country")

    except Exception:
        # Handle any unexpected failure while identifying a country/city.
        return (0, "LOCATION", "unknown_error")
|