Spaces:
Running
Running
Update standardize_location.py
Browse files- standardize_location.py +75 -73
standardize_location.py
CHANGED
@@ -1,74 +1,76 @@
|
|
1 |
-
import requests
|
2 |
-
import re
|
3 |
-
|
4 |
-
# Normalize input
|
5 |
-
def normalize_key(text):
|
6 |
-
return re.sub(r"[^a-z0-9]", "", text.strip().lower())
|
7 |
-
|
8 |
-
# Search for city/place (normal flow)
|
9 |
-
def get_country_from_geonames(city_name
|
10 |
-
url =
|
11 |
-
|
12 |
-
|
13 |
-
"
|
14 |
-
"
|
15 |
-
|
16 |
-
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
country.get("
|
42 |
-
|
43 |
-
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
|
|
|
|
74 |
return "Not found"
|
|
|
1 |
+
import requests
|
2 |
+
import re
|
3 |
+
|
4 |
+
# Normalize input
|
5 |
+
def normalize_key(text):
    """Build a compact lookup key from free-form text.

    Surrounding whitespace is stripped, the text is lowercased, and every
    character outside ``a-z`` and ``0-9`` is removed.
    """
    lowered = text.strip().lower()
    non_key_chars = re.compile(r"[^a-z0-9]")
    return non_key_chars.sub("", lowered)
|
7 |
+
|
8 |
+
# Search for city/place (normal flow)
|
9 |
+
def get_country_from_geonames(city_name):
    """Return the country name of the first search hit for a place name.

    Sends ``city_name`` as the ``q`` parameter (with ``maxRows=1``) to the
    endpoint named by the ``URL_SEARCHJSON`` environment variable, passing
    ``USERNAME_GEO`` as the ``username`` parameter.

    Args:
        city_name: Free-text place name to search for.

    Returns:
        The ``countryName`` of the first entry in the response's
        ``geonames`` list, or ``None`` when the input is blank, the list is
        missing/empty, or the request or response parsing fails.

    Raises:
        KeyError: If ``URL_SEARCHJSON`` or ``USERNAME_GEO`` is not set.
    """
    # Imported here because the module's import block does not provide `os`;
    # without it the environment lookups below fail with NameError.
    import os

    if not city_name or not city_name.strip():
        # Nothing to search for; skip the network round-trip.
        return None

    # Read configuration outside the try block so a missing variable is
    # reported as a KeyError instead of being reduced to a silent None.
    url = os.environ["URL_SEARCHJSON"]
    username = os.environ["USERNAME_GEO"]
    params = {
        "q": city_name,
        "maxRows": 1,
        "username": username,
    }
    try:
        r = requests.get(url, params=params, timeout=5)
        data = r.json()
        if data.get("geonames"):
            return data["geonames"][0]["countryName"]
    except Exception as e:
        # Deliberately best-effort: report the failure and fall through.
        print("GeoNames searchJSON error:", e)
    return None
|
25 |
+
|
26 |
+
# Search for country info using alpha-2/3 codes or name
|
27 |
+
def get_country_from_countryinfo(input_code):
    """Match a country name or ISO code against the country-info list.

    Fetches the list from the endpoint named by the ``URL_COUNTRYJSON``
    environment variable (passing ``USERNAME_GEO`` as ``username``) and
    compares the input, case-insensitively, with each entry's
    ``countryName``, ``countryCode`` (alpha-2) and ``isoAlpha3`` fields.

    Args:
        input_code: Country name, alpha-2 code or alpha-3 code.

    Returns:
        The ``countryName`` of the first matching entry, or ``None`` when
        the input is blank, nothing matches, or the request or response
        parsing fails.

    Raises:
        KeyError: If ``URL_COUNTRYJSON`` or ``USERNAME_GEO`` is not set.
    """
    # Imported here because the module's import block does not provide `os`;
    # without it the environment lookups below fail with NameError.
    import os

    if not input_code or not input_code.strip():
        # A blank query must not match entries whose fields are empty.
        return None
    wanted = input_code.strip().upper()

    # Read configuration outside the try block so a missing variable is
    # reported as a KeyError instead of being reduced to a silent None.
    url = os.environ["URL_COUNTRYJSON"]
    username = os.environ["USERNAME_GEO"]
    params = {
        "username": username,
    }
    try:
        r = requests.get(url, params=params, timeout=5)
        data = r.json()
        for country in data.get("geonames") or []:
            # `or ""` tolerates fields that are present but null, which
            # would otherwise abort the whole scan on `.upper()`.
            candidates = {
                str(country.get(field) or "").upper()
                for field in ("countryName", "countryCode", "isoAlpha3")
            }
            if wanted in candidates:
                return country["countryName"]
    except Exception as e:
        # Deliberately best-effort: report the failure and fall through.
        print("GeoNames countryInfoJSON error:", e)
    return None
|
49 |
+
|
50 |
+
# Combined smart lookup
|
51 |
+
def smart_country_lookup(user_input):
    """Resolve free-form location text to a country name.

    For input such as ``"UK: London"`` only the part before the first colon
    is used. The lookups are then tried in this order, and the first
    non-empty result wins:

    1. If the text reduces to at most three alphanumeric characters it is
       treated as a code: ``"UK"`` is sent to the place search, any other
       code is matched against the country list in its cleaned-up form
       (so ``"U.S.A"`` is looked up as ``"USA"``).
    2. The text as written is matched against the country list (full names).
    3. The text as written is searched as a city/place.

    A lookup is skipped when it would repeat a query already made in step 1.

    Args:
        user_input: Country code, country name or place name. Blank or
            ``None`` input yields ``"Not found"`` without any lookup.

    Returns:
        The resolved country name, or the string ``"Not found"``.
    """
    query = (user_input or "").strip()

    # Special case: if user writes "UK: London" → keep only the "UK" part.
    if ":" in query:
        query = query.split(":")[0].strip()
    if not query:
        return "Not found"

    # Derive the code form from the already-split text so that
    # "UK: London" is recognised as the code "UK".
    normalized = re.sub(r"[^a-zA-Z0-9]", "", query).upper()

    name_lookup_redundant = False
    place_lookup_redundant = False

    # First try as a country code (1-3 alphanumeric characters).
    if 0 < len(normalized) <= 3:
        if normalized == "UK":
            # "UK" goes to the place search rather than the country list.
            country = get_country_from_geonames(normalized)
            place_lookup_redundant = query == normalized
        else:
            country = get_country_from_countryinfo(normalized)
            # The country-list matcher uppercases its input, so a second
            # call with the same letters would repeat this request.
            name_lookup_redundant = query.upper() == normalized
        if country:
            return country

    # Then try the text as a full country name.
    if not name_lookup_redundant:
        country = get_country_from_countryinfo(query)
        if country:
            return country

    # Otherwise, treat as city/place.
    if not place_lookup_redundant:
        country = get_country_from_geonames(query)
        if country:
            return country

    return "Not found"
|