Spaces:
Running
Running
Update standardize_location.py
Browse files- standardize_location.py +91 -83
standardize_location.py
CHANGED
@@ -1,83 +1,91 @@
|
|
1 |
-
import requests
|
2 |
-
import re
|
3 |
-
import os
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
|
9 |
-
|
10 |
-
|
11 |
-
|
12 |
-
|
13 |
-
|
14 |
-
|
15 |
-
"
|
16 |
-
"
|
17 |
-
|
18 |
-
|
19 |
-
|
20 |
-
|
21 |
-
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
|
27 |
-
|
28 |
-
|
29 |
-
|
30 |
-
|
31 |
-
|
32 |
-
|
33 |
-
|
34 |
-
|
35 |
-
|
36 |
-
|
37 |
-
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
43 |
-
|
44 |
-
country.get("
|
45 |
-
country.get("
|
46 |
-
|
47 |
-
|
48 |
-
|
49 |
-
|
50 |
-
|
51 |
-
|
52 |
-
|
53 |
-
|
54 |
-
|
55 |
-
|
56 |
-
|
57 |
-
|
58 |
-
|
59 |
-
|
60 |
-
|
61 |
-
|
62 |
-
|
63 |
-
|
64 |
-
|
65 |
-
|
66 |
-
|
67 |
-
|
68 |
-
|
69 |
-
|
70 |
-
|
71 |
-
|
72 |
-
|
73 |
-
|
74 |
-
|
75 |
-
|
76 |
-
|
77 |
-
|
78 |
-
|
79 |
-
|
80 |
-
|
81 |
-
|
82 |
-
|
83 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import requests
|
2 |
+
import re
|
3 |
+
import os
|
4 |
+
import model
|
5 |
+
# Normalize input
|
6 |
+
def normalize_key(text):
    """Reduce *text* to a compact lookup key.

    Surrounding whitespace is trimmed, the text is lower-cased, and every
    character other than ASCII ``a-z`` / ``0-9`` is removed (so punctuation,
    inner spaces and non-ASCII letters are all dropped).
    """
    trimmed = text.strip()
    lowered = trimmed.lower()
    # Whatever is left after deleting the non-alphanumerics is the key.
    key = re.sub(r"[^a-z0-9]", "", lowered)
    return key
|
8 |
+
|
9 |
+
# Search for city/place (normal flow)
|
10 |
+
def get_country_from_geonames(city_name):
    """Look up the country of a city/place through the GeoNames search endpoint.

    The endpoint URL is read from the ``URL_SEARCHJSON`` environment variable
    and the account name from ``USERNAME_GEO``. Only the top hit
    (``maxRows=1``) is requested.

    Args:
        city_name: Free-text place name used as the ``q`` search parameter.

    Returns:
        The ``countryName`` of the first result, or ``None`` when the query is
        blank, nothing is found, or the request/parsing fails (failures are
        printed and swallowed: the lookup is best-effort).

    Raises:
        KeyError: If either environment variable is not set.
    """
    url = os.environ["URL_SEARCHJSON"]
    username = os.environ["USERNAME_GEO"]
    # Fixed: this used to print the undefined name `cityname`, which raised
    # NameError on every call before any request was made.
    print("geoname: ", city_name)

    # A blank query cannot match anything; skip the network round-trip.
    if city_name is None or not str(city_name).strip():
        return None

    params = {
        "q": city_name,
        "maxRows": 1,
        "username": username,
    }
    try:
        r = requests.get(url, params=params, timeout=5)
        data = r.json()
        if data.get("geonames"):
            return data["geonames"][0]["countryName"]
    except Exception as e:
        # Best-effort lookup: report the problem and fall through to None.
        print("GeoNames searchJSON error:", e)
    return None
|
27 |
+
|
28 |
+
# Search for country info using alpha-2/3 codes or name
|
29 |
+
def get_country_from_countryinfo(input_code):
    """Resolve a country name or ISO code to the GeoNames country name.

    Fetches the country list from the endpoint named by the
    ``URL_COUNTRYJSON`` environment variable (account from ``USERNAME_GEO``)
    and compares the input, case-insensitively, with each record's
    ``countryName``, ``countryCode`` (alpha-2) and ``isoAlpha3`` fields.

    Args:
        input_code: Country name, alpha-2 code or alpha-3 code.

    Returns:
        The matching record's ``countryName``, or ``None`` when the input is
        blank / not a string, nothing matches, or the request/parsing fails
        (failures are printed and swallowed: the lookup is best-effort).

    Raises:
        KeyError: If either environment variable is not set.
    """
    url = os.environ["URL_COUNTRYJSON"]
    username = os.environ["USERNAME_GEO"]
    print("countryINFO: ", input_code)

    wanted = input_code.strip().upper() if isinstance(input_code, str) else ""
    if not wanted:
        # An empty key would "match" any record that lacks one of the compared
        # fields (their fallback is also ""), so never search for it.
        return None

    params = {
        "username": username,
    }
    try:
        r = requests.get(url, params=params, timeout=5)
        data = r.json()
        for country in data.get("geonames") or []:
            # Match against country name, country code (alpha-2), iso alpha-3.
            # `or ""` tolerates null field values instead of aborting the scan.
            candidates = {
                str(country.get(field) or "").upper()
                for field in ("countryName", "countryCode", "isoAlpha3")
            }
            candidates.discard("")
            if wanted in candidates:
                return country["countryName"]
    except Exception as e:
        # Best-effort lookup: report the problem and fall through to None.
        print("GeoNames countryInfoJSON error:", e)
    return None
|
52 |
+
|
53 |
+
# Combined smart lookup
|
54 |
+
def smart_country_lookup(user_input):
    """Resolve free-form location text to a country name.

    Lookup order:
      1. If the text (after dropping anything following the first ``:``)
         normalises to 1-3 alphanumeric characters, treat it as a code:
         ``UK`` goes to the place search, any other code to the country-info
         lookup.
      2. Try the text as a full country name via the country-info lookup.
      3. Try the text as a city/place via the place search.

    If any step raises, the text is handed to ``model.get_country_from_text``
    instead.

    Args:
        user_input: Raw location string, e.g. ``"VN"``, ``"UK: London"``,
            ``"Paris"``.

    Returns:
        The resolved country name, or the string ``"Not found"``.
    """
    try:
        raw_input = user_input.strip()
        # Special case: if user writes "UK: London" -> split and take main country part
        if ":" in raw_input:
            raw_input = raw_input.split(":")[0].strip()  # only take "UK"
        # Normalise AFTER the split; normalising the whole string turned
        # "UK: London" into "UKLONDON", which could never hit the code branch.
        normalized = re.sub(r"[^a-zA-Z0-9]", "", raw_input).upper()
        print("raw input for smart country lookup: ", raw_input, ". Normalized country: ", normalized)

        code_query = None   # value already sent to the country-info lookup
        place_query = None  # value already sent to the place search

        # First try as country code (1-3 letters or common abbreviation)
        if 0 < len(normalized) <= 3:
            if normalized == "UK":
                # "UK" is handled by the place search rather than the code
                # lookup (presumably because it is not the ISO code — confirm).
                place_query = normalized
                country = get_country_from_geonames(place_query)
                print("get_country_from_geonames(normalized.upper()) ", country)
            else:
                # Send the punctuation-free code so "U.S." is looked up as "US".
                code_query = normalized
                country = get_country_from_countryinfo(code_query)
                print("get_country_from_countryinfo(normalized) ", country)
            if country:
                return country

        print(raw_input)
        # Try full names, unless this exact query was just made as a code.
        if raw_input.upper() != code_query:
            country = get_country_from_countryinfo(raw_input)
            print("get_country_from_countryinfo(raw_input) ", country)
            if country:
                return country

        # Otherwise, treat as city/place (skip an identical repeat query).
        if raw_input != place_query:
            country = get_country_from_geonames(raw_input)
            print("get_country_from_geonames(raw_input) ", country)
            if country:
                return country

        return "Not found"
    except Exception as e:
        # Was a bare `except:`, which also trapped KeyboardInterrupt/SystemExit.
        print("smart_country_lookup error:", e)
        # NOTE(review): presumably the model returns "unknown" when it cannot
        # decide — confirm against the `model` module.
        country = model.get_country_from_text(user_input)
        return country if country != "unknown" else "Not found"
|