VyLala committed on
Commit
5d64337
·
verified ·
1 Parent(s): 2708f73

Update standardize_location.py

Browse files
Files changed (1) hide show
  1. standardize_location.py +91 -83
standardize_location.py CHANGED
@@ -1,83 +1,91 @@
1
- import requests
2
- import re
3
- import os
4
- # Normalize input
5
def normalize_key(text: str) -> str:
    """Return *text* reduced to a lowercase ASCII alphanumeric key.

    The input is stripped and lowercased, then every character other than
    ``a``-``z`` and ``0``-``9`` is removed, which drops spaces, punctuation
    and non-ASCII letters.
    """
    return re.sub(r"[^a-z0-9]", "", text.strip().lower())
7
-
8
- # Search for city/place (normal flow)
9
def get_country_from_geonames(city_name):
    """Look up the country of a place name through the GeoNames search endpoint.

    The endpoint URL and account name are read from the ``URL_SEARCHJSON``
    and ``USERNAME_GEO`` environment variables.

    Args:
        city_name: Free-text place name, sent as the ``q`` query parameter.

    Returns:
        The ``countryName`` of the first result, or ``None`` when the
        response contains no results or the request/parsing fails.

    Raises:
        KeyError: If either environment variable is not set.
    """
    url = os.environ["URL_SEARCHJSON"]
    username = os.environ["USERNAME_GEO"]
    # Log the actual parameter; printing the misspelled name `cityname`
    # raised NameError before any request was made.
    print("geoname: ", city_name)
    params = {
        "q": city_name,
        "maxRows": 1,  # only the top-ranked match is used
        "username": username,
    }
    try:
        r = requests.get(url, params=params, timeout=5)
        data = r.json()
        if data.get("geonames"):
            return data["geonames"][0]["countryName"]
    except Exception as e:
        # Deliberately best-effort: report the failure and fall through to None.
        print("GeoNames searchJSON error:", e)
    return None
26
-
27
- # Search for country info using alpha-2/3 codes or name
28
def get_country_from_countryinfo(input_code):
    """Resolve a country name or ISO code to the country name GeoNames reports.

    The endpoint URL and account name are read from the ``URL_COUNTRYJSON``
    and ``USERNAME_GEO`` environment variables. The input is compared
    case-insensitively against each returned entry's ``countryName``,
    ``countryCode`` and ``isoAlpha3`` fields.

    Args:
        input_code: Country name, alpha-2 code or alpha-3 code.

    Returns:
        The matching entry's ``countryName``, or ``None`` when the input is
        blank, nothing matches, or the request/parsing fails.

    Raises:
        KeyError: If either environment variable is not set.
    """
    url = os.environ["URL_COUNTRYJSON"]
    username = os.environ["USERNAME_GEO"]
    print("countryINFO: ", input_code)
    try:
        wanted = input_code.strip().upper()
        if not wanted:
            # A blank query would equal the "" default used for missing
            # fields below and so match an arbitrary entry.
            return None
        r = requests.get(url, params={"username": username}, timeout=5)
        data = r.json()
        for country in data.get("geonames") or []:
            # Match against country name, country code (alpha-2), iso alpha-3
            candidates = (
                country.get("countryName", "").upper(),
                country.get("countryCode", "").upper(),
                country.get("isoAlpha3", "").upper(),
            )
            if wanted in candidates:
                return country["countryName"]
    except Exception as e:
        # Deliberately best-effort: report the failure and fall through to None.
        print("GeoNames countryInfoJSON error:", e)
    return None
51
-
52
- # Combined smart lookup
53
def smart_country_lookup(user_input):
    """Map free-text location input to a country name.

    Lookup order:
      1. If the input reduces to ``UK``, query ``get_country_from_geonames``
         with ``"UK"``.
      2. ``get_country_from_countryinfo`` with the cleaned input (country
         name or ISO code).
      3. ``get_country_from_geonames`` with the cleaned input (city/place).

    Args:
        user_input: Raw location text; for ``"UK: London"`` only the part
            before the first colon is used.

    Returns:
        The first truthy country name found, otherwise ``"Not found"``.
    """
    raw_input = user_input.strip()
    # Special case: if user writes "UK: London", keep only the country part.
    if ":" in raw_input:
        raw_input = raw_input.split(":")[0].strip()
    # Build the code form from the country part, so "UK: London" is still
    # recognised as "UK". Punctuation is removed here, so "U.K." also
    # becomes "UK".
    normalized = re.sub(r"[^a-zA-Z0-9]", "", raw_input).upper()
    print(raw_input, normalized)

    if normalized == "UK":
        country = get_country_from_geonames(normalized)
        print("get_country_from_geonames(normalized.upper()) ", country)
        if country:
            return country

    # Codes and full names share one country-info lookup; asking once avoids
    # a second identical request for short inputs.
    country = get_country_from_countryinfo(raw_input)
    print("get_country_from_countryinfo(raw_input) ", country)
    if country:
        return country

    # Otherwise, treat as city/place
    country = get_country_from_geonames(raw_input)
    print("get_country_from_geonames(raw_input) ", country)
    if country:
        return country

    return "Not found"
 
 
 
 
 
 
 
 
 
1
+ import requests
2
+ import re
3
+ import os
4
+ import model
5
+ # Normalize input
6
def normalize_key(text: str) -> str:
    """Return *text* reduced to a lowercase ASCII alphanumeric key.

    The input is stripped and lowercased, then every character other than
    ``a``-``z`` and ``0``-``9`` is removed, which drops spaces, punctuation
    and non-ASCII letters.
    """
    return re.sub(r"[^a-z0-9]", "", text.strip().lower())
8
+
9
+ # Search for city/place (normal flow)
10
def get_country_from_geonames(city_name):
    """Look up the country of a place name through the GeoNames search endpoint.

    The endpoint URL and account name are read from the ``URL_SEARCHJSON``
    and ``USERNAME_GEO`` environment variables.

    Args:
        city_name: Free-text place name, sent as the ``q`` query parameter.

    Returns:
        The ``countryName`` of the first result, or ``None`` when the
        response contains no results or the request/parsing fails.

    Raises:
        KeyError: If either environment variable is not set.
    """
    url = os.environ["URL_SEARCHJSON"]
    username = os.environ["USERNAME_GEO"]
    # Log the actual parameter; printing the misspelled name `cityname`
    # raised NameError before any request was made.
    print("geoname: ", city_name)
    params = {
        "q": city_name,
        "maxRows": 1,  # only the top-ranked match is used
        "username": username,
    }
    try:
        r = requests.get(url, params=params, timeout=5)
        data = r.json()
        if data.get("geonames"):
            return data["geonames"][0]["countryName"]
    except Exception as e:
        # Deliberately best-effort: report the failure and fall through to None.
        print("GeoNames searchJSON error:", e)
    return None
27
+
28
+ # Search for country info using alpha-2/3 codes or name
29
def get_country_from_countryinfo(input_code):
    """Resolve a country name or ISO code to the country name GeoNames reports.

    The endpoint URL and account name are read from the ``URL_COUNTRYJSON``
    and ``USERNAME_GEO`` environment variables. The input is compared
    case-insensitively against each returned entry's ``countryName``,
    ``countryCode`` and ``isoAlpha3`` fields.

    Args:
        input_code: Country name, alpha-2 code or alpha-3 code.

    Returns:
        The matching entry's ``countryName``, or ``None`` when the input is
        blank, nothing matches, or the request/parsing fails.

    Raises:
        KeyError: If either environment variable is not set.
    """
    url = os.environ["URL_COUNTRYJSON"]
    username = os.environ["USERNAME_GEO"]
    print("countryINFO: ", input_code)
    try:
        wanted = input_code.strip().upper()
        if not wanted:
            # A blank query would equal the "" default used for missing
            # fields below and so match an arbitrary entry.
            return None
        r = requests.get(url, params={"username": username}, timeout=5)
        data = r.json()
        for country in data.get("geonames") or []:
            # Match against country name, country code (alpha-2), iso alpha-3
            candidates = (
                country.get("countryName", "").upper(),
                country.get("countryCode", "").upper(),
                country.get("isoAlpha3", "").upper(),
            )
            if wanted in candidates:
                return country["countryName"]
    except Exception as e:
        # Deliberately best-effort: report the failure and fall through to None.
        print("GeoNames countryInfoJSON error:", e)
    return None
52
+
53
+ # Combined smart lookup
54
def smart_country_lookup(user_input):
    """Map free-text location input to a country name.

    Lookup order:
      1. If the input reduces to ``UK``, query ``get_country_from_geonames``
         with ``"UK"``.
      2. ``get_country_from_countryinfo`` with the cleaned input (country
         name or ISO code).
      3. ``get_country_from_geonames`` with the cleaned input (city/place).

    If any step raises, the original ``user_input`` is handed to
    ``model.get_country_from_text`` instead.

    Args:
        user_input: Raw location text; for ``"UK: London"`` only the part
            before the first colon is used.

    Returns:
        The first truthy country name found, otherwise ``"Not found"``. On
        the fallback path, the model's answer unless it is ``"unknown"``.
    """
    try:
        raw_input = user_input.strip()
        # Special case: if user writes "UK: London", keep only the country part.
        if ":" in raw_input:
            raw_input = raw_input.split(":")[0].strip()
        # Build the code form from the country part, so "UK: London" is
        # still recognised as "UK". Punctuation is removed here, so "U.K."
        # also becomes "UK".
        normalized = re.sub(r"[^a-zA-Z0-9]", "", raw_input).upper()
        print("raw input for smart country lookup: ", raw_input, ". Normalized country: ", normalized)

        if normalized == "UK":
            country = get_country_from_geonames(normalized)
            print("get_country_from_geonames(normalized.upper()) ", country)
            if country:
                return country

        # Codes and full names share one country-info lookup; asking once
        # avoids a second identical request for short inputs.
        country = get_country_from_countryinfo(raw_input)
        print("get_country_from_countryinfo(raw_input) ", country)
        if country:
            return country

        # Otherwise, treat as city/place
        country = get_country_from_geonames(raw_input)
        print("get_country_from_geonames(raw_input) ", country)
        if country:
            return country

        return "Not found"
    except Exception as e:
        # `Exception` rather than a bare except, so KeyboardInterrupt and
        # SystemExit still propagate; print what triggered the fallback.
        print("smart_country_lookup error, falling back to model:", e)
        country = model.get_country_from_text(user_input)
        if country != "unknown":
            return country
        return "Not found"